diff mbox series

[RFC,7/7] cifs: Use netfslib to handle reads

Message ID 164311919732.2806745.2743328800847071763.stgit@warthog.procyon.org.uk
State New
Headers show
Series cifs: In-progress conversion to use iov_iters and netfslib | expand

Commit Message

David Howells Jan. 25, 2022, 1:59 p.m. UTC
---

 fs/cifs/Kconfig        |    1 
 fs/cifs/cifsfs.c       |    6 
 fs/cifs/cifsfs.h       |    3 
 fs/cifs/cifsglob.h     |    6 
 fs/cifs/cifssmb.c      |    9 -
 fs/cifs/file.c         |  824 ++++++++----------------------------------------
 fs/cifs/fscache.c      |   31 --
 fs/cifs/fscache.h      |   52 ---
 fs/cifs/inode.c        |   17 +
 fs/cifs/smb2pdu.c      |   15 +
 fs/netfs/read_helper.c |    7 
 11 files changed, 182 insertions(+), 789 deletions(-)

Comments

Rohith Surabattula Feb. 8, 2022, 5:59 a.m. UTC | #1
Hi David,

I have tested the netfs integration with the fsc mount option enabled.
However, I observed that the function "netfs_cache_prepare_read" always
returns "NETFS_DOWNLOAD_FROM_SERVER" because cres->ops (i.e. the
cachefiles operations) is not set.

static enum netfs_read_source netfs_cache_prepare_read(struct
netfs_read_subrequest *subreq,
                                                       loff_t i_size)
{
        struct netfs_read_request *rreq = subreq->rreq;
        struct netfs_cache_resources *cres = &rreq->cache_resources;

        if (cres->ops)
                return cres->ops->prepare_read(subreq, i_size);
        if (subreq->start >= rreq->i_size)
                return NETFS_FILL_WITH_ZEROES;
        return NETFS_DOWNLOAD_FROM_SERVER;

I used the cifs-experimental branch in your repo to test the netfs changes.

Please let me know if any further work needs to be done for netfs to
integrate with cachefiles.

Regards,
Rohith

On Wed, Jan 26, 2022 at 1:24 AM David Howells <dhowells@redhat.com> wrote:
>
>
> ---
>
>  fs/cifs/Kconfig        |    1
>  fs/cifs/cifsfs.c       |    6
>  fs/cifs/cifsfs.h       |    3
>  fs/cifs/cifsglob.h     |    6
>  fs/cifs/cifssmb.c      |    9 -
>  fs/cifs/file.c         |  824 ++++++++----------------------------------------
>  fs/cifs/fscache.c      |   31 --
>  fs/cifs/fscache.h      |   52 ---
>  fs/cifs/inode.c        |   17 +
>  fs/cifs/smb2pdu.c      |   15 +
>  fs/netfs/read_helper.c |    7
>  11 files changed, 182 insertions(+), 789 deletions(-)
>
> diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
> index 3b7e3b9e4fd2..c47e2d3a101f 100644
> --- a/fs/cifs/Kconfig
> +++ b/fs/cifs/Kconfig
> @@ -2,6 +2,7 @@
>  config CIFS
>         tristate "SMB3 and CIFS support (advanced network filesystem)"
>         depends on INET
> +       select NETFS_SUPPORT
>         select NLS
>         select CRYPTO
>         select CRYPTO_MD5
> diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
> index a56cb9c8c5ff..bd06df3bb24b 100644
> --- a/fs/cifs/cifsfs.c
> +++ b/fs/cifs/cifsfs.c
> @@ -936,7 +936,7 @@ cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter)
>         struct inode *inode = file_inode(iocb->ki_filp);
>
>         if (iocb->ki_flags & IOCB_DIRECT)
> -               return cifs_user_readv(iocb, iter);
> +               return netfs_direct_read_iter(iocb, iter);
>
>         rc = cifs_revalidate_mapping(inode);
>         if (rc)
> @@ -1314,7 +1314,7 @@ const struct file_operations cifs_file_strict_ops = {
>  };
>
>  const struct file_operations cifs_file_direct_ops = {
> -       .read_iter = cifs_direct_readv,
> +       .read_iter = netfs_direct_read_iter,
>         .write_iter = cifs_direct_writev,
>         .open = cifs_open,
>         .release = cifs_close,
> @@ -1370,7 +1370,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
>  };
>
>  const struct file_operations cifs_file_direct_nobrl_ops = {
> -       .read_iter = cifs_direct_readv,
> +       .read_iter = netfs_direct_read_iter,
>         .write_iter = cifs_direct_writev,
>         .open = cifs_open,
>         .release = cifs_close,
> diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
> index 1c77bbc0815f..c7d5c268fc47 100644
> --- a/fs/cifs/cifsfs.h
> +++ b/fs/cifs/cifsfs.h
> @@ -85,6 +85,7 @@ extern const struct inode_operations cifs_dfs_referral_inode_operations;
>
>
>  /* Functions related to files and directories */
> +extern const struct netfs_request_ops cifs_req_ops;
>  extern const struct file_operations cifs_file_ops;
>  extern const struct file_operations cifs_file_direct_ops; /* if directio mnt */
>  extern const struct file_operations cifs_file_strict_ops; /* if strictio mnt */
> @@ -94,8 +95,6 @@ extern const struct file_operations cifs_file_strict_nobrl_ops;
>  extern int cifs_open(struct inode *inode, struct file *file);
>  extern int cifs_close(struct inode *inode, struct file *file);
>  extern int cifs_closedir(struct inode *inode, struct file *file);
> -extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to);
> -extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to);
>  extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
>  extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from);
>  extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from);
> diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
> index 3a4fed645636..938e4e9827ed 100644
> --- a/fs/cifs/cifsglob.h
> +++ b/fs/cifs/cifsglob.h
> @@ -1313,18 +1313,14 @@ struct cifs_aio_ctx {
>
>  /* asynchronous read support */
>  struct cifs_readdata {
> +       struct netfs_read_subrequest    *subreq;
>         struct kref                     refcount;
> -       struct list_head                list;
> -       struct completion               done;
>         struct cifsFileInfo             *cfile;
> -       struct address_space            *mapping;
> -       struct cifs_aio_ctx             *ctx;
>         __u64                           offset;
>         ssize_t                         got_bytes;
>         unsigned int                    bytes;
>         pid_t                           pid;
>         int                             result;
> -       struct work_struct              work;
>         struct iov_iter                 iter;
>         struct kvec                     iov[2];
>         struct TCP_Server_Info          *server;
> diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
> index 38e7276352e2..c9fb77a8b31b 100644
> --- a/fs/cifs/cifssmb.c
> +++ b/fs/cifs/cifssmb.c
> @@ -23,6 +23,7 @@
>  #include <linux/swap.h>
>  #include <linux/task_io_accounting_ops.h>
>  #include <linux/uaccess.h>
> +#include <linux/netfs.h>
>  #include "cifspdu.h"
>  #include "cifsfs.h"
>  #include "cifsglob.h"
> @@ -1609,7 +1610,13 @@ cifs_readv_callback(struct mid_q_entry *mid)
>                 rdata->result = -EIO;
>         }
>
> -       queue_work(cifsiod_wq, &rdata->work);
> +       if (rdata->result == 0 || rdata->result == -EAGAIN)
> +               iov_iter_advance(&rdata->subreq->iter, rdata->got_bytes);
> +       netfs_subreq_terminated(rdata->subreq,
> +                               (rdata->result == 0 || rdata->result == -EAGAIN) ?
> +                               rdata->got_bytes : rdata->result,
> +                               false);
> +       kref_put(&rdata->refcount, cifs_readdata_release);
>         DeleteMidQEntry(mid);
>         add_credits(server, &credits, 0);
>  }
> diff --git a/fs/cifs/file.c b/fs/cifs/file.c
> index f9b9a1562e17..36559de02e37 100644
> --- a/fs/cifs/file.c
> +++ b/fs/cifs/file.c
> @@ -21,6 +21,7 @@
>  #include <linux/slab.h>
>  #include <linux/swap.h>
>  #include <linux/mm.h>
> +#include <linux/netfs.h>
>  #include <asm/div64.h>
>  #include "cifsfs.h"
>  #include "cifspdu.h"
> @@ -3306,12 +3307,8 @@ static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
>         struct cifs_readdata *rdata;
>
>         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
> -       if (rdata) {
> +       if (rdata)
>                 kref_init(&rdata->refcount);
> -               INIT_LIST_HEAD(&rdata->list);
> -               init_completion(&rdata->done);
> -               INIT_WORK(&rdata->work, complete);
> -       }
>
>         return rdata;
>  }
> @@ -3322,8 +3319,6 @@ cifs_readdata_release(struct kref *refcount)
>         struct cifs_readdata *rdata = container_of(refcount,
>                                         struct cifs_readdata, refcount);
>
> -       if (rdata->ctx)
> -               kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
>  #ifdef CONFIG_CIFS_SMB_DIRECT
>         if (rdata->mr) {
>                 smbd_deregister_mr(rdata->mr);
> @@ -3336,370 +3331,6 @@ cifs_readdata_release(struct kref *refcount)
>         kfree(rdata);
>  }
>
> -static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
> -
> -static void
> -cifs_uncached_readv_complete(struct work_struct *work)
> -{
> -       struct cifs_readdata *rdata = container_of(work,
> -                                               struct cifs_readdata, work);
> -
> -       complete(&rdata->done);
> -       collect_uncached_read_data(rdata->ctx);
> -       /* the below call can possibly free the last ref to aio ctx */
> -       kref_put(&rdata->refcount, cifs_readdata_release);
> -}
> -
> -static int cifs_resend_rdata(struct cifs_readdata *rdata,
> -                       struct list_head *rdata_list,
> -                       struct cifs_aio_ctx *ctx)
> -{
> -       unsigned int rsize;
> -       struct cifs_credits credits;
> -       int rc;
> -       struct TCP_Server_Info *server;
> -
> -       /* XXX: should we pick a new channel here? */
> -       server = rdata->server;
> -
> -       do {
> -               if (rdata->cfile->invalidHandle) {
> -                       rc = cifs_reopen_file(rdata->cfile, true);
> -                       if (rc == -EAGAIN)
> -                               continue;
> -                       else if (rc)
> -                               break;
> -               }
> -
> -               /*
> -                * Wait for credits to resend this rdata.
> -                * Note: we are attempting to resend the whole rdata not in
> -                * segments
> -                */
> -               do {
> -                       rc = server->ops->wait_mtu_credits(server, rdata->bytes,
> -                                               &rsize, &credits);
> -
> -                       if (rc)
> -                               goto fail;
> -
> -                       if (rsize < rdata->bytes) {
> -                               add_credits_and_wake_if(server, &credits, 0);
> -                               msleep(1000);
> -                       }
> -               } while (rsize < rdata->bytes);
> -               rdata->credits = credits;
> -
> -               rc = adjust_credits(server, &rdata->credits, rdata->bytes);
> -               if (!rc) {
> -                       if (rdata->cfile->invalidHandle)
> -                               rc = -EAGAIN;
> -                       else {
> -#ifdef CONFIG_CIFS_SMB_DIRECT
> -                               if (rdata->mr) {
> -                                       rdata->mr->need_invalidate = true;
> -                                       smbd_deregister_mr(rdata->mr);
> -                                       rdata->mr = NULL;
> -                               }
> -#endif
> -                               rc = server->ops->async_readv(rdata);
> -                       }
> -               }
> -
> -               /* If the read was successfully sent, we are done */
> -               if (!rc) {
> -                       /* Add to aio pending list */
> -                       list_add_tail(&rdata->list, rdata_list);
> -                       return 0;
> -               }
> -
> -               /* Roll back credits and retry if needed */
> -               add_credits_and_wake_if(server, &rdata->credits, 0);
> -       } while (rc == -EAGAIN);
> -
> -fail:
> -       kref_put(&rdata->refcount, cifs_readdata_release);
> -       return rc;
> -}
> -
> -static int
> -cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
> -                    struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
> -                    struct cifs_aio_ctx *ctx)
> -{
> -       struct cifs_readdata *rdata;
> -       unsigned int rsize;
> -       struct cifs_credits credits_on_stack;
> -       struct cifs_credits *credits = &credits_on_stack;
> -       size_t cur_len;
> -       int rc;
> -       pid_t pid;
> -       struct TCP_Server_Info *server;
> -
> -       server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
> -
> -       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
> -               pid = open_file->pid;
> -       else
> -               pid = current->tgid;
> -
> -       do {
> -               if (open_file->invalidHandle) {
> -                       rc = cifs_reopen_file(open_file, true);
> -                       if (rc == -EAGAIN)
> -                               continue;
> -                       else if (rc)
> -                               break;
> -               }
> -
> -               rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
> -                                                  &rsize, credits);
> -               if (rc)
> -                       break;
> -
> -               cur_len = min_t(const size_t, len, rsize);
> -
> -               rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
> -               if (!rdata) {
> -                       add_credits_and_wake_if(server, credits, 0);
> -                       rc = -ENOMEM;
> -                       break;
> -               }
> -
> -               rdata->server   = server;
> -               rdata->cfile    = cifsFileInfo_get(open_file);
> -               rdata->offset   = offset;
> -               rdata->bytes    = cur_len;
> -               rdata->pid      = pid;
> -               rdata->credits  = credits_on_stack;
> -               rdata->ctx      = ctx;
> -               kref_get(&ctx->refcount);
> -
> -               rdata->iter     = ctx->iter;
> -               iov_iter_advance(&rdata->iter, offset - ctx->pos);
> -               iov_iter_truncate(&rdata->iter, cur_len);
> -
> -               rc = adjust_credits(server, &rdata->credits, rdata->bytes);
> -
> -               if (!rc) {
> -                       if (rdata->cfile->invalidHandle)
> -                               rc = -EAGAIN;
> -                       else
> -                               rc = server->ops->async_readv(rdata);
> -               }
> -
> -               if (rc) {
> -                       add_credits_and_wake_if(server, &rdata->credits, 0);
> -                       kref_put(&rdata->refcount, cifs_readdata_release);
> -                       if (rc == -EAGAIN)
> -                               continue;
> -                       break;
> -               }
> -
> -               list_add_tail(&rdata->list, rdata_list);
> -               offset += cur_len;
> -               len -= cur_len;
> -       } while (len > 0);
> -
> -       return rc;
> -}
> -
> -static void
> -collect_uncached_read_data(struct cifs_aio_ctx *ctx)
> -{
> -       struct cifs_readdata *rdata, *tmp;
> -       struct iov_iter *to = &ctx->iter;
> -       struct cifs_sb_info *cifs_sb;
> -       int rc;
> -
> -       cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
> -
> -       mutex_lock(&ctx->aio_mutex);
> -
> -       if (list_empty(&ctx->list)) {
> -               mutex_unlock(&ctx->aio_mutex);
> -               return;
> -       }
> -
> -       rc = ctx->rc;
> -       /* the loop below should proceed in the order of increasing offsets */
> -again:
> -       list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
> -               if (!rc) {
> -                       if (!try_wait_for_completion(&rdata->done)) {
> -                               mutex_unlock(&ctx->aio_mutex);
> -                               return;
> -                       }
> -
> -                       if (rdata->result == -EAGAIN) {
> -                               /* resend call if it's a retryable error */
> -                               struct list_head tmp_list;
> -                               unsigned int got_bytes = rdata->got_bytes;
> -
> -                               list_del_init(&rdata->list);
> -                               INIT_LIST_HEAD(&tmp_list);
> -
> -                               if (ctx->direct_io) {
> -                                       /*
> -                                        * Re-use rdata as this is a
> -                                        * direct I/O
> -                                        */
> -                                       rc = cifs_resend_rdata(
> -                                               rdata,
> -                                               &tmp_list, ctx);
> -                               } else {
> -                                       rc = cifs_send_async_read(
> -                                               rdata->offset + got_bytes,
> -                                               rdata->bytes - got_bytes,
> -                                               rdata->cfile, cifs_sb,
> -                                               &tmp_list, ctx);
> -
> -                                       kref_put(&rdata->refcount,
> -                                               cifs_readdata_release);
> -                               }
> -
> -                               list_splice(&tmp_list, &ctx->list);
> -
> -                               goto again;
> -                       } else if (rdata->result)
> -                               rc = rdata->result;
> -
> -                       /* if there was a short read -- discard anything left */
> -                       if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
> -                               rc = -ENODATA;
> -
> -                       ctx->total_len += rdata->got_bytes;
> -               }
> -               list_del_init(&rdata->list);
> -               kref_put(&rdata->refcount, cifs_readdata_release);
> -       }
> -
> -       if (!ctx->direct_io)
> -               ctx->total_len = ctx->len - iov_iter_count(to);
> -
> -       /* mask nodata case */
> -       if (rc == -ENODATA)
> -               rc = 0;
> -
> -       ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
> -
> -       mutex_unlock(&ctx->aio_mutex);
> -
> -       if (ctx->iocb && ctx->iocb->ki_complete)
> -               ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
> -       else
> -               complete(&ctx->done);
> -}
> -
> -static ssize_t __cifs_readv(
> -       struct kiocb *iocb, struct iov_iter *to, bool direct)
> -{
> -       size_t len;
> -       struct file *file = iocb->ki_filp;
> -       struct cifs_sb_info *cifs_sb;
> -       struct cifsFileInfo *cfile;
> -       struct cifs_tcon *tcon;
> -       ssize_t rc, total_read = 0;
> -       loff_t offset = iocb->ki_pos;
> -       struct cifs_aio_ctx *ctx;
> -
> -       /*
> -        * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
> -        * fall back to data copy read path
> -        * this could be improved by getting pages directly in ITER_KVEC
> -        */
> -       if (direct && iov_iter_is_kvec(to)) {
> -               cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
> -               direct = false;
> -       }
> -
> -       len = iov_iter_count(to);
> -       if (!len)
> -               return 0;
> -
> -       cifs_sb = CIFS_FILE_SB(file);
> -       cfile = file->private_data;
> -       tcon = tlink_tcon(cfile->tlink);
> -
> -       if (!tcon->ses->server->ops->async_readv)
> -               return -ENOSYS;
> -
> -       if ((file->f_flags & O_ACCMODE) == O_WRONLY)
> -               cifs_dbg(FYI, "attempting read on write only file instance\n");
> -
> -       ctx = cifs_aio_ctx_alloc();
> -       if (!ctx)
> -               return -ENOMEM;
> -
> -       ctx->pos        = offset;
> -       ctx->direct_io  = direct;
> -       ctx->len        = len;
> -       ctx->cfile      = cifsFileInfo_get(cfile);
> -
> -       if (!is_sync_kiocb(iocb))
> -               ctx->iocb = iocb;
> -
> -       if (iter_is_iovec(to))
> -               ctx->should_dirty = true;
> -
> -       rc = extract_iter_to_iter(to, len, &ctx->iter, &ctx->bv);
> -       if (rc < 0) {
> -               kref_put(&ctx->refcount, cifs_aio_ctx_release);
> -               return rc;
> -       }
> -       ctx->npages = rc;
> -
> -       /* grab a lock here due to read response handlers can access ctx */
> -       mutex_lock(&ctx->aio_mutex);
> -
> -       rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
> -
> -       /* if at least one read request send succeeded, then reset rc */
> -       if (!list_empty(&ctx->list))
> -               rc = 0;
> -
> -       mutex_unlock(&ctx->aio_mutex);
> -
> -       if (rc) {
> -               kref_put(&ctx->refcount, cifs_aio_ctx_release);
> -               return rc;
> -       }
> -
> -       if (!is_sync_kiocb(iocb)) {
> -               kref_put(&ctx->refcount, cifs_aio_ctx_release);
> -               return -EIOCBQUEUED;
> -       }
> -
> -       rc = wait_for_completion_killable(&ctx->done);
> -       if (rc) {
> -               mutex_lock(&ctx->aio_mutex);
> -               ctx->rc = rc = -EINTR;
> -               total_read = ctx->total_len;
> -               mutex_unlock(&ctx->aio_mutex);
> -       } else {
> -               rc = ctx->rc;
> -               total_read = ctx->total_len;
> -       }
> -
> -       kref_put(&ctx->refcount, cifs_aio_ctx_release);
> -
> -       if (total_read) {
> -               iocb->ki_pos += total_read;
> -               return total_read;
> -       }
> -       return rc;
> -}
> -
> -ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
> -{
> -       return __cifs_readv(iocb, to, true);
> -}
> -
> -ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
> -{
> -       return __cifs_readv(iocb, to, false);
> -}
> -
>  ssize_t
>  cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
>  {
> @@ -3720,12 +3351,15 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
>          * pos+len-1.
>          */
>         if (!CIFS_CACHE_READ(cinode))
> -               return cifs_user_readv(iocb, to);
> +               return netfs_direct_read_iter(iocb, to);
>
>         if (cap_unix(tcon->ses) &&
>             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
> -           ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
> +           ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
> +               if (iocb->ki_flags & IOCB_DIRECT)
> +                       return netfs_direct_read_iter(iocb, to);
>                 return generic_file_read_iter(iocb, to);
> +       }
>
>         /*
>          * We need to hold the sem to be sure nobody modifies lock list
> @@ -3734,104 +3368,16 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
>         down_read(&cinode->lock_sem);
>         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
>                                      tcon->ses->server->vals->shared_lock_type,
> -                                    0, NULL, CIFS_READ_OP))
> -               rc = generic_file_read_iter(iocb, to);
> +                                    0, NULL, CIFS_READ_OP)) {
> +               if (iocb->ki_flags & IOCB_DIRECT)
> +                       rc = netfs_direct_read_iter(iocb, to);
> +               else
> +                       rc = generic_file_read_iter(iocb, to);
> +       }
>         up_read(&cinode->lock_sem);
>         return rc;
>  }
>
> -static ssize_t
> -cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
> -{
> -       int rc = -EACCES;
> -       unsigned int bytes_read = 0;
> -       unsigned int total_read;
> -       unsigned int current_read_size;
> -       unsigned int rsize;
> -       struct cifs_sb_info *cifs_sb;
> -       struct cifs_tcon *tcon;
> -       struct TCP_Server_Info *server;
> -       unsigned int xid;
> -       char *cur_offset;
> -       struct cifsFileInfo *open_file;
> -       struct cifs_io_parms io_parms = {0};
> -       int buf_type = CIFS_NO_BUFFER;
> -       __u32 pid;
> -
> -       xid = get_xid();
> -       cifs_sb = CIFS_FILE_SB(file);
> -
> -       /* FIXME: set up handlers for larger reads and/or convert to async */
> -       rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
> -
> -       if (file->private_data == NULL) {
> -               rc = -EBADF;
> -               free_xid(xid);
> -               return rc;
> -       }
> -       open_file = file->private_data;
> -       tcon = tlink_tcon(open_file->tlink);
> -       server = cifs_pick_channel(tcon->ses);
> -
> -       if (!server->ops->sync_read) {
> -               free_xid(xid);
> -               return -ENOSYS;
> -       }
> -
> -       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
> -               pid = open_file->pid;
> -       else
> -               pid = current->tgid;
> -
> -       if ((file->f_flags & O_ACCMODE) == O_WRONLY)
> -               cifs_dbg(FYI, "attempting read on write only file instance\n");
> -
> -       for (total_read = 0, cur_offset = read_data; read_size > total_read;
> -            total_read += bytes_read, cur_offset += bytes_read) {
> -               do {
> -                       current_read_size = min_t(uint, read_size - total_read,
> -                                                 rsize);
> -                       /*
> -                        * For windows me and 9x we do not want to request more
> -                        * than it negotiated since it will refuse the read
> -                        * then.
> -                        */
> -                       if (!(tcon->ses->capabilities &
> -                               tcon->ses->server->vals->cap_large_files)) {
> -                               current_read_size = min_t(uint,
> -                                       current_read_size, CIFSMaxBufSize);
> -                       }
> -                       if (open_file->invalidHandle) {
> -                               rc = cifs_reopen_file(open_file, true);
> -                               if (rc != 0)
> -                                       break;
> -                       }
> -                       io_parms.pid = pid;
> -                       io_parms.tcon = tcon;
> -                       io_parms.offset = *offset;
> -                       io_parms.length = current_read_size;
> -                       io_parms.server = server;
> -                       rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
> -                                                   &bytes_read, &cur_offset,
> -                                                   &buf_type);
> -               } while (rc == -EAGAIN);
> -
> -               if (rc || (bytes_read == 0)) {
> -                       if (total_read) {
> -                               break;
> -                       } else {
> -                               free_xid(xid);
> -                               return rc;
> -                       }
> -               } else {
> -                       cifs_stats_bytes_read(tcon, total_read);
> -                       *offset += bytes_read;
> -               }
> -       }
> -       free_xid(xid);
> -       return total_read;
> -}
> -
>  /*
>   * If the page is mmap'ed into a process' page tables, then we need to make
>   * sure that it doesn't change while being written back.
> @@ -3901,224 +3447,149 @@ int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
>  }
>
>  /*
> - * Unlock a bunch of folios in the pagecache.
> + * Issue a read operation on behalf of the netfs helper functions.  We're asked
> + * to make a read of a certain size at a point in the file.  We are permitted
> + * to only read a portion of that, but as long as we read something, the netfs
> + * helper will call us again so that we can issue another read.
>   */
> -static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
> -{
> -       struct folio *folio;
> -       XA_STATE(xas, &mapping->i_pages, first);
> -
> -       rcu_read_lock();
> -       xas_for_each(&xas, folio, last) {
> -               folio_unlock(folio);
> -       }
> -       rcu_read_unlock();
> -}
> -
> -static void cifs_readahead_complete(struct work_struct *work)
> -{
> -       struct cifs_readdata *rdata = container_of(work,
> -                                                  struct cifs_readdata, work);
> -       struct folio *folio;
> -       pgoff_t last;
> -       bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
> -
> -       XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
> -
> -#if 0
> -       if (good)
> -               cifs_readpage_to_fscache(rdata->mapping->host, page);
> -#endif
> -
> -       if (iov_iter_count(&rdata->iter) > 0)
> -               iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
> -
> -       last = round_down(rdata->offset + rdata->got_bytes - 1, PAGE_SIZE);
> -
> -       xas_for_each(&xas, folio, last) {
> -               if (good) {
> -                       flush_dcache_folio(folio);
> -                       folio_mark_uptodate(folio);
> -               }
> -               folio_unlock(folio);
> -       }
> -
> -       kref_put(&rdata->refcount, cifs_readdata_release);
> -}
> -
> -static void cifs_readahead(struct readahead_control *ractl)
> +static void cifs_req_issue_op(struct netfs_read_subrequest *subreq)
>  {
> -       struct cifsFileInfo *open_file = ractl->file->private_data;
> -       struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
> +       struct netfs_read_request *rreq = subreq->rreq;
>         struct TCP_Server_Info *server;
> +       struct cifs_readdata *rdata;
> +       struct cifsFileInfo *open_file = rreq->netfs_priv;
> +       struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb);
> +       struct cifs_credits credits_on_stack, *credits = &credits_on_stack;
>         unsigned int xid;
>         pid_t pid;
>         int rc = 0;
> +       unsigned int rsize;
>
>         xid = get_xid();
>
>         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
>                 pid = open_file->pid;
>         else
> -               pid = current->tgid;
> +               pid = current->tgid; // Ummm...  This may be a workqueue
>
>         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
>
> -       cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
> -                __func__, ractl->file, ractl->mapping, readahead_count(ractl));
> -
> -       /*
> -        * Chop the readahead request up into rsize-sized read requests.
> -        */
> -       while (readahead_count(ractl) - ractl->_batch_count) {
> -               unsigned int i, nr_pages, rsize;
> -               struct cifs_readdata *rdata;
> -               struct cifs_credits credits_on_stack;
> -               struct cifs_credits *credits = &credits_on_stack;
> +       cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n",
> +                __func__, rreq->debug_id, subreq->debug_index, rreq->mapping,
> +                subreq->transferred, subreq->len);
>
> -               if (open_file->invalidHandle) {
> +       if (open_file->invalidHandle) {
> +               do {
>                         rc = cifs_reopen_file(open_file, true);
> -                       if (rc) {
> -                               if (rc == -EAGAIN)
> -                                       continue;
> -                               break;
> -                       }
> -               }
> -
> -               rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
> -                                                  &rsize, credits);
> +               } while (rc == -EAGAIN);
>                 if (rc)
> -                       break;
> -               nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl));
> -
> -               /*
> -                * Give up immediately if rsize is too small to read an entire
> -                * page. The VFS will fall back to readpage. We should never
> -                * reach this point however since we set ra_pages to 0 when the
> -                * rsize is smaller than a cache page.
> -                */
> -               if (unlikely(!nr_pages)) {
> -                       add_credits_and_wake_if(server, credits, 0);
> -                       break;
> -               }
> -
> -               rdata = cifs_readdata_alloc(cifs_readahead_complete);
> -               if (!rdata) {
> -                       /* best to give up if we're out of mem */
> -                       add_credits_and_wake_if(server, credits, 0);
> -                       break;
> -               }
> +                       goto out;
> +       }
>
> -               rdata->offset   = readahead_pos(ractl);
> -               rdata->bytes    = nr_pages * PAGE_SIZE;
> -               rdata->cfile    = cifsFileInfo_get(open_file);
> -               rdata->server   = server;
> -               rdata->mapping  = ractl->mapping;
> -               rdata->pid      = pid;
> -               rdata->credits  = credits_on_stack;
> +       rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &rsize, credits);
> +       if (rc)
> +               goto out;
>
> -               for (i = 0; i < nr_pages; i++)
> -                       if (!readahead_folio(ractl))
> -                               BUG();
> +       rdata = cifs_readdata_alloc(NULL);
> +       if (!rdata) {
> +               add_credits_and_wake_if(server, credits, 0);
> +               rc = -ENOMEM;
> +               goto out;
> +       }
>
> -               iov_iter_xarray(&rdata->iter, READ, &rdata->mapping->i_pages,
> -                               rdata->offset, rdata->bytes);
> +       __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
> +       rdata->subreq   = subreq;
> +       rdata->cfile    = cifsFileInfo_get(open_file);
> +       rdata->server   = server;
> +       rdata->offset   = subreq->start + subreq->transferred;
> +       rdata->bytes    = subreq->len   - subreq->transferred;
> +       rdata->pid      = pid;
> +       rdata->credits  = credits_on_stack;
> +       rdata->iter     = subreq->iter;
>
> -               rc = adjust_credits(server, &rdata->credits, rdata->bytes);
> -               if (!rc) {
> -                       if (rdata->cfile->invalidHandle)
> -                               rc = -EAGAIN;
> -                       else
> -                               rc = server->ops->async_readv(rdata);
> -               }
> +       rc = adjust_credits(server, &rdata->credits, rdata->bytes);
> +       if (!rc) {
> +               if (rdata->cfile->invalidHandle)
> +                       rc = -EAGAIN;
> +               else
> +                       rc = server->ops->async_readv(rdata);
> +       }
>
> -               if (rc) {
> -                       add_credits_and_wake_if(server, &rdata->credits, 0);
> -                       cifs_unlock_folios(rdata->mapping,
> -                                          rdata->offset / PAGE_SIZE,
> -                                          (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
> -                       /* Fallback to the readpage in error/reconnect cases */
> -                       kref_put(&rdata->refcount, cifs_readdata_release);
> -                       break;
> -               }
> +       if (rc) {
> +               add_credits_and_wake_if(server, &rdata->credits, 0);
> +               /* Fallback to the readpage in error/reconnect cases */
> +               kref_put(&rdata->refcount, cifs_readdata_release);
> +               goto out;
>         }
>
> +       kref_put(&rdata->refcount, cifs_readdata_release);
> +
> +out:
>         free_xid(xid);
> +       if (rc)
> +               netfs_subreq_terminated(subreq, rc, false);
> +}
> +
> +static int cifs_init_rreq(struct netfs_read_request *rreq, struct file *file)
> +{
> +       rreq->netfs_priv = file->private_data;
> +       return 0;
>  }
>
>  /*
> - * cifs_readpage_worker must be called with the page pinned
> + * Expand the size of a readahead to the size of the rsize, if at least as
> + * large as a page, allowing for the possibility that rsize is not pow-2
> + * aligned.
>   */
> -static int cifs_readpage_worker(struct file *file, struct page *page,
> -       loff_t *poffset)
> +static void cifs_expand_readahead(struct netfs_read_request *rreq)
>  {
> -       char *read_data;
> -       int rc;
> +       struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb);
> +       unsigned int rsize = cifs_sb->ctx->rsize;
> +       loff_t misalignment, i_size = i_size_read(rreq->inode);
>
> -       /* Is the page cached? */
> -       rc = cifs_readpage_from_fscache(file_inode(file), page);
> -       if (rc == 0)
> -               goto read_complete;
> -
> -       read_data = kmap(page);
> -       /* for reads over a certain size could initiate async read ahead */
> -
> -       rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
> -
> -       if (rc < 0)
> -               goto io_error;
> -       else
> -               cifs_dbg(FYI, "Bytes read %d\n", rc);
> +       if (rsize < PAGE_SIZE)
> +               return;
>
> -       /* we do not want atime to be less than mtime, it broke some apps */
> -       file_inode(file)->i_atime = current_time(file_inode(file));
> -       if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
> -               file_inode(file)->i_atime = file_inode(file)->i_mtime;
> +       if (rsize < INT_MAX)
> +               rsize = roundup_pow_of_two(rsize);
>         else
> -               file_inode(file)->i_atime = current_time(file_inode(file));
> +               rsize = ((unsigned int)INT_MAX + 1) / 2;
>
> -       if (PAGE_SIZE > rc)
> -               memset(read_data + rc, 0, PAGE_SIZE - rc);
> -
> -       flush_dcache_page(page);
> -       SetPageUptodate(page);
> -
> -       /* send this page to the cache */
> -       cifs_readpage_to_fscache(file_inode(file), page);
> -
> -       rc = 0;
> -
> -io_error:
> -       kunmap(page);
> -       unlock_page(page);
> +       misalignment = rreq->start & (rsize - 1);
> +       if (misalignment) {
> +               rreq->start -= misalignment;
> +               rreq->len += misalignment;
> +       }
>
> -read_complete:
> -       return rc;
> +       rreq->len = round_up(rreq->len, rsize);
> +       if (rreq->start < i_size && rreq->len > i_size - rreq->start)
> +               rreq->len = i_size - rreq->start;
>  }
>
> -static int cifs_readpage(struct file *file, struct page *page)
> +static void cifs_rreq_done(struct netfs_read_request *rreq)
>  {
> -       loff_t offset = page_file_offset(page);
> -       int rc = -EACCES;
> -       unsigned int xid;
> +       struct inode *inode = rreq->inode;
>
> -       xid = get_xid();
> -
> -       if (file->private_data == NULL) {
> -               rc = -EBADF;
> -               free_xid(xid);
> -               return rc;
> -       }
> -
> -       cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
> -                page, (int)offset, (int)offset);
> -
> -       rc = cifs_readpage_worker(file, page, &offset);
> +       /* we do not want atime to be less than mtime, it broke some apps */
> +       inode->i_atime = current_time(inode);
> +       if (timespec64_compare(&inode->i_atime, &inode->i_mtime))
> +               inode->i_atime = inode->i_mtime;
> +       else
> +               inode->i_atime = current_time(inode);
> +}
>
> -       free_xid(xid);
> -       return rc;
> +static void cifs_req_cleanup(struct address_space *mapping, void *netfs_priv)
> +{
>  }
>
> +const struct netfs_request_ops cifs_req_ops = {
> +       .init_rreq              = cifs_init_rreq,
> +       .expand_readahead       = cifs_expand_readahead,
> +       .issue_op               = cifs_req_issue_op,
> +       .done                   = cifs_rreq_done,
> +       .cleanup                = cifs_req_cleanup,
> +};
> +
>  static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
>  {
>         struct cifsFileInfo *open_file;
> @@ -4168,34 +3639,20 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
>                         loff_t pos, unsigned len, unsigned flags,
>                         struct page **pagep, void **fsdata)
>  {
> -       int oncethru = 0;
> -       pgoff_t index = pos >> PAGE_SHIFT;
> -       loff_t offset = pos & (PAGE_SIZE - 1);
> -       loff_t page_start = pos & PAGE_MASK;
> -       loff_t i_size;
> -       struct page *page;
> -       int rc = 0;
> +       struct folio *folio;
> +       int rc;
>
>         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
>
> -start:
> -       page = grab_cache_page_write_begin(mapping, index, flags);
> -       if (!page) {
> -               rc = -ENOMEM;
> -               goto out;
> -       }
> -
> -       if (PageUptodate(page))
> -               goto out;
> -
> -       /*
> -        * If we write a full page it will be up to date, no need to read from
> -        * the server. If the write is short, we'll end up doing a sync write
> -        * instead.
> +       /* Prefetch area to be written into the cache if we're caching this
> +        * file.  We need to do this before we get a lock on the page in case
> +        * there's more than one writer competing for the same cache block.
>          */
> -       if (len == PAGE_SIZE)
> -               goto out;
> +       rc = netfs_write_begin(file, mapping, pos, len, flags, &folio, fsdata);
> +       if (rc < 0)
> +               return rc;
>
> +#if 0
>         /*
>          * optimize away the read when we have an oplock, and we're not
>          * expecting to use any of the data we'd be reading in. That
> @@ -4210,34 +3667,17 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
>                                            offset + len,
>                                            PAGE_SIZE);
>                         /*
> -                        * PageChecked means that the parts of the page
> -                        * to which we're not writing are considered up
> -                        * to date. Once the data is copied to the
> -                        * page, it can be set uptodate.
> +                        * Marking a folio checked means that the parts of the
> +                        * page to which we're not writing are considered up to
> +                        * date. Once the data is copied to the page, it can be
> +                        * set uptodate.
>                          */
> -                       SetPageChecked(page);
> +                       folio_set_checked(folio);
>                         goto out;
>                 }
>         }
> -
> -       if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
> -               /*
> -                * might as well read a page, it is fast enough. If we get
> -                * an error, we don't need to return it. cifs_write_end will
> -                * do a sync write instead since PG_uptodate isn't set.
> -                */
> -               cifs_readpage_worker(file, page, &page_start);
> -               put_page(page);
> -               oncethru = 1;
> -               goto start;
> -       } else {
> -               /* we could try using another file handle if there is one -
> -                  but how would we lock it to prevent close of that handle
> -                  racing with this read? In any case
> -                  this will be written out by write_end so is fine */
> -       }
> -out:
> -       *pagep = page;
> +#endif
> +       *pagep = folio_page(folio, (pos - folio_pos(folio)) / PAGE_SIZE);
>         return rc;
>  }
>
> @@ -4429,8 +3869,8 @@ static int cifs_set_page_dirty(struct page *page)
>  #endif
>
>  const struct address_space_operations cifs_addr_ops = {
> -       .readpage = cifs_readpage,
> -       .readahead = cifs_readahead,
> +       .readpage = netfs_readpage,
> +       .readahead = netfs_readahead,
>         .writepage = cifs_writepage,
>         .writepages = cifs_writepages,
>         .write_begin = cifs_write_begin,
> @@ -4455,7 +3895,7 @@ const struct address_space_operations cifs_addr_ops = {
>   * to leave cifs_readpages out of the address space operations.
>   */
>  const struct address_space_operations cifs_addr_ops_smallbuf = {
> -       .readpage = cifs_readpage,
> +       .readpage = netfs_readpage,
>         .writepage = cifs_writepage,
>         .writepages = cifs_writepages,
>         .write_begin = cifs_write_begin,
> diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
> index a7e7e5a97b7f..bb1c3a372de4 100644
> --- a/fs/cifs/fscache.c
> +++ b/fs/cifs/fscache.c
> @@ -134,34 +134,3 @@ void cifs_fscache_release_inode_cookie(struct inode *inode)
>                 cifsi->netfs_ctx.cache = NULL;
>         }
>  }
> -
> -/*
> - * Retrieve a page from FS-Cache
> - */
> -int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
> -{
> -       cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n",
> -                __func__, cifs_inode_cookie(inode), page, inode);
> -       return -ENOBUFS; // Needs conversion to using netfslib
> -}
> -
> -/*
> - * Retrieve a set of pages from FS-Cache
> - */
> -int __cifs_readpages_from_fscache(struct inode *inode,
> -                               struct address_space *mapping,
> -                               struct list_head *pages,
> -                               unsigned *nr_pages)
> -{
> -       cifs_dbg(FYI, "%s: (0x%p/%u/0x%p)\n",
> -                __func__, cifs_inode_cookie(inode), *nr_pages, inode);
> -       return -ENOBUFS; // Needs conversion to using netfslib
> -}
> -
> -void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
> -{
> -       cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n",
> -                __func__, cifs_inode_cookie(inode), page, inode);
> -
> -       // Needs conversion to using netfslib
> -}
> diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
> index 9f6e42e85d14..fdc03cd7b881 100644
> --- a/fs/cifs/fscache.h
> +++ b/fs/cifs/fscache.h
> @@ -58,14 +58,6 @@ void cifs_fscache_fill_coherency(struct inode *inode,
>  }
>
>
> -extern int cifs_fscache_release_page(struct page *page, gfp_t gfp);
> -extern int __cifs_readpage_from_fscache(struct inode *, struct page *);
> -extern int __cifs_readpages_from_fscache(struct inode *,
> -                                        struct address_space *,
> -                                        struct list_head *,
> -                                        unsigned *);
> -extern void __cifs_readpage_to_fscache(struct inode *, struct page *);
> -
>  static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode)
>  {
>         return netfs_i_cookie(inode);
> @@ -80,33 +72,6 @@ static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags
>                            i_size_read(inode), flags);
>  }
>
> -static inline int cifs_readpage_from_fscache(struct inode *inode,
> -                                            struct page *page)
> -{
> -       if (cifs_inode_cookie(inode))
> -               return __cifs_readpage_from_fscache(inode, page);
> -
> -       return -ENOBUFS;
> -}
> -
> -static inline int cifs_readpages_from_fscache(struct inode *inode,
> -                                             struct address_space *mapping,
> -                                             struct list_head *pages,
> -                                             unsigned *nr_pages)
> -{
> -       if (cifs_inode_cookie(inode))
> -               return __cifs_readpages_from_fscache(inode, mapping, pages,
> -                                                    nr_pages);
> -       return -ENOBUFS;
> -}
> -
> -static inline void cifs_readpage_to_fscache(struct inode *inode,
> -                                           struct page *page)
> -{
> -       if (PageFsCache(page))
> -               __cifs_readpage_to_fscache(inode, page);
> -}
> -
>  #else /* CONFIG_CIFS_FSCACHE */
>  static inline
>  void cifs_fscache_fill_coherency(struct inode *inode,
> @@ -123,23 +88,6 @@ static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool upd
>  static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; }
>  static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {}
>
> -static inline int
> -cifs_readpage_from_fscache(struct inode *inode, struct page *page)
> -{
> -       return -ENOBUFS;
> -}
> -
> -static inline int cifs_readpages_from_fscache(struct inode *inode,
> -                                             struct address_space *mapping,
> -                                             struct list_head *pages,
> -                                             unsigned *nr_pages)
> -{
> -       return -ENOBUFS;
> -}
> -
> -static inline void cifs_readpage_to_fscache(struct inode *inode,
> -                       struct page *page) {}
> -
>  #endif /* CONFIG_CIFS_FSCACHE */
>
>  #endif /* _CIFS_FSCACHE_H */
> diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
> index 7d8b3ceb2af3..b6a9ded9fbb2 100644
> --- a/fs/cifs/inode.c
> +++ b/fs/cifs/inode.c
> @@ -26,6 +26,19 @@
>  #include "fs_context.h"
>  #include "cifs_ioctl.h"
>
> +/*
> + * Set parameters for the netfs library
> + */
> +static void cifs_set_netfs_context(struct inode *inode)
> +{
> +       struct netfs_i_context *ctx = netfs_i_context(inode);
> +       struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
> +
> +       netfs_i_context_init(inode, &cifs_req_ops);
> +       ctx->rsize = cifs_sb->ctx->rsize;
> +       ctx->wsize = cifs_sb->ctx->wsize;
> +}
> +
>  static void cifs_set_ops(struct inode *inode)
>  {
>         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
> @@ -209,8 +222,10 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
>
>         if (fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL)
>                 inode->i_flags |= S_AUTOMOUNT;
> -       if (inode->i_state & I_NEW)
> +       if (inode->i_state & I_NEW) {
> +               cifs_set_netfs_context(inode);
>                 cifs_set_ops(inode);
> +       }
>         return 0;
>  }
>
> diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
> index ebbea7526ee2..0d76cffb4e75 100644
> --- a/fs/cifs/smb2pdu.c
> +++ b/fs/cifs/smb2pdu.c
> @@ -23,6 +23,7 @@
>  #include <linux/uuid.h>
>  #include <linux/pagemap.h>
>  #include <linux/xattr.h>
> +#include <linux/netfs.h>
>  #include "cifsglob.h"
>  #include "cifsacl.h"
>  #include "cifsproto.h"
> @@ -4185,7 +4186,19 @@ smb2_readv_callback(struct mid_q_entry *mid)
>                                      tcon->tid, tcon->ses->Suid,
>                                      rdata->offset, rdata->got_bytes);
>
> -       queue_work(cifsiod_wq, &rdata->work);
> +       if (rdata->result == -ENODATA) {
> +               /* We may have got an EOF error because fallocate
> +                * failed to enlarge the file.
> +                */
> +               if (rdata->subreq->start < rdata->subreq->rreq->i_size)
> +                       rdata->result = 0;
> +       }
> +       if (rdata->result == 0 || rdata->result == -EAGAIN)
> +               iov_iter_advance(&rdata->subreq->iter, rdata->got_bytes);
> +       netfs_subreq_terminated(rdata->subreq,
> +                               (rdata->result == 0 || rdata->result == -EAGAIN) ?
> +                               rdata->got_bytes : rdata->result, false);
> +       kref_put(&rdata->refcount, cifs_readdata_release);
>         DeleteMidQEntry(mid);
>         add_credits(server, &credits, 0);
>  }
> diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
> index df13c9b22ca8..1fa242140dc4 100644
> --- a/fs/netfs/read_helper.c
> +++ b/fs/netfs/read_helper.c
> @@ -553,8 +553,13 @@ static void netfs_rreq_assess_dio(struct netfs_read_request *rreq)
>         list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
>                 if (subreq->error || subreq->transferred == 0)
>                         break;
> -               for (i = 0; i < subreq->bv_count; i++)
> +               for (i = 0; i < subreq->bv_count; i++) {
>                         flush_dcache_page(subreq->bv[i].bv_page);
> +                       // TODO: cifs marks pages in the destination buffer
> +                       // dirty under some circumstances after a read.  Do we
> +                       // need to do that too?
> +                       set_page_dirty(subreq->bv[i].bv_page);
> +               }
>                 transferred += subreq->transferred;
>                 if (subreq->transferred < subreq->len)
>                         break;
>
>
David Howells Feb. 14, 2022, 4:33 p.m. UTC | #2
Rohith Surabattula <rohiths.msft@gmail.com> wrote:

> I have tested netfs integration with fsc mount option enabled. But, I
> observed function "netfs_cache_prepare_read" always returns
> "NETFS_DOWNLOAD_FROM_SERVER" because cres->ops(i.e cachefiles
> operations) is not set.

I see it download from the server and write to the cache:

	# cat /proc/fs/fscache/stats 
	...
	IO     : rd=0 wr=4     <---- no reads, four writes made
	RdHelp : DR=0 RA=4 RP=0 WB=0 WBZ=0 rr=0 sr=0
	RdHelp : ZR=0 sh=0 sk=0
	RdHelp : DL=4 ds=4 df=0 di=0
	RdHelp : RD=0 rs=0 rf=0
	RdHelp : WR=4 ws=4 wf=0

Turning on the cachefiles_vol_coherency tracepoint, I see:

     kworker/2:2-1040    [002] .....   585.499799: cachefiles_vol_coherency: V=00000003 VOL BAD cmp  B=480004
     kworker/2:2-1040    [002] .....   585.499872: cachefiles_vol_coherency: V=00000003 VOL SET ok   B=480005

every time I unmount and mount again.  One of the fields is different each
time.

Using the netfs tracepoints, I can see the download being made from the server
and then the subsequent write to the cache:

          md5sum-4689    [003] .....   887.382290: netfs_read: R=00000005 READAHEAD c=0000004e ni=86 s=0 20000
          md5sum-4689    [003] .....   887.383076: netfs_read: R=00000005 EXPANDED  c=0000004e ni=86 s=0 400000
          md5sum-4689    [003] .....   887.383252: netfs_sreq: R=00000005[0] PREP  DOWN f=01 s=0 0/400000 e=0
          md5sum-4689    [003] .....   887.383252: netfs_sreq: R=00000005[0] SUBMT DOWN f=01 s=0 0/400000 e=0
           cifsd-4687    [002] .....   887.394926: netfs_sreq: R=00000005[0] TERM  DOWN f=03 s=0 400000/400000 e=0
           cifsd-4687    [002] .....   887.394928: netfs_rreq: R=00000005 ASSESS f=22
           cifsd-4687    [002] .....   887.394928: netfs_rreq: R=00000005 UNLOCK f=22
    kworker/u8:4-776     [000] .....   887.395000: netfs_rreq: R=00000005 WRITE  f=02
    kworker/u8:4-776     [000] .....   887.395005: netfs_sreq: R=00000005[0] WRITE DOWN f=03 s=0 400000/400000 e=0
     kworker/3:2-1001    [003] .....   887.627881: netfs_sreq: R=00000005[0] WTERM DOWN f=03 s=0 400000/400000 e=0
     kworker/3:2-1001    [003] .....   887.628163: netfs_rreq: R=00000005 DONE   f=02
     kworker/3:2-1001    [003] .....   887.628165: netfs_sreq: R=00000005[0] FREE  DOWN f=03 s=0 400000/400000 e=0
    kworker/u8:4-776     [000] .....   887.628216: netfs_rreq: R=00000005 FREE   f=02

Can you mount a cifs share with "-o fsc", read a file and then look in
/proc/fs/fscache/cookies and /proc/fs/fscache/stats for me?

David
Rohith Surabattula Feb. 28, 2022, 2:14 p.m. UTC | #3
Hi David,

Below is the trace output when mounted with the fsc option:
              vi-1631    [000] .....  2519.247539: netfs_read:
R=00000006 READAHEAD c=00000000 ni=0 s=0 1000
              vi-1631    [000] .....  2519.247540: netfs_read:
R=00000006 EXPANDED  c=00000000 ni=0 s=0 1000
              vi-1631    [000] .....  2519.247550: netfs_sreq:
R=00000006[0] PREP  DOWN f=00 s=0 0/100000 e=0
              vi-1631    [000] .....  2519.247551: netfs_sreq:
R=00000006[0] SUBMT DOWN f=00 s=0 0/100000 e=0
           cifsd-1390    [001] .....  2519.287542: netfs_sreq:
R=00000006[0] TERM  DOWN f=02 s=0 100000/100000 e=0
           cifsd-1390    [001] .....  2519.287545: netfs_rreq:
R=00000006 ASSESS f=20
           cifsd-1390    [001] .....  2519.287545: netfs_rreq:
R=00000006 UNLOCK f=20
           cifsd-1390    [001] .....  2519.287571: netfs_rreq:
R=00000006 DONE   f=00
           cifsd-1390    [001] .....  2519.287572: netfs_sreq:
R=00000006[0] FREE  DOWN f=02 s=0 100000/100000 e=0
           cifsd-1390    [001] .....  2519.287573: netfs_rreq:
R=00000006 FREE   f=00

Mount :
root@netfsvm:/sys/kernel/debug/tracing# sudo mount -t cifs
//netfsstg.file.core.windows.net/testshare on /mnt/testshare type cifs
(rw,relatime,vers=3.0,cache=strict,username=netfsstg,uid=0,noforceuid,gid=0,noforcegid,addr=52.239.170.72,file_mode=0777,dir_mode=0777,soft,persistenthandles,nounix,serverino,mapposix,fsc,rsize=1048576,wsize=1048576,bsize=1048576,echo_interval=60,actimeo=1)

I don't see any writes to fscache. It always downloads from the server.

root@netfsvm:/sys/kernel/debug/tracing# ps -A | grep cache
    450 ?        00:00:00 mkey_cache
   1361 ?        00:00:00 cachefilesd

root@netfsvm:/sys/kernel/debug/tracing# cat /proc/fs/fscache/stats
FS-Cache statistics
Cookies: n=29 v=1 vcol=0 voom=0
Acquire: n=29 ok=29 oom=0
LRU    : n=0 exp=0 rmv=0 drp=0 at=0
Invals : n=0
Updates: n=0 rsz=0 rsn=0
Relinqs: n=0 rtr=0 drop=0
NoSpace: nwr=0 ncr=0 cull=0
IO     : rd=0 wr=0
RdHelp : DR=0 RA=6 RP=0 WB=0 WBZ=7 rr=0 sr=0
RdHelp : ZR=0 sh=0 sk=7
RdHelp : DL=6 ds=6 df=0 di=0
RdHelp : RD=0 rs=0 rf=0
RdHelp : WR=0 ws=0 wf=0

root@netfsvm:/sys/kernel/debug/tracing# cat /proc/fs/fscache/cookies
COOKIE   VOLUME   REF ACT ACC S FL DEF
======== ======== === === === = == ================
00000002 00000001   1   0   0 - 4008 302559bec76a7924,
0a13e961000000000a13e96100000000d01f4719d01f4719
00000003 00000001   1   0   0 - 4000 0000000000640090,
37630162000000003763016200000000e8650f119c49f411
00000004 00000001   1   0   0 - 4000 00000000001800f0,
244e016200000000244e01620000000044975123c042f525
00000005 00000001   1   0   0 - 4000 00000000007000a0,
ea92e96100000000ea92e96100000000acee2035acee2035
00000006 00000001   1   0   0 - 4000 00000000007000c0,
ad92e96100000000ad92e96100000000407da317407da317
00000007 00000001   1   0   0 - 4000 00000000002800e0,
4aeaf361000000004aeaf3610000000078c77b0d6850dc1f
00000008 00000001   1   0   0 - 4008 0000000000140080,
df92136200000000df92136200000000b8e0f30eb8e0f30e
00000009 00000001   1   0   0 - 4008 00000000001400e0,
d39d136200000000d39d136200000000f4e6e51bf4e6e51b
0000000a 00000001   1   0   0 - 4008 0000000000140090,
d99d136200000000d99d136200000000dcd77d28dcd77d28
0000000b 00000001   1   0   0 - 4008 0000000000540080,
cdd21c6200000000cdd21c62000000009c8cd90c9c8cd90c
0000000c 00000001   1   0   0 - 4008 00000000005400c0,
cdd21c6200000000cdd21c6200000000f44b440df44b440d
0000000d 00000001   1   0   0 - 4008 00000000005400a0,
cdd21c6200000000cdd21c62000000005487b50f5487b50f
0000000e 00000001   1   0   0 - 4008 00000000005400e0,
ebd21c6200000000ebd21c6200000000c07c1800c07c1800
0000000f 00000001   1   0   0 - 4008 0000000000540090,
ebd21c6200000000ebd21c620000000094fc730094fc7300
00000010 00000001   1   0   0 - 4008 00000000005400d0,
ebd21c6200000000ebd21c6200000000bcb78902bcb78902
00000011 00000001   1   0   0 - 4008 00000000005400b0,
29d31c620000000029d31c62000000002c02e8252c02e825
00000012 00000001   1   0   0 - 4008 00000000005400f0,
29d31c620000000029d31c6200000000c83fae26c83fae26
00000013 00000001   1   0   0 - 4008 0000000000540088,
29d31c620000000029d31c6200000000e4fcc328e4fcc328
00000014 00000001   1   0   0 - 4008 00000000005400c8,
3bd31c62000000003bd31c6200000000747b780b747b780b
00000015 00000001   1   0   0 - 4008 00000000005400a8,
3bd31c62000000003bd31c6200000000ecf57e0decf57e0d
00000016 00000001   1   0   0 - 4008 00000000005400e8,
b0d51c6200000000b0d51c62000000002005e5092005e509
00000017 00000001   1   0   0 - 4008 0000000000540098,
b0d51c6200000000b0d51c620000000034035f0a34035f0a
00000018 00000001   1   0   0 - 4008 00000000005400d8,
b0d51c6200000000b0d51c62000000001cfdc00c1cfdc00c
00000019 00000001   1   0   0 - 4008 00000000005400b8,
50d61c620000000050d61c62000000004453d0384453d038
0000001a 00000001   1   0   0 - 4008 00000000005400f8,
50d61c620000000050d61c6200000000d4113b39d4113b39
0000001b 00000001   1   0   0 - 4008 0000000000540084,
51d61c620000000051d61c62000000002042020020420200
0000001c 00000001   1   0   0 - 4008 00000000005400c4,
16d71c620000000016d71c62000000009ceb0d019ceb0d01
0000001d 00000001   1   0   0 - 4008 00000000005400a4,
16d71c620000000016d71c6200000000dcae7801dcae7801
0000001e 00000001   1   0   0 - 4008 00000000005400e4,
16d71c620000000016d71c6200000000ec2af903ec2af903

I have enabled the fscache and cachefiles tracepoints listed below, but
nothing is printed in the trace output.
echo 1 >/sys/kernel/debug/tracing/events/fscache/fscache_access/enable
echo 1 >/sys/kernel/debug/tracing/events/fscache/fscache_active/enable
echo 1 >/sys/kernel/debug/tracing/events/cachefiles/cachefiles_coherency/enable
echo 1 >/sys/kernel/debug/tracing/events/cachefiles/cachefiles_read/enable
echo 1 >/sys/kernel/debug/tracing/events/cachefiles/cachefiles_write/enable
echo 1 >/sys/kernel/debug/tracing/events/cachefiles/cachefiles_io_error/enable
echo 1 >/sys/kernel/debug/tracing/events/cachefiles/cachefiles_vfs_error/enable
echo 1 > events/cachefiles/cachefiles_vol_coherency/enable

Regards,
Rohith

On Mon, Feb 14, 2022 at 10:03 PM David Howells <dhowells@redhat.com> wrote:
>
> Rohith Surabattula <rohiths.msft@gmail.com> wrote:
>
> > I have tested netfs integration with fsc mount option enabled. But, I
> > observed function "netfs_cache_prepare_read" always returns
> > "NETFS_DOWNLOAD_FROM_SERVER" because cres->ops(i.e cachefiles
> > operations) is not set.
>
> I see it download from the server and write to the cache:
>
>         # cat /proc/fs/fscache/stats
>         ...
>         IO     : rd=0 wr=4     <---- no reads, four writes made
>         RdHelp : DR=0 RA=4 RP=0 WB=0 WBZ=0 rr=0 sr=0
>         RdHelp : ZR=0 sh=0 sk=0
>         RdHelp : DL=4 ds=4 df=0 di=0
>         RdHelp : RD=0 rs=0 rf=0
>         RdHelp : WR=4 ws=4 wf=0
>
> Turning on the cachefiles_vol_coherency tracepoint, I see:
>
>      kworker/2:2-1040    [002] .....   585.499799: cachefiles_vol_coherency: V=00000003 VOL BAD cmp  B=480004
>      kworker/2:2-1040    [002] .....   585.499872: cachefiles_vol_coherency: V=00000003 VOL SET ok   B=480005
>
> every time I unmount and mount again.  One of the fields is different each
> time.
>
> Using the netfs tracepoints, I can see the download being made from the server
> and then the subsequent write to the cache:
>
>           md5sum-4689    [003] .....   887.382290: netfs_read: R=00000005 READAHEAD c=0000004e ni=86 s=0 20000
>           md5sum-4689    [003] .....   887.383076: netfs_read: R=00000005 EXPANDED  c=0000004e ni=86 s=0 400000
>           md5sum-4689    [003] .....   887.383252: netfs_sreq: R=00000005[0] PREP  DOWN f=01 s=0 0/400000 e=0
>           md5sum-4689    [003] .....   887.383252: netfs_sreq: R=00000005[0] SUBMT DOWN f=01 s=0 0/400000 e=0
>            cifsd-4687    [002] .....   887.394926: netfs_sreq: R=00000005[0] TERM  DOWN f=03 s=0 400000/400000 e=0
>            cifsd-4687    [002] .....   887.394928: netfs_rreq: R=00000005 ASSESS f=22
>            cifsd-4687    [002] .....   887.394928: netfs_rreq: R=00000005 UNLOCK f=22
>     kworker/u8:4-776     [000] .....   887.395000: netfs_rreq: R=00000005 WRITE  f=02
>     kworker/u8:4-776     [000] .....   887.395005: netfs_sreq: R=00000005[0] WRITE DOWN f=03 s=0 400000/400000 e=0
>      kworker/3:2-1001    [003] .....   887.627881: netfs_sreq: R=00000005[0] WTERM DOWN f=03 s=0 400000/400000 e=0
>      kworker/3:2-1001    [003] .....   887.628163: netfs_rreq: R=00000005 DONE   f=02
>      kworker/3:2-1001    [003] .....   887.628165: netfs_sreq: R=00000005[0] FREE  DOWN f=03 s=0 400000/400000 e=0
>     kworker/u8:4-776     [000] .....   887.628216: netfs_rreq: R=00000005 FREE   f=02
>
> Can you mount a cifs share with "-o fsc", read a file and then look in
> /proc/fs/fscache/cookies and /proc/fs/fscache/stats for me?
>
> David
>
David Howells Feb. 28, 2022, 2:28 p.m. UTC | #4
Rohith Surabattula <rohiths.msft@gmail.com> wrote:

> R=00000006 READAHEAD c=00000000 ni=0 s=0 1000
>               vi-1631    [000] .....  2519.247540: netfs_read:

"c=00000000" would indicate that no fscache cookie was allocated for this
inode.

> COOKIE   VOLUME   REF ACT ACC S FL DEF
> ======== ======== === === === = == ================
> 00000002 00000001   1   0   0 - 4008 302559bec76a7924,
> 0a13e961000000000a13e96100000000d01f4719d01f4719
> 00000003 00000001   1   0   0 - 4000 0000000000640090,
> 37630162000000003763016200000000e8650f119c49f411

But we can see some cookies have been allocated.

Can you turn on:

  echo 1 >/sys/kernel/debug/tracing/events/fscache/fscache_acquire/enable

David
diff mbox series

Patch

diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 3b7e3b9e4fd2..c47e2d3a101f 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,6 +2,7 @@ 
 config CIFS
 	tristate "SMB3 and CIFS support (advanced network filesystem)"
 	depends on INET
+	select NETFS_SUPPORT
 	select NLS
 	select CRYPTO
 	select CRYPTO_MD5
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index a56cb9c8c5ff..bd06df3bb24b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -936,7 +936,7 @@  cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 	struct inode *inode = file_inode(iocb->ki_filp);
 
 	if (iocb->ki_flags & IOCB_DIRECT)
-		return cifs_user_readv(iocb, iter);
+		return netfs_direct_read_iter(iocb, iter);
 
 	rc = cifs_revalidate_mapping(inode);
 	if (rc)
@@ -1314,7 +1314,7 @@  const struct file_operations cifs_file_strict_ops = {
 };
 
 const struct file_operations cifs_file_direct_ops = {
-	.read_iter = cifs_direct_readv,
+	.read_iter = netfs_direct_read_iter,
 	.write_iter = cifs_direct_writev,
 	.open = cifs_open,
 	.release = cifs_close,
@@ -1370,7 +1370,7 @@  const struct file_operations cifs_file_strict_nobrl_ops = {
 };
 
 const struct file_operations cifs_file_direct_nobrl_ops = {
-	.read_iter = cifs_direct_readv,
+	.read_iter = netfs_direct_read_iter,
 	.write_iter = cifs_direct_writev,
 	.open = cifs_open,
 	.release = cifs_close,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 1c77bbc0815f..c7d5c268fc47 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -85,6 +85,7 @@  extern const struct inode_operations cifs_dfs_referral_inode_operations;
 
 
 /* Functions related to files and directories */
+extern const struct netfs_request_ops cifs_req_ops;
 extern const struct file_operations cifs_file_ops;
 extern const struct file_operations cifs_file_direct_ops; /* if directio mnt */
 extern const struct file_operations cifs_file_strict_ops; /* if strictio mnt */
@@ -94,8 +95,6 @@  extern const struct file_operations cifs_file_strict_nobrl_ops;
 extern int cifs_open(struct inode *inode, struct file *file);
 extern int cifs_close(struct inode *inode, struct file *file);
 extern int cifs_closedir(struct inode *inode, struct file *file);
-extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to);
-extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from);
 extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 3a4fed645636..938e4e9827ed 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1313,18 +1313,14 @@  struct cifs_aio_ctx {
 
 /* asynchronous read support */
 struct cifs_readdata {
+	struct netfs_read_subrequest	*subreq;
 	struct kref			refcount;
-	struct list_head		list;
-	struct completion		done;
 	struct cifsFileInfo		*cfile;
-	struct address_space		*mapping;
-	struct cifs_aio_ctx		*ctx;
 	__u64				offset;
 	ssize_t				got_bytes;
 	unsigned int			bytes;
 	pid_t				pid;
 	int				result;
-	struct work_struct		work;
 	struct iov_iter			iter;
 	struct kvec			iov[2];
 	struct TCP_Server_Info		*server;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 38e7276352e2..c9fb77a8b31b 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -23,6 +23,7 @@ 
 #include <linux/swap.h>
 #include <linux/task_io_accounting_ops.h>
 #include <linux/uaccess.h>
+#include <linux/netfs.h>
 #include "cifspdu.h"
 #include "cifsfs.h"
 #include "cifsglob.h"
@@ -1609,7 +1610,13 @@  cifs_readv_callback(struct mid_q_entry *mid)
 		rdata->result = -EIO;
 	}
 
-	queue_work(cifsiod_wq, &rdata->work);
+	if (rdata->result == 0 || rdata->result == -EAGAIN)
+		iov_iter_advance(&rdata->subreq->iter, rdata->got_bytes);
+	netfs_subreq_terminated(rdata->subreq,
+				(rdata->result == 0 || rdata->result == -EAGAIN) ?
+				rdata->got_bytes : rdata->result,
+				false);
+	kref_put(&rdata->refcount, cifs_readdata_release);
 	DeleteMidQEntry(mid);
 	add_credits(server, &credits, 0);
 }
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index f9b9a1562e17..36559de02e37 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -21,6 +21,7 @@ 
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/mm.h>
+#include <linux/netfs.h>
 #include <asm/div64.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
@@ -3306,12 +3307,8 @@  static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
 	struct cifs_readdata *rdata;
 
 	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
-	if (rdata) {
+	if (rdata)
 		kref_init(&rdata->refcount);
-		INIT_LIST_HEAD(&rdata->list);
-		init_completion(&rdata->done);
-		INIT_WORK(&rdata->work, complete);
-	}
 
 	return rdata;
 }
@@ -3322,8 +3319,6 @@  cifs_readdata_release(struct kref *refcount)
 	struct cifs_readdata *rdata = container_of(refcount,
 					struct cifs_readdata, refcount);
 
-	if (rdata->ctx)
-		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
 #ifdef CONFIG_CIFS_SMB_DIRECT
 	if (rdata->mr) {
 		smbd_deregister_mr(rdata->mr);
@@ -3336,370 +3331,6 @@  cifs_readdata_release(struct kref *refcount)
 	kfree(rdata);
 }
 
-static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
-
-static void
-cifs_uncached_readv_complete(struct work_struct *work)
-{
-	struct cifs_readdata *rdata = container_of(work,
-						struct cifs_readdata, work);
-
-	complete(&rdata->done);
-	collect_uncached_read_data(rdata->ctx);
-	/* the below call can possibly free the last ref to aio ctx */
-	kref_put(&rdata->refcount, cifs_readdata_release);
-}
-
-static int cifs_resend_rdata(struct cifs_readdata *rdata,
-			struct list_head *rdata_list,
-			struct cifs_aio_ctx *ctx)
-{
-	unsigned int rsize;
-	struct cifs_credits credits;
-	int rc;
-	struct TCP_Server_Info *server;
-
-	/* XXX: should we pick a new channel here? */
-	server = rdata->server;
-
-	do {
-		if (rdata->cfile->invalidHandle) {
-			rc = cifs_reopen_file(rdata->cfile, true);
-			if (rc == -EAGAIN)
-				continue;
-			else if (rc)
-				break;
-		}
-
-		/*
-		 * Wait for credits to resend this rdata.
-		 * Note: we are attempting to resend the whole rdata not in
-		 * segments
-		 */
-		do {
-			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
-						&rsize, &credits);
-
-			if (rc)
-				goto fail;
-
-			if (rsize < rdata->bytes) {
-				add_credits_and_wake_if(server, &credits, 0);
-				msleep(1000);
-			}
-		} while (rsize < rdata->bytes);
-		rdata->credits = credits;
-
-		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
-		if (!rc) {
-			if (rdata->cfile->invalidHandle)
-				rc = -EAGAIN;
-			else {
-#ifdef CONFIG_CIFS_SMB_DIRECT
-				if (rdata->mr) {
-					rdata->mr->need_invalidate = true;
-					smbd_deregister_mr(rdata->mr);
-					rdata->mr = NULL;
-				}
-#endif
-				rc = server->ops->async_readv(rdata);
-			}
-		}
-
-		/* If the read was successfully sent, we are done */
-		if (!rc) {
-			/* Add to aio pending list */
-			list_add_tail(&rdata->list, rdata_list);
-			return 0;
-		}
-
-		/* Roll back credits and retry if needed */
-		add_credits_and_wake_if(server, &rdata->credits, 0);
-	} while (rc == -EAGAIN);
-
-fail:
-	kref_put(&rdata->refcount, cifs_readdata_release);
-	return rc;
-}
-
-static int
-cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
-		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
-		     struct cifs_aio_ctx *ctx)
-{
-	struct cifs_readdata *rdata;
-	unsigned int rsize;
-	struct cifs_credits credits_on_stack;
-	struct cifs_credits *credits = &credits_on_stack;
-	size_t cur_len;
-	int rc;
-	pid_t pid;
-	struct TCP_Server_Info *server;
-
-	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
-
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
-		pid = open_file->pid;
-	else
-		pid = current->tgid;
-
-	do {
-		if (open_file->invalidHandle) {
-			rc = cifs_reopen_file(open_file, true);
-			if (rc == -EAGAIN)
-				continue;
-			else if (rc)
-				break;
-		}
-
-		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
-						   &rsize, credits);
-		if (rc)
-			break;
-
-		cur_len = min_t(const size_t, len, rsize);
-
-		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
-		if (!rdata) {
-			add_credits_and_wake_if(server, credits, 0);
-			rc = -ENOMEM;
-			break;
-		}
-
-		rdata->server	= server;
-		rdata->cfile	= cifsFileInfo_get(open_file);
-		rdata->offset	= offset;
-		rdata->bytes	= cur_len;
-		rdata->pid	= pid;
-		rdata->credits	= credits_on_stack;
-		rdata->ctx	= ctx;
-		kref_get(&ctx->refcount);
-
-		rdata->iter	= ctx->iter;
-		iov_iter_advance(&rdata->iter, offset - ctx->pos);
-		iov_iter_truncate(&rdata->iter, cur_len);
-
-		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
-
-		if (!rc) {
-			if (rdata->cfile->invalidHandle)
-				rc = -EAGAIN;
-			else
-				rc = server->ops->async_readv(rdata);
-		}
-
-		if (rc) {
-			add_credits_and_wake_if(server, &rdata->credits, 0);
-			kref_put(&rdata->refcount, cifs_readdata_release);
-			if (rc == -EAGAIN)
-				continue;
-			break;
-		}
-
-		list_add_tail(&rdata->list, rdata_list);
-		offset += cur_len;
-		len -= cur_len;
-	} while (len > 0);
-
-	return rc;
-}
-
-static void
-collect_uncached_read_data(struct cifs_aio_ctx *ctx)
-{
-	struct cifs_readdata *rdata, *tmp;
-	struct iov_iter *to = &ctx->iter;
-	struct cifs_sb_info *cifs_sb;
-	int rc;
-
-	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
-
-	mutex_lock(&ctx->aio_mutex);
-
-	if (list_empty(&ctx->list)) {
-		mutex_unlock(&ctx->aio_mutex);
-		return;
-	}
-
-	rc = ctx->rc;
-	/* the loop below should proceed in the order of increasing offsets */
-again:
-	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
-		if (!rc) {
-			if (!try_wait_for_completion(&rdata->done)) {
-				mutex_unlock(&ctx->aio_mutex);
-				return;
-			}
-
-			if (rdata->result == -EAGAIN) {
-				/* resend call if it's a retryable error */
-				struct list_head tmp_list;
-				unsigned int got_bytes = rdata->got_bytes;
-
-				list_del_init(&rdata->list);
-				INIT_LIST_HEAD(&tmp_list);
-
-				if (ctx->direct_io) {
-					/*
-					 * Re-use rdata as this is a
-					 * direct I/O
-					 */
-					rc = cifs_resend_rdata(
-						rdata,
-						&tmp_list, ctx);
-				} else {
-					rc = cifs_send_async_read(
-						rdata->offset + got_bytes,
-						rdata->bytes - got_bytes,
-						rdata->cfile, cifs_sb,
-						&tmp_list, ctx);
-
-					kref_put(&rdata->refcount,
-						cifs_readdata_release);
-				}
-
-				list_splice(&tmp_list, &ctx->list);
-
-				goto again;
-			} else if (rdata->result)
-				rc = rdata->result;
-
-			/* if there was a short read -- discard anything left */
-			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
-				rc = -ENODATA;
-
-			ctx->total_len += rdata->got_bytes;
-		}
-		list_del_init(&rdata->list);
-		kref_put(&rdata->refcount, cifs_readdata_release);
-	}
-
-	if (!ctx->direct_io)
-		ctx->total_len = ctx->len - iov_iter_count(to);
-
-	/* mask nodata case */
-	if (rc == -ENODATA)
-		rc = 0;
-
-	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
-
-	mutex_unlock(&ctx->aio_mutex);
-
-	if (ctx->iocb && ctx->iocb->ki_complete)
-		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
-	else
-		complete(&ctx->done);
-}
-
-static ssize_t __cifs_readv(
-	struct kiocb *iocb, struct iov_iter *to, bool direct)
-{
-	size_t len;
-	struct file *file = iocb->ki_filp;
-	struct cifs_sb_info *cifs_sb;
-	struct cifsFileInfo *cfile;
-	struct cifs_tcon *tcon;
-	ssize_t rc, total_read = 0;
-	loff_t offset = iocb->ki_pos;
-	struct cifs_aio_ctx *ctx;
-
-	/*
-	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
-	 * fall back to data copy read path
-	 * this could be improved by getting pages directly in ITER_KVEC
-	 */
-	if (direct && iov_iter_is_kvec(to)) {
-		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
-		direct = false;
-	}
-
-	len = iov_iter_count(to);
-	if (!len)
-		return 0;
-
-	cifs_sb = CIFS_FILE_SB(file);
-	cfile = file->private_data;
-	tcon = tlink_tcon(cfile->tlink);
-
-	if (!tcon->ses->server->ops->async_readv)
-		return -ENOSYS;
-
-	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
-		cifs_dbg(FYI, "attempting read on write only file instance\n");
-
-	ctx = cifs_aio_ctx_alloc();
-	if (!ctx)
-		return -ENOMEM;
-
-	ctx->pos	= offset;
-	ctx->direct_io	= direct;
-	ctx->len	= len;
-	ctx->cfile	= cifsFileInfo_get(cfile);
-
-	if (!is_sync_kiocb(iocb))
-		ctx->iocb = iocb;
-
-	if (iter_is_iovec(to))
-		ctx->should_dirty = true;
-
-	rc = extract_iter_to_iter(to, len, &ctx->iter, &ctx->bv);
-	if (rc < 0) {
-		kref_put(&ctx->refcount, cifs_aio_ctx_release);
-		return rc;
-	}
-	ctx->npages = rc;
-
-	/* grab a lock here due to read response handlers can access ctx */
-	mutex_lock(&ctx->aio_mutex);
-
-	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
-
-	/* if at least one read request send succeeded, then reset rc */
-	if (!list_empty(&ctx->list))
-		rc = 0;
-
-	mutex_unlock(&ctx->aio_mutex);
-
-	if (rc) {
-		kref_put(&ctx->refcount, cifs_aio_ctx_release);
-		return rc;
-	}
-
-	if (!is_sync_kiocb(iocb)) {
-		kref_put(&ctx->refcount, cifs_aio_ctx_release);
-		return -EIOCBQUEUED;
-	}
-
-	rc = wait_for_completion_killable(&ctx->done);
-	if (rc) {
-		mutex_lock(&ctx->aio_mutex);
-		ctx->rc = rc = -EINTR;
-		total_read = ctx->total_len;
-		mutex_unlock(&ctx->aio_mutex);
-	} else {
-		rc = ctx->rc;
-		total_read = ctx->total_len;
-	}
-
-	kref_put(&ctx->refcount, cifs_aio_ctx_release);
-
-	if (total_read) {
-		iocb->ki_pos += total_read;
-		return total_read;
-	}
-	return rc;
-}
-
-ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
-{
-	return __cifs_readv(iocb, to, true);
-}
-
-ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
-{
-	return __cifs_readv(iocb, to, false);
-}
-
 ssize_t
 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
 {
@@ -3720,12 +3351,15 @@  cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
 	 * pos+len-1.
 	 */
 	if (!CIFS_CACHE_READ(cinode))
-		return cifs_user_readv(iocb, to);
+		return netfs_direct_read_iter(iocb, to);
 
 	if (cap_unix(tcon->ses) &&
 	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
-	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
+	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
+		if (iocb->ki_flags & IOCB_DIRECT)
+			return netfs_direct_read_iter(iocb, to);
 		return generic_file_read_iter(iocb, to);
+	}
 
 	/*
 	 * We need to hold the sem to be sure nobody modifies lock list
@@ -3734,104 +3368,16 @@  cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
 	down_read(&cinode->lock_sem);
 	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
 				     tcon->ses->server->vals->shared_lock_type,
-				     0, NULL, CIFS_READ_OP))
-		rc = generic_file_read_iter(iocb, to);
+				     0, NULL, CIFS_READ_OP)) {
+		if (iocb->ki_flags & IOCB_DIRECT)
+			rc = netfs_direct_read_iter(iocb, to);
+		else
+			rc = generic_file_read_iter(iocb, to);
+	}
 	up_read(&cinode->lock_sem);
 	return rc;
 }
 
-static ssize_t
-cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
-{
-	int rc = -EACCES;
-	unsigned int bytes_read = 0;
-	unsigned int total_read;
-	unsigned int current_read_size;
-	unsigned int rsize;
-	struct cifs_sb_info *cifs_sb;
-	struct cifs_tcon *tcon;
-	struct TCP_Server_Info *server;
-	unsigned int xid;
-	char *cur_offset;
-	struct cifsFileInfo *open_file;
-	struct cifs_io_parms io_parms = {0};
-	int buf_type = CIFS_NO_BUFFER;
-	__u32 pid;
-
-	xid = get_xid();
-	cifs_sb = CIFS_FILE_SB(file);
-
-	/* FIXME: set up handlers for larger reads and/or convert to async */
-	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
-
-	if (file->private_data == NULL) {
-		rc = -EBADF;
-		free_xid(xid);
-		return rc;
-	}
-	open_file = file->private_data;
-	tcon = tlink_tcon(open_file->tlink);
-	server = cifs_pick_channel(tcon->ses);
-
-	if (!server->ops->sync_read) {
-		free_xid(xid);
-		return -ENOSYS;
-	}
-
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
-		pid = open_file->pid;
-	else
-		pid = current->tgid;
-
-	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
-		cifs_dbg(FYI, "attempting read on write only file instance\n");
-
-	for (total_read = 0, cur_offset = read_data; read_size > total_read;
-	     total_read += bytes_read, cur_offset += bytes_read) {
-		do {
-			current_read_size = min_t(uint, read_size - total_read,
-						  rsize);
-			/*
-			 * For windows me and 9x we do not want to request more
-			 * than it negotiated since it will refuse the read
-			 * then.
-			 */
-			if (!(tcon->ses->capabilities &
-				tcon->ses->server->vals->cap_large_files)) {
-				current_read_size = min_t(uint,
-					current_read_size, CIFSMaxBufSize);
-			}
-			if (open_file->invalidHandle) {
-				rc = cifs_reopen_file(open_file, true);
-				if (rc != 0)
-					break;
-			}
-			io_parms.pid = pid;
-			io_parms.tcon = tcon;
-			io_parms.offset = *offset;
-			io_parms.length = current_read_size;
-			io_parms.server = server;
-			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
-						    &bytes_read, &cur_offset,
-						    &buf_type);
-		} while (rc == -EAGAIN);
-
-		if (rc || (bytes_read == 0)) {
-			if (total_read) {
-				break;
-			} else {
-				free_xid(xid);
-				return rc;
-			}
-		} else {
-			cifs_stats_bytes_read(tcon, total_read);
-			*offset += bytes_read;
-		}
-	}
-	free_xid(xid);
-	return total_read;
-}
-
 /*
  * If the page is mmap'ed into a process' page tables, then we need to make
  * sure that it doesn't change while being written back.
@@ -3901,224 +3447,149 @@  int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
 }
 
 /*
- * Unlock a bunch of folios in the pagecache.
+ * Issue a read operation on behalf of the netfs helper functions.  We're asked
+ * to make a read of a certain size at a point in the file.  We are permitted
+ * to only read a portion of that, but as long as we read something, the netfs
+ * helper will call us again so that we can issue another read.
  */
-static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
-{
-       struct folio *folio;
-       XA_STATE(xas, &mapping->i_pages, first);
-
-       rcu_read_lock();
-       xas_for_each(&xas, folio, last) {
-               folio_unlock(folio);
-       }
-       rcu_read_unlock();
-}
-
-static void cifs_readahead_complete(struct work_struct *work)
-{
-	struct cifs_readdata *rdata = container_of(work,
-						   struct cifs_readdata, work);
-	struct folio *folio;
-	pgoff_t last;
-	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
-
-	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
-
-#if 0
-	if (good)
-		cifs_readpage_to_fscache(rdata->mapping->host, page);
-#endif
-
-	if (iov_iter_count(&rdata->iter) > 0)
-		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
-
-	last = round_down(rdata->offset + rdata->got_bytes - 1, PAGE_SIZE);
-
-	xas_for_each(&xas, folio, last) {
-		if (good) {
-			flush_dcache_folio(folio);
-			folio_mark_uptodate(folio);
-		}
-		folio_unlock(folio);
-	}
-
-	kref_put(&rdata->refcount, cifs_readdata_release);
-}
-
-static void cifs_readahead(struct readahead_control *ractl)
+static void cifs_req_issue_op(struct netfs_read_subrequest *subreq)
 {
-	struct cifsFileInfo *open_file = ractl->file->private_data;
-	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
+	struct netfs_read_request *rreq = subreq->rreq;
 	struct TCP_Server_Info *server;
+	struct cifs_readdata *rdata;
+	struct cifsFileInfo *open_file = rreq->netfs_priv;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb);
+	struct cifs_credits credits_on_stack, *credits = &credits_on_stack;
 	unsigned int xid;
 	pid_t pid;
 	int rc = 0;
+	unsigned int rsize;
 
 	xid = get_xid();
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
 		pid = open_file->pid;
 	else
-		pid = current->tgid;
+		pid = current->tgid; // Ummm...  This may be a workqueue
 
 	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
 
-	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
-		 __func__, ractl->file, ractl->mapping, readahead_count(ractl));
-
-	/*
-	 * Chop the readahead request up into rsize-sized read requests.
-	 */
-	while (readahead_count(ractl) - ractl->_batch_count) {
-		unsigned int i, nr_pages, rsize;
-		struct cifs_readdata *rdata;
-		struct cifs_credits credits_on_stack;
-		struct cifs_credits *credits = &credits_on_stack;
+	cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n",
+		 __func__, rreq->debug_id, subreq->debug_index, rreq->mapping,
+		 subreq->transferred, subreq->len);
 
-		if (open_file->invalidHandle) {
+	if (open_file->invalidHandle) {
+		do {
 			rc = cifs_reopen_file(open_file, true);
-			if (rc) {
-				if (rc == -EAGAIN)
-					continue;
-				break;
-			}
-		}
-
-		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
-						   &rsize, credits);
+		} while (rc == -EAGAIN);
 		if (rc)
-			break;
-		nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl));
-
-		/*
-		 * Give up immediately if rsize is too small to read an entire
-		 * page. The VFS will fall back to readpage. We should never
-		 * reach this point however since we set ra_pages to 0 when the
-		 * rsize is smaller than a cache page.
-		 */
-		if (unlikely(!nr_pages)) {
-			add_credits_and_wake_if(server, credits, 0);
-			break;
-		}
-
-		rdata = cifs_readdata_alloc(cifs_readahead_complete);
-		if (!rdata) {
-			/* best to give up if we're out of mem */
-			add_credits_and_wake_if(server, credits, 0);
-			break;
-		}
+			goto out;
+	}
 
-		rdata->offset	= readahead_pos(ractl);
-		rdata->bytes	= nr_pages * PAGE_SIZE;
-		rdata->cfile	= cifsFileInfo_get(open_file);
-		rdata->server	= server;
-		rdata->mapping	= ractl->mapping;
-		rdata->pid	= pid;
-		rdata->credits	= credits_on_stack;
+	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &rsize, credits);
+	if (rc)
+		goto out;
 
-		for (i = 0; i < nr_pages; i++)
-			if (!readahead_folio(ractl))
-				BUG();
+	rdata = cifs_readdata_alloc(NULL);
+	if (!rdata) {
+		add_credits_and_wake_if(server, credits, 0);
+		rc = -ENOMEM;
+		goto out;
+	}
 
-		iov_iter_xarray(&rdata->iter, READ, &rdata->mapping->i_pages,
-				rdata->offset, rdata->bytes);
+	__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+	rdata->subreq	= subreq;
+	rdata->cfile	= cifsFileInfo_get(open_file);
+	rdata->server	= server;
+	rdata->offset	= subreq->start + subreq->transferred;
+	rdata->bytes	= subreq->len   - subreq->transferred;
+	rdata->pid	= pid;
+	rdata->credits	= credits_on_stack;
+	rdata->iter	= subreq->iter;
 
-		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
-		if (!rc) {
-			if (rdata->cfile->invalidHandle)
-				rc = -EAGAIN;
-			else
-				rc = server->ops->async_readv(rdata);
-		}
+	rc = adjust_credits(server, &rdata->credits, rdata->bytes);
+	if (!rc) {
+		if (rdata->cfile->invalidHandle)
+			rc = -EAGAIN;
+		else
+			rc = server->ops->async_readv(rdata);
+	}
 
-		if (rc) {
-			add_credits_and_wake_if(server, &rdata->credits, 0);
-			cifs_unlock_folios(rdata->mapping,
-					   rdata->offset / PAGE_SIZE,
-					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
-			/* Fallback to the readpage in error/reconnect cases */
-			kref_put(&rdata->refcount, cifs_readdata_release);
-			break;
-		}
+	if (rc) {
+		add_credits_and_wake_if(server, &rdata->credits, 0);
+		/* Fallback to the readpage in error/reconnect cases */
+		kref_put(&rdata->refcount, cifs_readdata_release);
+		goto out;
 	}
 
+	kref_put(&rdata->refcount, cifs_readdata_release);
+
+out:
 	free_xid(xid);
+	if (rc)
+		netfs_subreq_terminated(subreq, rc, false);
+}
+
+static int cifs_init_rreq(struct netfs_read_request *rreq, struct file *file)
+{
+	rreq->netfs_priv = file->private_data;
+	return 0;
 }
 
 /*
- * cifs_readpage_worker must be called with the page pinned
+ * Expand the size of a readahead to the size of the rsize, if at least as
+ * large as a page, allowing for the possibility that rsize is not pow-2
+ * aligned.
  */
-static int cifs_readpage_worker(struct file *file, struct page *page,
-	loff_t *poffset)
+static void cifs_expand_readahead(struct netfs_read_request *rreq)
 {
-	char *read_data;
-	int rc;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb);
+	unsigned int rsize = cifs_sb->ctx->rsize;
+	loff_t misalignment, i_size = i_size_read(rreq->inode);
 
-	/* Is the page cached? */
-	rc = cifs_readpage_from_fscache(file_inode(file), page);
-	if (rc == 0)
-		goto read_complete;
-
-	read_data = kmap(page);
-	/* for reads over a certain size could initiate async read ahead */
-
-	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
-
-	if (rc < 0)
-		goto io_error;
-	else
-		cifs_dbg(FYI, "Bytes read %d\n", rc);
+	if (rsize < PAGE_SIZE)
+		return;
 
-	/* we do not want atime to be less than mtime, it broke some apps */
-	file_inode(file)->i_atime = current_time(file_inode(file));
-	if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
-		file_inode(file)->i_atime = file_inode(file)->i_mtime;
+	if (rsize < INT_MAX)
+		rsize = roundup_pow_of_two(rsize);
 	else
-		file_inode(file)->i_atime = current_time(file_inode(file));
+		rsize = ((unsigned int)INT_MAX + 1) / 2;
 
-	if (PAGE_SIZE > rc)
-		memset(read_data + rc, 0, PAGE_SIZE - rc);
-
-	flush_dcache_page(page);
-	SetPageUptodate(page);
-
-	/* send this page to the cache */
-	cifs_readpage_to_fscache(file_inode(file), page);
-
-	rc = 0;
-
-io_error:
-	kunmap(page);
-	unlock_page(page);
+	misalignment = rreq->start & (rsize - 1);
+	if (misalignment) {
+		rreq->start -= misalignment;
+		rreq->len += misalignment;
+	}
 
-read_complete:
-	return rc;
+	rreq->len = round_up(rreq->len, rsize);
+	if (rreq->start < i_size && rreq->len > i_size - rreq->start)
+		rreq->len = i_size - rreq->start;
 }
 
-static int cifs_readpage(struct file *file, struct page *page)
+static void cifs_rreq_done(struct netfs_read_request *rreq)
 {
-	loff_t offset = page_file_offset(page);
-	int rc = -EACCES;
-	unsigned int xid;
+	struct inode *inode = rreq->inode;
 
-	xid = get_xid();
-
-	if (file->private_data == NULL) {
-		rc = -EBADF;
-		free_xid(xid);
-		return rc;
-	}
-
-	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
-		 page, (int)offset, (int)offset);
-
-	rc = cifs_readpage_worker(file, page, &offset);
+	/* we do not want atime to be less than mtime, it broke some apps */
+	inode->i_atime = current_time(inode);
+	if (timespec64_compare(&inode->i_atime, &inode->i_mtime))
+		inode->i_atime = inode->i_mtime;
+	else
+		inode->i_atime = current_time(inode);
+}
 
-	free_xid(xid);
-	return rc;
+static void cifs_req_cleanup(struct address_space *mapping, void *netfs_priv)
+{
 }
 
+const struct netfs_request_ops cifs_req_ops = {
+	.init_rreq		= cifs_init_rreq,
+	.expand_readahead	= cifs_expand_readahead,
+	.issue_op		= cifs_req_issue_op,
+	.done			= cifs_rreq_done,
+	.cleanup		= cifs_req_cleanup,
+};
+
 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
 {
 	struct cifsFileInfo *open_file;
@@ -4168,34 +3639,20 @@  static int cifs_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
-	int oncethru = 0;
-	pgoff_t index = pos >> PAGE_SHIFT;
-	loff_t offset = pos & (PAGE_SIZE - 1);
-	loff_t page_start = pos & PAGE_MASK;
-	loff_t i_size;
-	struct page *page;
-	int rc = 0;
+	struct folio *folio;
+	int rc;
 
 	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
 
-start:
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page) {
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	if (PageUptodate(page))
-		goto out;
-
-	/*
-	 * If we write a full page it will be up to date, no need to read from
-	 * the server. If the write is short, we'll end up doing a sync write
-	 * instead.
+	/* Prefetch area to be written into the cache if we're caching this
+	 * file.  We need to do this before we get a lock on the page in case
+	 * there's more than one writer competing for the same cache block.
 	 */
-	if (len == PAGE_SIZE)
-		goto out;
+	rc = netfs_write_begin(file, mapping, pos, len, flags, &folio, fsdata);
+	if (rc < 0)
+		return rc;
 
+#if 0
 	/*
 	 * optimize away the read when we have an oplock, and we're not
 	 * expecting to use any of the data we'd be reading in. That
@@ -4210,34 +3667,17 @@  static int cifs_write_begin(struct file *file, struct address_space *mapping,
 					   offset + len,
 					   PAGE_SIZE);
 			/*
-			 * PageChecked means that the parts of the page
-			 * to which we're not writing are considered up
-			 * to date. Once the data is copied to the
-			 * page, it can be set uptodate.
+			 * Marking a folio checked means that the parts of the
+			 * page to which we're not writing are considered up to
+			 * date. Once the data is copied to the page, it can be
+			 * set uptodate.
 			 */
-			SetPageChecked(page);
+			folio_set_checked(folio);
 			goto out;
 		}
 	}
-
-	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
-		/*
-		 * might as well read a page, it is fast enough. If we get
-		 * an error, we don't need to return it. cifs_write_end will
-		 * do a sync write instead since PG_uptodate isn't set.
-		 */
-		cifs_readpage_worker(file, page, &page_start);
-		put_page(page);
-		oncethru = 1;
-		goto start;
-	} else {
-		/* we could try using another file handle if there is one -
-		   but how would we lock it to prevent close of that handle
-		   racing with this read? In any case
-		   this will be written out by write_end so is fine */
-	}
-out:
-	*pagep = page;
+#endif
+	*pagep = folio_page(folio, (pos - folio_pos(folio)) / PAGE_SIZE);
 	return rc;
 }
 
@@ -4429,8 +3869,8 @@  static int cifs_set_page_dirty(struct page *page)
 #endif
 
 const struct address_space_operations cifs_addr_ops = {
-	.readpage = cifs_readpage,
-	.readahead = cifs_readahead,
+	.readpage = netfs_readpage,
+	.readahead = netfs_readahead,
 	.writepage = cifs_writepage,
 	.writepages = cifs_writepages,
 	.write_begin = cifs_write_begin,
@@ -4455,7 +3895,7 @@  const struct address_space_operations cifs_addr_ops = {
  * to leave cifs_readpages out of the address space operations.
  */
 const struct address_space_operations cifs_addr_ops_smallbuf = {
-	.readpage = cifs_readpage,
+	.readpage = netfs_readpage,
 	.writepage = cifs_writepage,
 	.writepages = cifs_writepages,
 	.write_begin = cifs_write_begin,
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index a7e7e5a97b7f..bb1c3a372de4 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -134,34 +134,3 @@  void cifs_fscache_release_inode_cookie(struct inode *inode)
 		cifsi->netfs_ctx.cache = NULL;
 	}
 }
-
-/*
- * Retrieve a page from FS-Cache
- */
-int __cifs_readpage_from_fscache(struct inode *inode, struct page *page)
-{
-	cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n",
-		 __func__, cifs_inode_cookie(inode), page, inode);
-	return -ENOBUFS; // Needs conversion to using netfslib
-}
-
-/*
- * Retrieve a set of pages from FS-Cache
- */
-int __cifs_readpages_from_fscache(struct inode *inode,
-				struct address_space *mapping,
-				struct list_head *pages,
-				unsigned *nr_pages)
-{
-	cifs_dbg(FYI, "%s: (0x%p/%u/0x%p)\n",
-		 __func__, cifs_inode_cookie(inode), *nr_pages, inode);
-	return -ENOBUFS; // Needs conversion to using netfslib
-}
-
-void __cifs_readpage_to_fscache(struct inode *inode, struct page *page)
-{
-	cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n",
-		 __func__, cifs_inode_cookie(inode), page, inode);
-
-	// Needs conversion to using netfslib
-}
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index 9f6e42e85d14..fdc03cd7b881 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -58,14 +58,6 @@  void cifs_fscache_fill_coherency(struct inode *inode,
 }
 
 
-extern int cifs_fscache_release_page(struct page *page, gfp_t gfp);
-extern int __cifs_readpage_from_fscache(struct inode *, struct page *);
-extern int __cifs_readpages_from_fscache(struct inode *,
-					 struct address_space *,
-					 struct list_head *,
-					 unsigned *);
-extern void __cifs_readpage_to_fscache(struct inode *, struct page *);
-
 static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode)
 {
 	return netfs_i_cookie(inode);
@@ -80,33 +72,6 @@  static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags
 			   i_size_read(inode), flags);
 }
 
-static inline int cifs_readpage_from_fscache(struct inode *inode,
-					     struct page *page)
-{
-	if (cifs_inode_cookie(inode))
-		return __cifs_readpage_from_fscache(inode, page);
-
-	return -ENOBUFS;
-}
-
-static inline int cifs_readpages_from_fscache(struct inode *inode,
-					      struct address_space *mapping,
-					      struct list_head *pages,
-					      unsigned *nr_pages)
-{
-	if (cifs_inode_cookie(inode))
-		return __cifs_readpages_from_fscache(inode, mapping, pages,
-						     nr_pages);
-	return -ENOBUFS;
-}
-
-static inline void cifs_readpage_to_fscache(struct inode *inode,
-					    struct page *page)
-{
-	if (PageFsCache(page))
-		__cifs_readpage_to_fscache(inode, page);
-}
-
 #else /* CONFIG_CIFS_FSCACHE */
 static inline
 void cifs_fscache_fill_coherency(struct inode *inode,
@@ -123,23 +88,6 @@  static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool upd
 static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; }
 static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {}
 
-static inline int
-cifs_readpage_from_fscache(struct inode *inode, struct page *page)
-{
-	return -ENOBUFS;
-}
-
-static inline int cifs_readpages_from_fscache(struct inode *inode,
-					      struct address_space *mapping,
-					      struct list_head *pages,
-					      unsigned *nr_pages)
-{
-	return -ENOBUFS;
-}
-
-static inline void cifs_readpage_to_fscache(struct inode *inode,
-			struct page *page) {}
-
 #endif /* CONFIG_CIFS_FSCACHE */
 
 #endif /* _CIFS_FSCACHE_H */
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 7d8b3ceb2af3..b6a9ded9fbb2 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -26,6 +26,19 @@ 
 #include "fs_context.h"
 #include "cifs_ioctl.h"
 
+/*
+ * Initialise the inode's netfs context and copy in the sb's rsize and wsize.
+ */
+static void cifs_set_netfs_context(struct inode *inode)
+{
+	struct netfs_i_context *ctx = netfs_i_context(inode);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+	netfs_i_context_init(inode, &cifs_req_ops);
+	ctx->rsize = cifs_sb->ctx->rsize;
+	ctx->wsize = cifs_sb->ctx->wsize;
+}
+
 static void cifs_set_ops(struct inode *inode)
 {
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -209,8 +222,10 @@  cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
 
 	if (fattr->cf_flags & CIFS_FATTR_DFS_REFERRAL)
 		inode->i_flags |= S_AUTOMOUNT;
-	if (inode->i_state & I_NEW)
+	if (inode->i_state & I_NEW) {
+		cifs_set_netfs_context(inode);
 		cifs_set_ops(inode);
+	}
 	return 0;
 }
 
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index ebbea7526ee2..0d76cffb4e75 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -23,6 +23,7 @@ 
 #include <linux/uuid.h>
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
+#include <linux/netfs.h>
 #include "cifsglob.h"
 #include "cifsacl.h"
 #include "cifsproto.h"
@@ -4185,7 +4186,19 @@  smb2_readv_callback(struct mid_q_entry *mid)
 				     tcon->tid, tcon->ses->Suid,
 				     rdata->offset, rdata->got_bytes);
 
-	queue_work(cifsiod_wq, &rdata->work);
+	if (rdata->result == -ENODATA) {
+		/* We may have got an EOF error because fallocate
+		 * failed to enlarge the file.
+		 */
+		if (rdata->subreq->start < rdata->subreq->rreq->i_size)
+			rdata->result = 0;
+	}
+	if (rdata->result == 0 || rdata->result == -EAGAIN)
+		iov_iter_advance(&rdata->subreq->iter, rdata->got_bytes);
+	netfs_subreq_terminated(rdata->subreq,
+				(rdata->result == 0 || rdata->result == -EAGAIN) ?
+				rdata->got_bytes : rdata->result, false);
+	kref_put(&rdata->refcount, cifs_readdata_release);
 	DeleteMidQEntry(mid);
 	add_credits(server, &credits, 0);
 }
diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index df13c9b22ca8..1fa242140dc4 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -553,8 +553,13 @@  static void netfs_rreq_assess_dio(struct netfs_read_request *rreq)
 	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
 		if (subreq->error || subreq->transferred == 0)
 			break;
-		for (i = 0; i < subreq->bv_count; i++)
+		for (i = 0; i < subreq->bv_count; i++) {
 			flush_dcache_page(subreq->bv[i].bv_page);
+			// TODO: cifs marks pages in the destination buffer
+			// dirty under some circumstances after a read.  Do we
+			// need to do that too?
+			set_page_dirty(subreq->bv[i].bv_page);
+		}
 		transferred += subreq->transferred;
 		if (subreq->transferred < subreq->len)
 			break;