Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/2216349/?format=api
{ "id": 2216349, "url": "http://patchwork.ozlabs.org/api/patches/2216349/?format=api", "web_url": "http://patchwork.ozlabs.org/project/linux-cifs-client/patch/20260326104544.509518-9-dhowells@redhat.com/", "project": { "id": 12, "url": "http://patchwork.ozlabs.org/api/projects/12/?format=api", "name": "Linux CIFS Client", "link_name": "linux-cifs-client", "list_id": "linux-cifs.vger.kernel.org", "list_email": "linux-cifs@vger.kernel.org", "web_url": "", "scm_url": "", "webscm_url": "", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20260326104544.509518-9-dhowells@redhat.com>", "list_archive_url": null, "date": "2026-03-26T10:45:23", "name": "[08/26] cachefiles: Don't rely on backing fs storage map for most use cases", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "223b2c10d3040cae767d1f4b27a8320422f0cf76", "submitter": { "id": 59, "url": "http://patchwork.ozlabs.org/api/people/59/?format=api", "name": "David Howells", "email": "dhowells@redhat.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/linux-cifs-client/patch/20260326104544.509518-9-dhowells@redhat.com/mbox/", "series": [ { "id": 497565, "url": "http://patchwork.ozlabs.org/api/series/497565/?format=api", "web_url": "http://patchwork.ozlabs.org/project/linux-cifs-client/list/?series=497565", "date": "2026-03-26T10:45:15", "name": "netfs: Keep track of folios in a segmented bio_vec[] chain", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/497565/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2216349/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2216349/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "\n <linux-cifs+bounces-10531-incoming=patchwork.ozlabs.org@vger.kernel.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "linux-cifs@vger.kernel.org" ], "Delivered-To": "patchwork-incoming@legolas.ozlabs.org", 
"Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=pass (1024-bit key;\n unprotected) header.d=redhat.com header.i=@redhat.com header.a=rsa-sha256\n header.s=mimecast20190719 header.b=TgaBtYYZ;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=vger.kernel.org\n (client-ip=2600:3c0a:e001:db::12fc:5321; helo=sea.lore.kernel.org;\n envelope-from=linux-cifs+bounces-10531-incoming=patchwork.ozlabs.org@vger.kernel.org;\n receiver=patchwork.ozlabs.org)", "smtp.subspace.kernel.org;\n\tdkim=pass (1024-bit key) header.d=redhat.com header.i=@redhat.com\n header.b=\"TgaBtYYZ\"", "smtp.subspace.kernel.org;\n arc=none smtp.client-ip=170.10.133.124", "smtp.subspace.kernel.org;\n dmarc=pass (p=quarantine dis=none) header.from=redhat.com", "smtp.subspace.kernel.org;\n spf=pass smtp.mailfrom=redhat.com" ], "Received": [ "from sea.lore.kernel.org (sea.lore.kernel.org\n [IPv6:2600:3c0a:e001:db::12fc:5321])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4fhLPr11rHz1y1G\n\tfor <incoming@patchwork.ozlabs.org>; Thu, 26 Mar 2026 21:59:28 +1100 (AEDT)", "from smtp.subspace.kernel.org (conduit.subspace.kernel.org\n [100.90.174.1])\n\tby sea.lore.kernel.org (Postfix) with ESMTP id EC06E3164A3E\n\tfor <incoming@patchwork.ozlabs.org>; Thu, 26 Mar 2026 10:49:56 +0000 (UTC)", "from localhost.localdomain (localhost.localdomain [127.0.0.1])\n\tby smtp.subspace.kernel.org (Postfix) with ESMTP id 432EE346E70;\n\tThu, 26 Mar 2026 10:47:31 +0000 (UTC)", "from us-smtp-delivery-124.mimecast.com\n (us-smtp-delivery-124.mimecast.com [170.10.133.124])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby smtp.subspace.kernel.org (Postfix) with ESMTPS id DE74F2F4A14\n\tfor <linux-cifs@vger.kernel.org>; Thu, 26 Mar 2026 10:47:27 +0000 (UTC)", "from 
mx-prod-mc-06.mail-002.prod.us-west-2.aws.redhat.com\n (ec2-35-165-154-97.us-west-2.compute.amazonaws.com [35.165.154.97]) by\n relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3,\n cipher=TLS_AES_256_GCM_SHA384) id us-mta-658-2RDWs7cwP920WiGnWDa5Tg-1; Thu,\n 26 Mar 2026 06:47:20 -0400", "from mx-prod-int-06.mail-002.prod.us-west-2.aws.redhat.com\n (mx-prod-int-06.mail-002.prod.us-west-2.aws.redhat.com [10.30.177.93])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest\n SHA256)\n\t(No client certificate requested)\n\tby mx-prod-mc-06.mail-002.prod.us-west-2.aws.redhat.com (Postfix) with ESMTPS\n id CF695180060D;\n\tThu, 26 Mar 2026 10:47:17 +0000 (UTC)", "from warthog.procyon.org.com (unknown [10.44.33.121])\n\tby mx-prod-int-06.mail-002.prod.us-west-2.aws.redhat.com (Postfix) with ESMTP\n id 095E31800673;\n\tThu, 26 Mar 2026 10:47:10 +0000 (UTC)" ], "ARC-Seal": "i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;\n\tt=1774522051; cv=none;\n b=Ceq8iN/fxEezN1V7A0oLgJKE9YNz3y0SxKF9snAyfrGq6/zIUmhqzTjR07bnbstrBX7EOuMrjX4/rGQlFYAOPH5VUDH6bDnnI+9oz51AploRXFT94UqmiwqDjMBxUA/Sv7gw5K+6pV4m04OQxykWWt4M2ULTc9y4ipfggZX3ycs=", "ARC-Message-Signature": "i=1; a=rsa-sha256; d=subspace.kernel.org;\n\ts=arc-20240116; t=1774522051; c=relaxed/simple;\n\tbh=mSttHELtORA3TF+uaSywZfpmCBiron6A6GzGwHAVu9c=;\n\th=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:\n\t MIME-Version;\n b=I9UZX27AlQuuZIpFzhTzjQs4FoPhnLpdAptfqe+KlcI3Gvp0LBfGZ/bPrm5AXfyKrkmmTEiz0pWerm9hOxrf4/+YBN5/cxkuQJiizG8vGtPYzBvBq6G7fRfG6BEu2T03yiUPUQ4PTnIqONf/5ApUR9ke7ivzI/CgDPjRmaOgeGw=", "ARC-Authentication-Results": "i=1; smtp.subspace.kernel.org;\n dmarc=pass (p=quarantine dis=none) header.from=redhat.com;\n spf=pass smtp.mailfrom=redhat.com;\n dkim=pass (1024-bit key) header.d=redhat.com header.i=@redhat.com\n header.b=TgaBtYYZ; arc=none smtp.client-ip=170.10.133.124", "DKIM-Signature": "v=1; 
a=rsa-sha256; c=relaxed/relaxed; d=redhat.com;\n\ts=mimecast20190719; t=1774522047;\n\th=from:from:reply-to:subject:subject:date:date:message-id:message-id:\n\t to:to:cc:cc:mime-version:mime-version:\n\t content-transfer-encoding:content-transfer-encoding:\n\t in-reply-to:in-reply-to:references:references;\n\tbh=wLthSvn8qvWjKJAvzgNmkKxDSR01zA8txUKhM97kKV8=;\n\tb=TgaBtYYZ6NnROKdqfm2iBwLYyse8fUcwp7r5g3RQrkoqYoe6ItYYp26NGPmZcaIjxYW11R\n\tv9FHZ53DTAU4CDUGSEmaZm/Gx/dlbJiHqSrLPvaVonmUfUC/l11dQfiTE6wXp0Io5A/9vm\n\tMIW6cth2HVTNbuO8UCXNnAGM4X1scYM=", "X-MC-Unique": "2RDWs7cwP920WiGnWDa5Tg-1", "X-Mimecast-MFC-AGG-ID": "2RDWs7cwP920WiGnWDa5Tg_1774522038", "From": "David Howells <dhowells@redhat.com>", "To": "Christian Brauner <christian@brauner.io>,\n\tMatthew Wilcox <willy@infradead.org>,\n\tChristoph Hellwig <hch@infradead.org>", "Cc": "David Howells <dhowells@redhat.com>,\n\tPaulo Alcantara <pc@manguebit.com>,\n\tJens Axboe <axboe@kernel.dk>,\n\tLeon Romanovsky <leon@kernel.org>,\n\tSteve French <sfrench@samba.org>,\n\tChenXiaoSong <chenxiaosong@chenxiaosong.com>,\n\tMarc Dionne <marc.dionne@auristor.com>,\n\tEric Van Hensbergen <ericvh@kernel.org>,\n\tDominique Martinet <asmadeus@codewreck.org>,\n\tIlya Dryomov <idryomov@gmail.com>,\n\tTrond Myklebust <trondmy@kernel.org>,\n\tnetfs@lists.linux.dev,\n\tlinux-afs@lists.infradead.org,\n\tlinux-cifs@vger.kernel.org,\n\tlinux-nfs@vger.kernel.org,\n\tceph-devel@vger.kernel.org,\n\tv9fs@lists.linux.dev,\n\tlinux-erofs@lists.ozlabs.org,\n\tlinux-fsdevel@vger.kernel.org,\n\tlinux-kernel@vger.kernel.org,\n\tPaulo Alcantara <pc@manguebit.org>", "Subject": "[PATCH 08/26] cachefiles: Don't rely on backing fs storage map for\n most use cases", "Date": "Thu, 26 Mar 2026 10:45:23 +0000", "Message-ID": "<20260326104544.509518-9-dhowells@redhat.com>", "In-Reply-To": "<20260326104544.509518-1-dhowells@redhat.com>", "References": "<20260326104544.509518-1-dhowells@redhat.com>", "Precedence": "bulk", "X-Mailing-List": 
"linux-cifs@vger.kernel.org", "List-Id": "<linux-cifs.vger.kernel.org>", "List-Subscribe": "<mailto:linux-cifs+subscribe@vger.kernel.org>", "List-Unsubscribe": "<mailto:linux-cifs+unsubscribe@vger.kernel.org>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "X-Scanned-By": "MIMEDefang 3.4.1 on 10.30.177.93" }, "content": "Cachefiles currently uses the backing filesystem's idea of what data is\nheld in a backing file and queries this by means of SEEK_DATA and\nSEEK_HOLE. However, this means it does two seek operations on the backing\nfile for each individual read call it wants to prepare (unless the first\nreturns -ENXIO). Worse, the backing filesystem is at liberty to insert or\nremove blocks of zeros in order to optimise its layout which may cause\nfalse positives and false negatives.\n\nThe problem is that keeping track of what is dirty is tricky (if storing\ninfo in xattrs, which may have limited capacity and must be read and\nwritten as one piece) and expensive (in terms of diskspace at least) and is\nbasically duplicating what a filesystem does.\n\nHowever, the most common write case, in which the application does {\nopen(O_TRUNC); write(); write(); ... write(); close(); } where each write\nfollows directly on from the previous and leaves no gaps in the file is\nreasonably easy to detect and can be noted in the primary xattr as\nCACHEFILES_CONTENT_ALL, indicating we have everything up to the object size\nstored.\n\nIn this specific case, given that it is known that there are no holes in\nthe file, there's no need to call SEEK_DATA/HOLE or use any other mechanism\nto track the contents. That speeds things up enormously.\n\nEven when it is necessary to use SEEK_DATA/HOLE, it may not be necessary to\ncall it for each cache read subrequest generated.\n\nImplement this by adding support for the CACHEFILES_CONTENT_ALL content\ntype (which is defined, but currently unused), which requires a slight\nadjustment in how backing files are managed. 
Specifically, the driver\nneeds to know how much of the tail block is data and whether storing more\ndata will create a hole.\n\nTo this end, the way that the size of a backing file is managed is changed.\nCurrently, the backing file is expanded to strictly match the size of the\nnetwork file, but this can be changed to carry more useful information.\nThis makes two pieces of metadata available: xattr.object_size and the\nbacking file's i_size. Apply the following schema:\n\n (a) i_size is always a multiple of the DIO block size.\n\n (b) i_size is only updated to the end of the highest write stored. This\n is used to work out if we are following on without leaving a hole.\n\n (c) xattr.object_size is the size of the network filesystem file cached\n in this backing file.\n\n (d) xattr.object_size must point after the start of the last block\n (unless both are 0).\n\n (e) If xattr.object_size is at or after the block at the current end of\n the backing file (ie. i_size), then we have all the contents of the\n block (if xattr.content == CACHEFILES_CONTENT_ALL).\n\n (f) If xattr.object_size is somewhere in the middle of the last block,\n then the data following it is invalid and must be ignored.\n\n (g) If data is added to the last block, then that block must be fetched,\n modified and rewritten (it must be a buffered write through the\n pagecache and not DIO).\n\n (h) Writes to cache are rounded out to blocks on both sides and the\n folios used as sources must contain data for any lower gap and must\n have been cleared for any upper gap, and so will rewrite any\n non-data area in the tail block.\n\nTo implement this, the following changes are made:\n\n (1) cookie->object_size is no longer updated when writes are copied into\n the pagecache, but rather only updated when a write request completes.\n\n This prevents object size miscomparison when checking the xattr\n causing the backing file to be invalidated (opening and marking the\n backing file and modifying the 
pagecache run in parallel).\n\n (2) The cache's current idea of the amount of data that should be stored\n in the backing file is kept track of in object->object_size.\n\n Possibly this is redundant with cookie->object_size, but the latter\n gets updated in some additional circumstances.\n\n (3) The size of the backing file at the start of a request is now tracked\n in struct netfs_cache_resources so that the partial EOF block can be\n located and cleaned.\n\n (4) The cache block size is now used consistently rather than using\n CACHEFILES_DIO_BLOCK_SIZE (4096).\n\n (5) The backing file size is no longer adjusted when looking up an object.\n\n (6) When shortening a file, if the new size is not block aligned, the part\n beyond the new size is cleared. If the file is truncated to zero, the\n content_info gets reset to CACHEFILES_CONTENT_NO_DATA.\n\n (7) A new struct, fscache_occupancy, is instituted to track the region\n being read. Netfslib allocates it and fills in the start and end of\n the region to be read then calls the ->query_occupancy() method to\n find and fill in the extents. It also indicates whether a recorded\n extent contains data or just contains a region that's all zeros\n (FSCACHE_EXTENT_DATA or FSCACHE_EXTENT_ZERO).\n\n (8) The ->prepare_read() cache method is changed such that, if given, it\n just limits the amount that can be read from the cache in one go. It\n no longer indicates what source of read should be done; that\n information is now obtained from ->query_occupancy().\n\n (9) A new cache method, ->collect_write(), is added that is called when a\n contiguous series of writes have completed and a discontiguity or the\n end of the request has been hit. 
It is supplied with the start and\n length of the write made to the backing file and can use this\n information to update the cache metadata.\n\n(10) cachefiles_query_occupancy() is altered to find the next two \"extents\"\n of data stored in the backing file by doing SEEK_DATA/HOLE between the\n bounds set - unless it is known that there are no holes, in which case\n a whole-file first extent can be set.\n\n(11) cachefiles_collect_write() is implemented to take the collated write\n completion information and use this to update the cache metadata, in\n particular working out whether there's now a hole in the backing file\n requiring future use of SEEK_DATA/HOLE instead of just assuming the\n data is all present.\n\n It also uses fallocate(FALLOC_FL_ZERO_RANGE) to clean the part of a\n partial block that extended beyond the old object size. It might be\n better to perform a synchronous DIO write for this purpose, but that\n would mandate an RMW cycle. Ideally, it should be all zeros anyway,\n but, unfortunately, shared-writable mmap can interfere.\n\n(12) cachefiles_begin_operation() is updated to note the current backing\n file size and the cache DIO size.\n\n(13) cachefiles_create_tmpfile() no longer expands the backing file when it\n creates it.\n\n(14) cachefiles_set_object_xattr() is changed to use object->object_size\n rather than cookie->object_size.\n\n(15) cachefiles_check_auxdata() is altered to actually store the content\n type and to also set object->object_size. The cachefiles_coherency\n tracepoint is also modified to display xattr.object_size.\n\n(16) netfs_read_to_pagecache() is reworked. 
The cache ->prepare_read()\n method is replaced with ->query_occupancy() as the arbiter of what\n region of the file is read from where, and that retrieves up to two\n occupied extents of the backing file at once.\n\n The cache ->prepare_read() method is now repurposed to be the same as\n the equivalent network filesystem method and allows the cache to limit\n the size of the read before the iterator is prepared.\n\n netfs_single_dispatch_read() is similarly modified.\n\n(17) netfs_update_i_size() and afs_update_i_size() no longer call\n fscache_update_cookie() to update cookie->object_size.\n\n(18) Write collection now collates contiguous sequences of writes to the\n cache and calls the cache ->collect_write() method.\n\nSigned-off-by: David Howells <dhowells@redhat.com>\ncc: Paulo Alcantara <pc@manguebit.org>\ncc: Matthew Wilcox <willy@infradead.org>\ncc: Christoph Hellwig <hch@infradead.org>\ncc: linux-cifs@vger.kernel.org\ncc: netfs@lists.linux.dev\ncc: linux-fsdevel@vger.kernel.org\n---\n fs/afs/file.c | 1 -\n fs/cachefiles/interface.c | 82 ++-------\n fs/cachefiles/internal.h | 10 +-\n fs/cachefiles/io.c | 265 +++++++++++++++++++++++-------\n fs/cachefiles/namei.c | 19 +--\n fs/cachefiles/xattr.c | 20 ++-\n fs/netfs/buffered_read.c | 185 +++++++++++++--------\n fs/netfs/buffered_write.c | 3 -\n fs/netfs/internal.h | 2 +\n fs/netfs/read_single.c | 39 +++--\n fs/netfs/write_collect.c | 49 +++++-\n fs/netfs/write_issue.c | 3 +\n include/linux/fscache.h | 17 ++\n include/linux/netfs.h | 16 +-\n include/trace/events/cachefiles.h | 15 +-\n 15 files changed, 466 insertions(+), 260 deletions(-)", "diff": "diff --git a/fs/afs/file.c b/fs/afs/file.c\nindex f609366fd2ac..424e0c98d67f 100644\n--- a/fs/afs/file.c\n+++ b/fs/afs/file.c\n@@ -436,7 +436,6 @@ static void afs_update_i_size(struct inode *inode, loff_t new_i_size)\n \t\tinode_set_bytes(&vnode->netfs.inode, new_i_size);\n \t}\n \twrite_sequnlock(&vnode->cb_lock);\n-\tfscache_update_cookie(afs_vnode_cache(vnode), 
NULL, &new_i_size);\n }\n \n static void afs_netfs_invalidate_cache(struct netfs_io_request *wreq)\ndiff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c\nindex a08250d244ea..736bfcaa4e1d 100644\n--- a/fs/cachefiles/interface.c\n+++ b/fs/cachefiles/interface.c\n@@ -105,73 +105,6 @@ void cachefiles_put_object(struct cachefiles_object *object,\n \t_leave(\"\");\n }\n \n-/*\n- * Adjust the size of a cache file if necessary to match the DIO size. We keep\n- * the EOF marker a multiple of DIO blocks so that we don't fall back to doing\n- * non-DIO for a partial block straddling the EOF, but we also have to be\n- * careful of someone expanding the file and accidentally accreting the\n- * padding.\n- */\n-static int cachefiles_adjust_size(struct cachefiles_object *object)\n-{\n-\tstruct iattr newattrs;\n-\tstruct file *file = object->file;\n-\tuint64_t ni_size;\n-\tloff_t oi_size;\n-\tint ret;\n-\n-\tni_size = object->cookie->object_size;\n-\tni_size = round_up(ni_size, CACHEFILES_DIO_BLOCK_SIZE);\n-\n-\t_enter(\"{OBJ%x},[%llu]\",\n-\t object->debug_id, (unsigned long long) ni_size);\n-\n-\tif (!file)\n-\t\treturn -ENOBUFS;\n-\n-\toi_size = i_size_read(file_inode(file));\n-\tif (oi_size == ni_size)\n-\t\treturn 0;\n-\n-\tinode_lock(file_inode(file));\n-\n-\t/* if there's an extension to a partial page at the end of the backing\n-\t * file, we need to discard the partial page so that we pick up new\n-\t * data after it */\n-\tif (oi_size & ~PAGE_MASK && ni_size > oi_size) {\n-\t\t_debug(\"discard tail %llx\", oi_size);\n-\t\tnewattrs.ia_valid = ATTR_SIZE;\n-\t\tnewattrs.ia_size = oi_size & PAGE_MASK;\n-\t\tret = cachefiles_inject_remove_error();\n-\t\tif (ret == 0)\n-\t\t\tret = notify_change(&nop_mnt_idmap, file->f_path.dentry,\n-\t\t\t\t\t &newattrs, NULL);\n-\t\tif (ret < 0)\n-\t\t\tgoto truncate_failed;\n-\t}\n-\n-\tnewattrs.ia_valid = ATTR_SIZE;\n-\tnewattrs.ia_size = ni_size;\n-\tret = cachefiles_inject_write_error();\n-\tif (ret == 0)\n-\t\tret = 
notify_change(&nop_mnt_idmap, file->f_path.dentry,\n-\t\t\t\t &newattrs, NULL);\n-\n-truncate_failed:\n-\tinode_unlock(file_inode(file));\n-\n-\tif (ret < 0)\n-\t\ttrace_cachefiles_io_error(NULL, file_inode(file), ret,\n-\t\t\t\t\t cachefiles_trace_notify_change_error);\n-\tif (ret == -EIO) {\n-\t\tcachefiles_io_error_obj(object, \"Size set failed\");\n-\t\tret = -ENOBUFS;\n-\t}\n-\n-\t_leave(\" = %d\", ret);\n-\treturn ret;\n-}\n-\n /*\n * Attempt to look up the nominated node in this cache\n */\n@@ -204,7 +137,6 @@ static bool cachefiles_lookup_cookie(struct fscache_cookie *cookie)\n \tspin_lock(&cache->object_list_lock);\n \tlist_add(&object->cache_link, &cache->object_list);\n \tspin_unlock(&cache->object_list_lock);\n-\tcachefiles_adjust_size(object);\n \n \tcachefiles_end_secure(cache, saved_cred);\n \t_leave(\" = t\");\n@@ -238,7 +170,7 @@ static bool cachefiles_shorten_object(struct cachefiles_object *object,\n \tloff_t i_size, dio_size;\n \tint ret;\n \n-\tdio_size = round_up(new_size, CACHEFILES_DIO_BLOCK_SIZE);\n+\tdio_size = round_up(new_size, cache->bsize);\n \ti_size = i_size_read(inode);\n \n \ttrace_cachefiles_trunc(object, inode, i_size, dio_size,\n@@ -270,6 +202,7 @@ static bool cachefiles_shorten_object(struct cachefiles_object *object,\n \t\t}\n \t}\n \n+\tobject->object_size = new_size;\n \treturn true;\n }\n \n@@ -284,15 +217,20 @@ static void cachefiles_resize_cookie(struct netfs_cache_resources *cres,\n \tstruct fscache_cookie *cookie = object->cookie;\n \tconst struct cred *saved_cred;\n \tstruct file *file = cachefiles_cres_file(cres);\n-\tloff_t old_size = cookie->object_size;\n+\tunsigned long long i_size = i_size_read(file_inode(file));\n \n-\t_enter(\"%llu->%llu\", old_size, new_size);\n+\t_enter(\"%llu->%llu\", i_size, new_size);\n \n-\tif (new_size < old_size) {\n+\tif (new_size < i_size) {\n+\t\t/* The file is being shrunk - we need to downsize the backing\n+\t\t * file and clear the end of the final block.\n+\t\t */\n 
\t\tcachefiles_begin_secure(cache, &saved_cred);\n \t\tcachefiles_shorten_object(object, file, new_size);\n \t\tcachefiles_end_secure(cache, saved_cred);\n \t\tobject->cookie->object_size = new_size;\n+\t\tif (new_size == 0)\n+\t\t\tobject->content_info = CACHEFILES_CONTENT_NO_DATA;\n \t\treturn;\n \t}\n \ndiff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h\nindex b62cd3e9a18e..00482a13fc48 100644\n--- a/fs/cachefiles/internal.h\n+++ b/fs/cachefiles/internal.h\n@@ -18,8 +18,6 @@\n #include <linux/xarray.h>\n #include <linux/cachefiles.h>\n \n-#define CACHEFILES_DIO_BLOCK_SIZE 4096\n-\n struct cachefiles_cache;\n struct cachefiles_object;\n \n@@ -68,12 +66,16 @@ struct cachefiles_object {\n \tstruct list_head\t\tcache_link;\t/* Link in cache->*_list */\n \tstruct file\t\t\t*file;\t\t/* The file representing this object */\n \tchar\t\t\t\t*d_name;\t/* Backing file name */\n+\tunsigned long\t\t\tflags;\n+#define CACHEFILES_OBJECT_USING_TMPFILE\t0\t\t/* Have an unlinked tmpfile */\n+\tunsigned long long\t\tobject_size;\t/* Size of the object stored\n+\t\t\t\t\t\t\t * (independent of cookie->object_size for\n+\t\t\t\t\t\t\t * coherency reasons)\n+\t\t\t\t\t\t\t */\n \tint\t\t\t\tdebug_id;\n \tspinlock_t\t\t\tlock;\n \trefcount_t\t\t\tref;\n \tenum cachefiles_content\t\tcontent_info:8;\t/* Info about content presence */\n-\tunsigned long\t\t\tflags;\n-#define CACHEFILES_OBJECT_USING_TMPFILE\t0\t\t/* Have an unlinked tmpfile */\n #ifdef CONFIG_CACHEFILES_ONDEMAND\n \tstruct cachefiles_ondemand_info\t*ondemand;\n #endif\ndiff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c\nindex eaf47851c65f..b5ff75697b3e 100644\n--- a/fs/cachefiles/io.c\n+++ b/fs/cachefiles/io.c\n@@ -32,6 +32,8 @@ struct cachefiles_kiocb {\n \tu64\t\t\tb_writing;\n };\n \n+#define IS_ERR_VALUE_LL(x) unlikely((x) >= (unsigned long long)-MAX_ERRNO)\n+\n static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)\n {\n \tif (refcount_dec_and_test(&ki->ki_refcnt)) {\n@@ -193,60 +195,81 
@@ static int cachefiles_read(struct netfs_cache_resources *cres,\n }\n \n /*\n- * Query the occupancy of the cache in a region, returning where the next chunk\n- * of data starts and how long it is.\n+ * Query the occupancy of the cache in a region, returning the extent of the\n+ * next two chunks of cached data and the next hole.\n */\n static int cachefiles_query_occupancy(struct netfs_cache_resources *cres,\n-\t\t\t\t loff_t start, size_t len, size_t granularity,\n-\t\t\t\t loff_t *_data_start, size_t *_data_len)\n+\t\t\t\t struct fscache_occupancy *occ)\n {\n \tstruct cachefiles_object *object;\n+\tstruct inode *inode;\n \tstruct file *file;\n-\tloff_t off, off2;\n-\n-\t*_data_start = -1;\n-\t*_data_len = 0;\n+\tunsigned long long i_size;\n+\tloff_t ret;\n+\tint i;\n \n \tif (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))\n \t\treturn -ENOBUFS;\n \n \tobject = cachefiles_cres_object(cres);\n \tfile = cachefiles_cres_file(cres);\n-\tgranularity = max_t(size_t, object->volume->cache->bsize, granularity);\n+\tinode = file_inode(file);\n+\tocc->granularity = object->volume->cache->bsize;\n+\n+\t_enter(\"%pD,%li,%llx-%llx/%llx\",\n+\t file, inode->i_ino, occ->query_from, occ->query_to,\n+\t i_size_read(inode));\n+\n+\tif (i_size_read(inode) == 0)\n+\t\tgoto done;\n+\n+\tswitch (object->content_info) {\n+\tcase CACHEFILES_CONTENT_ALL:\n+\tcase CACHEFILES_CONTENT_SINGLE:\n+\t\ti_size = i_size_read(inode);\n+\t\tif (i_size > occ->query_from) {\n+\t\t\tocc->cached_from[0] = 0;\n+\t\t\tocc->cached_to[0] = i_size;\n+\t\t\tocc->cached_type[0] = FSCACHE_EXTENT_DATA;\n+\t\t\tocc->query_from = ULLONG_MAX;\n+\t\t}\n+\t\tgoto done;\n+\tdefault:\n+\t\tbreak;\n+\t}\n \n-\t_enter(\"%pD,%li,%llx,%zx/%llx\",\n-\t file, file_inode(file)->i_ino, start, len,\n-\t i_size_read(file_inode(file)));\n+\tfor (i = 0; i < ARRAY_SIZE(occ->cached_from); i++) {\n+\t\tret = cachefiles_inject_read_error();\n+\t\tif (ret == 0)\n+\t\t\tret = file->f_op->llseek(file, occ->query_from, 
SEEK_DATA);\n+\t\tif (IS_ERR_VALUE_LL(ret)) {\n+\t\t\tif (ret != -ENXIO)\n+\t\t\t\treturn ret;\n+\t\t\tocc->query_from = ULLONG_MAX;\n+\t\t\tgoto done;\n+\t\t}\n+\t\tocc->cached_type[i] = FSCACHE_EXTENT_DATA;\n+\t\tocc->cached_from[i] = ret;\n+\t\tocc->query_from = ret;\n+\n+\t\tret = cachefiles_inject_read_error();\n+\t\tif (ret == 0)\n+\t\t\tret = file->f_op->llseek(file, occ->query_from, SEEK_HOLE);\n+\t\tif (IS_ERR_VALUE_LL(ret)) {\n+\t\t\tif (ret != -ENXIO)\n+\t\t\t\treturn ret;\n+\t\t\tocc->query_from = ULLONG_MAX;\n+\t\t\tgoto done;\n+\t\t}\n+\t\tocc->cached_to[i] = ret;\n+\t\tocc->query_from = ret;\n+\t\tif (occ->query_from >= occ->query_to)\n+\t\t\tbreak;\n+\t}\n \n-\toff = cachefiles_inject_read_error();\n-\tif (off == 0)\n-\t\toff = vfs_llseek(file, start, SEEK_DATA);\n-\tif (off == -ENXIO)\n-\t\treturn -ENODATA; /* Beyond EOF */\n-\tif (off < 0 && off >= (loff_t)-MAX_ERRNO)\n-\t\treturn -ENOBUFS; /* Error. */\n-\tif (round_up(off, granularity) >= start + len)\n-\t\treturn -ENODATA; /* No data in range */\n-\n-\toff2 = cachefiles_inject_read_error();\n-\tif (off2 == 0)\n-\t\toff2 = vfs_llseek(file, off, SEEK_HOLE);\n-\tif (off2 == -ENXIO)\n-\t\treturn -ENODATA; /* Beyond EOF */\n-\tif (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO)\n-\t\treturn -ENOBUFS; /* Error. 
*/\n-\n-\t/* Round away partial blocks */\n-\toff = round_up(off, granularity);\n-\toff2 = round_down(off2, granularity);\n-\tif (off2 <= off)\n-\t\treturn -ENODATA;\n-\n-\t*_data_start = off;\n-\tif (off2 > start + len)\n-\t\t*_data_len = len;\n-\telse\n-\t\t*_data_len = off2 - off;\n+done:\n+\t_debug(\"query[0] %llx-%llx\", occ->cached_from[0], occ->cached_to[0]);\n+\t_debug(\"query[1] %llx-%llx\", occ->cached_from[1], occ->cached_to[1]);\n \treturn 0;\n }\n \n@@ -489,18 +512,6 @@ cachefiles_do_prepare_read(struct netfs_cache_resources *cres,\n \treturn ret;\n }\n \n-/*\n- * Prepare a read operation, shortening it to a cached/uncached\n- * boundary as appropriate.\n- */\n-static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq,\n-\t\t\t\t\t\t unsigned long long i_size)\n-{\n-\treturn cachefiles_do_prepare_read(&subreq->rreq->cache_resources,\n-\t\t\t\t\t subreq->start, &subreq->len, i_size,\n-\t\t\t\t\t &subreq->flags, subreq->rreq->inode->i_ino);\n-}\n-\n /*\n * Prepare an on-demand read operation, shortening it to a cached/uncached\n * boundary as appropriate.\n@@ -658,9 +669,9 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)\n \t wreq->debug_id, subreq->debug_index, start, start + len - 1);\n \n \t/* We need to start on the cache granularity boundary */\n-\toff = start & (CACHEFILES_DIO_BLOCK_SIZE - 1);\n+\toff = start & (cache->bsize - 1);\n \tif (off) {\n-\t\tpre = CACHEFILES_DIO_BLOCK_SIZE - off;\n+\t\tpre = cache->bsize - off;\n \t\tif (pre >= len) {\n \t\t\tfscache_count_dio_misfit();\n \t\t\tnetfs_write_subrequest_terminated(subreq, len);\n@@ -674,8 +685,8 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)\n \n \t/* We also need to end on the cache granularity boundary */\n \tif (start + len == wreq->i_size) {\n-\t\tsize_t part = len % CACHEFILES_DIO_BLOCK_SIZE;\n-\t\tsize_t need = CACHEFILES_DIO_BLOCK_SIZE - part;\n+\t\tsize_t part = len & (cache->bsize - 1);\n+\t\tsize_t 
need = cache->bsize - part;\n \n \t\tif (part && stream->submit_extendable_to >= need) {\n \t\t\tlen += need;\n@@ -684,7 +695,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)\n \t\t}\n \t}\n \n-\tpost = len & (CACHEFILES_DIO_BLOCK_SIZE - 1);\n+\tpost = len & (cache->bsize - 1);\n \tif (post) {\n \t\tlen -= post;\n \t\tif (len == 0) {\n@@ -711,6 +722,134 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)\n \t\t\t netfs_write_subrequest_terminated, subreq);\n }\n \n+/*\n+ * Collect the result of buffered writeback to the cache. This includes\n+ * copying a read to the cache. Netfslib collates the results, which might\n+ * occur out of order, and delivers them to the cache so that it can update its\n+ * content record.\n+ *\n+ * The writes we made are all rounded out at both sides to the nearest DIO\n+ * block boundary, so if the final block contains the EOF in the middle of it\n+ * (rather than at the end), padding will have been written to the file. 
The\n+ * backing file's filesize will have been updated if the write extended the\n+ * file; the filesize may still change due to outstanding subreqs.\n+ *\n+ * The metadata in the cache file xattr records the size of the object we have\n+ * stored, but the cache file EOF only goes up to where we've cached data to\n+ * and, furthermore, is rounded up to the nearest DIO block boundary.\n+ */\n+static void cachefiles_collect_write(struct netfs_io_request *wreq,\n+\t\t\t\t unsigned long long start, size_t len)\n+{\n+\tstruct netfs_cache_resources *cres = &wreq->cache_resources;\n+\tstruct cachefiles_object *object = cachefiles_cres_object(cres);\n+\tstruct cachefiles_cache *cache = object->volume->cache;\n+\tstruct fscache_cookie *cookie = fscache_cres_cookie(cres);\n+\tstruct file *file = cachefiles_cres_file(cres);\n+\tunsigned long long old_size = cres->cache_i_size;\n+\tunsigned long long new_size = i_size_read(file_inode(file));\n+\tunsigned long long data_to = cookie->object_size;\n+\tunsigned long long end = start + len;\n+\tint ret;\n+\n+\t_enter(\"%llx,%zx,%x\", start, len, cache->bsize);\n+\n+\tif (WARN_ON(old_size\t& (cache->bsize - 1)) ||\n+\t WARN_ON(new_size\t& (cache->bsize - 1)) ||\n+\t WARN_ON(start\t& (cache->bsize - 1)) ||\n+\t WARN_ON(len\t\t& (cache->bsize - 1))) {\n+\t\ttrace_cachefiles_io_error(object, file_inode(file), -EIO,\n+\t\t\t\t\t cachefiles_trace_alignment_error);\n+\t\tcachefiles_remove_object_xattr(cache, object, file->f_path.dentry);\n+\t\treturn;\n+\t}\n+\n+\t/* Zeroth case: Single monolithic files are handled specially.\n+\t */\n+\tif (wreq->origin == NETFS_WRITEBACK_SINGLE) {\n+\t\tobject->content_info = CACHEFILES_CONTENT_SINGLE;\n+\t\tgoto update_sizes;\n+\t}\n+\n+\t/* First case: The backing file was empty. 
*/\n+\tif (old_size == 0) {\n+\t\tif (start == 0)\n+\t\t\tobject->content_info = CACHEFILES_CONTENT_ALL;\n+\t\telse\n+\t\t\tobject->content_info = CACHEFILES_CONTENT_BACKFS_MAP;\n+\t\tgoto update_sizes;\n+\t}\n+\n+\t/* Second case: The backing file is entirely within the old object size\n+\t * and thus there can be no partial tail block to deal with in the\n+\t * cache file.\n+\t */\n+\tif (old_size <= data_to) {\n+\t\tif (start > old_size)\n+\t\t\tgoto discontiguous;\n+\t\tgoto update_sizes;\n+\t}\n+\n+\t/* Third case: The write happened entirely within the bounds of the\n+\t * current cache file's size.\n+\t */\n+\tif (end <= old_size)\n+\t\tgoto update_sizes;\n+\n+\t/* Fourth case: The write overwrote the partial tail block and extended\n+\t * the file. We only need to update the object size because netfslib\n+\t * rounds out/pads cache writes to whole disk blocks.\n+\t */\n+\tif (start < old_size)\n+\t\tgoto update_sizes;\n+\n+\t/* Fifth case: The write started from the end of the whole tail block\n+\t * and extended the file. Just extend our notion of the filesize.\n+\t */\n+\tif (start == old_size && old_size == data_to)\n+\t\tgoto update_sizes;\n+\n+\t/* Sixth case: The write continued on from the partial tail block and\n+\t * extended the file. Need to clear the gap.\n+\t */\n+\tif (start == old_size && old_size > data_to)\n+\t\tgoto clear_gap;\n+\n+discontiguous:\n+\t/* Seventh case: The write was beyond the EOF on the cache file, so now\n+\t * there's a hole in the file and we can no longer say in the metadata\n+\t * that we can assume we have it all. We may also need to clear the\n+\t * end of the partial tail block.\n+\t */\n+\t/* TODO: For the moment, we will have to use SEEK_HOLE/SEEK_DATA. */\n+\tobject->content_info = CACHEFILES_CONTENT_BACKFS_MAP;\n+\n+clear_gap:\n+\t/* We need to clear any partial padding that got jumped over. 
It\n+\t * *should* be all zeros, but shared-writable mmap exists...\n+\t */\n+\tif (old_size > data_to) {\n+\t\ttrace_cachefiles_trunc(object, file_inode(file), data_to, old_size,\n+\t\t\t\t cachefiles_trunc_clear_padding);\n+\t\tret = cachefiles_inject_write_error();\n+\t\tif (ret == 0)\n+\t\t\tret = vfs_fallocate(file, FALLOC_FL_ZERO_RANGE,\n+\t\t\t\t\t data_to, old_size - data_to);\n+\t\tif (ret < 0) {\n+\t\t\ttrace_cachefiles_io_error(object, file_inode(file), ret,\n+\t\t\t\t\t\t cachefiles_trace_fallocate_error);\n+\t\t\tcachefiles_io_error_obj(object, \"fallocate zero pad failed %d\", ret);\n+\t\t\tcachefiles_remove_object_xattr(cache, object, file->f_path.dentry);\n+\t\t\treturn;\n+\t\t}\n+\t}\n+\n+update_sizes:\n+\tcres->cache_i_size = umax(old_size, end);\n+\tobject->object_size = cookie->object_size;\n+\treturn;\n+}\n+\n /*\n * Clean up an operation.\n */\n@@ -728,11 +867,11 @@ static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {\n \t.read\t\t\t= cachefiles_read,\n \t.write\t\t\t= cachefiles_write,\n \t.issue_write\t\t= cachefiles_issue_write,\n-\t.prepare_read\t\t= cachefiles_prepare_read,\n \t.prepare_write\t\t= cachefiles_prepare_write,\n \t.prepare_write_subreq\t= cachefiles_prepare_write_subreq,\n \t.prepare_ondemand_read\t= cachefiles_prepare_ondemand_read,\n \t.query_occupancy\t= cachefiles_query_occupancy,\n+\t.collect_write\t\t= cachefiles_collect_write,\n };\n \n /*\n@@ -742,14 +881,18 @@ bool cachefiles_begin_operation(struct netfs_cache_resources *cres,\n \t\t\t\tenum fscache_want_state want_state)\n {\n \tstruct cachefiles_object *object = cachefiles_cres_object(cres);\n+\tstruct file *file;\n \n \tif (!cachefiles_cres_file(cres)) {\n \t\tcres->ops = &cachefiles_netfs_cache_ops;\n \t\tif (object->file) {\n \t\t\tspin_lock(&object->lock);\n-\t\t\tif (!cres->cache_priv2 && object->file)\n-\t\t\t\tcres->cache_priv2 = get_file(object->file);\n+\t\t\tfile = object->file;\n+\t\t\tif (!cres->cache_priv2 && 
file)\n+\t\t\t\tcres->cache_priv2 = get_file(file);\n \t\t\tspin_unlock(&object->lock);\n+\t\t\tcres->cache_i_size = i_size_read(file_inode(file));\n+\t\t\tcres->dio_size = object->volume->cache->bsize;\n \t\t}\n \t}\n \ndiff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c\nindex 20138309733f..38d730233658 100644\n--- a/fs/cachefiles/namei.c\n+++ b/fs/cachefiles/namei.c\n@@ -449,7 +449,6 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)\n \tstruct dentry *fan = volume->fanout[(u8)object->cookie->key_hash];\n \tstruct file *file;\n \tconst struct path parentpath = { .mnt = cache->mnt, .dentry = fan };\n-\tuint64_t ni_size;\n \tlong ret;\n \n \n@@ -481,23 +480,6 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)\n \tif (ret < 0)\n \t\tgoto err_unuse;\n \n-\tni_size = object->cookie->object_size;\n-\tni_size = round_up(ni_size, CACHEFILES_DIO_BLOCK_SIZE);\n-\n-\tif (ni_size > 0) {\n-\t\ttrace_cachefiles_trunc(object, file_inode(file), 0, ni_size,\n-\t\t\t\t cachefiles_trunc_expand_tmpfile);\n-\t\tret = cachefiles_inject_write_error();\n-\t\tif (ret == 0)\n-\t\t\tret = vfs_truncate(&file->f_path, ni_size);\n-\t\tif (ret < 0) {\n-\t\t\ttrace_cachefiles_vfs_error(\n-\t\t\t\tobject, file_inode(file), ret,\n-\t\t\t\tcachefiles_trace_trunc_error);\n-\t\t\tgoto err_unuse;\n-\t\t}\n-\t}\n-\n \tret = -EINVAL;\n \tif (unlikely(!file->f_op->read_iter) ||\n \t unlikely(!file->f_op->write_iter)) {\n@@ -507,6 +489,7 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)\n \t}\n out:\n \tcachefiles_end_secure(cache, saved_cred);\n+\tobject->content_info = CACHEFILES_CONTENT_ALL;\n \treturn file;\n \n err_unuse:\ndiff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c\nindex 52383b1d0ba6..27f969c41eef 100644\n--- a/fs/cachefiles/xattr.c\n+++ b/fs/cachefiles/xattr.c\n@@ -54,7 +54,7 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object)\n \tif (!buf)\n \t\treturn -ENOMEM;\n 
\n-\tbuf->object_size\t= cpu_to_be64(object->cookie->object_size);\n+\tbuf->object_size\t= cpu_to_be64(object->object_size);\n \tbuf->zero_point\t\t= 0;\n \tbuf->type\t\t= CACHEFILES_COOKIE_TYPE_DATA;\n \tbuf->content\t\t= object->content_info;\n@@ -77,6 +77,7 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object)\n \t\ttrace_cachefiles_vfs_error(object, file_inode(file), ret,\n \t\t\t\t\t cachefiles_trace_setxattr_error);\n \t\ttrace_cachefiles_coherency(object, file_inode(file)->i_ino,\n+\t\t\t\t\t object->object_size,\n \t\t\t\t\t be64_to_cpup((__be64 *)buf->data),\n \t\t\t\t\t buf->content,\n \t\t\t\t\t cachefiles_coherency_set_fail);\n@@ -86,6 +87,7 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object)\n \t\t\t\t\"Failed to set xattr with error %d\", ret);\n \t} else {\n \t\ttrace_cachefiles_coherency(object, file_inode(file)->i_ino,\n+\t\t\t\t\t object->object_size,\n \t\t\t\t\t be64_to_cpup((__be64 *)buf->data),\n \t\t\t\t\t buf->content,\n \t\t\t\t\t cachefiles_coherency_set_ok);\n@@ -106,6 +108,7 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file\n \tunsigned int len = object->cookie->aux_len, tlen;\n \tconst void *p = fscache_get_aux(object->cookie);\n \tenum cachefiles_coherency_trace why;\n+\tunsigned long long obj_size;\n \tssize_t xlen;\n \tint ret = -ESTALE;\n \n@@ -127,29 +130,33 @@ int cachefiles_check_auxdata(struct cachefiles_object *object, struct file *file\n \t\t\tcachefiles_io_error_obj(\n \t\t\t\tobject,\n \t\t\t\t\"Failed to read aux with error %zd\", xlen);\n-\t\twhy = cachefiles_coherency_check_xattr;\n+\t\ttrace_cachefiles_coherency(object, file_inode(file)->i_ino, 0, 0, 0,\n+\t\t\t\t\t cachefiles_coherency_check_xattr);\n \t\tgoto out;\n \t}\n \n+\tobj_size = be64_to_cpu(buf->object_size);\n \tif (buf->type != CACHEFILES_COOKIE_TYPE_DATA) {\n \t\twhy = cachefiles_coherency_check_type;\n \t} else if (memcmp(buf->data, p, len) != 0) {\n \t\twhy = 
cachefiles_coherency_check_aux;\n-\t} else if (be64_to_cpu(buf->object_size) != object->cookie->object_size) {\n+\t} else if (obj_size != object->cookie->object_size) {\n \t\twhy = cachefiles_coherency_check_objsize;\n \t} else if (buf->content == CACHEFILES_CONTENT_DIRTY) {\n \t\t// TODO: Begin conflict resolution\n \t\tpr_warn(\"Dirty object in cache\\n\");\n \t\twhy = cachefiles_coherency_check_dirty;\n \t} else {\n+\t\tobject->content_info = buf->content;\n+\t\tobject->object_size = obj_size;\n \t\twhy = cachefiles_coherency_check_ok;\n \t\tret = 0;\n \t}\n \n-out:\n-\ttrace_cachefiles_coherency(object, file_inode(file)->i_ino,\n+\ttrace_cachefiles_coherency(object, file_inode(file)->i_ino, obj_size,\n \t\t\t\t be64_to_cpup((__be64 *)buf->data),\n \t\t\t\t buf->content, why);\n+out:\n \tkfree(buf);\n \treturn ret;\n }\n@@ -163,6 +170,9 @@ int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,\n {\n \tint ret;\n \n+\ttrace_cachefiles_coherency(object, d_inode(dentry)->i_ino, 0, 0, 0,\n+\t\t\t\t cachefiles_coherency_remove);\n+\n \tret = cachefiles_inject_remove_error();\n \tif (ret == 0) {\n \t\tret = mnt_want_write(cache->mnt);\ndiff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c\nindex a8c0d86118c5..aee59ccea257 100644\n--- a/fs/netfs/buffered_read.c\n+++ b/fs/netfs/buffered_read.c\n@@ -127,21 +127,6 @@ static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq,\n \treturn subreq->len;\n }\n \n-static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq,\n-\t\t\t\t\t\t struct netfs_io_subrequest *subreq,\n-\t\t\t\t\t\t loff_t i_size)\n-{\n-\tstruct netfs_cache_resources *cres = &rreq->cache_resources;\n-\tenum netfs_io_source source;\n-\n-\tif (!cres->ops)\n-\t\treturn NETFS_DOWNLOAD_FROM_SERVER;\n-\tsource = cres->ops->prepare_read(subreq, i_size);\n-\ttrace_netfs_sreq(subreq, netfs_sreq_trace_prepare);\n-\treturn source;\n-\n-}\n-\n /*\n * Issue a read against the cache.\n * - Eats the 
caller's ref on subreq.\n@@ -156,6 +141,19 @@ static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,\n \t\t\tnetfs_cache_read_terminated, subreq);\n }\n \n+int netfs_read_query_cache(struct netfs_io_request *rreq, struct fscache_occupancy *occ)\n+{\n+\tstruct netfs_cache_resources *cres = &rreq->cache_resources;\n+\n+\tocc->granularity = PAGE_SIZE;\n+\tif (occ->query_from >= occ->query_to)\n+\t\treturn 0;\n+\tif (!cres->ops)\n+\t\treturn 0;\n+\tocc->query_from = round_up(occ->query_from, occ->granularity);\n+\treturn cres->ops->query_occupancy(cres, occ);\n+}\n+\n static void netfs_queue_read(struct netfs_io_request *rreq,\n \t\t\t struct netfs_io_subrequest *subreq,\n \t\t\t bool last_subreq)\n@@ -214,16 +212,55 @@ static void netfs_issue_read(struct netfs_io_request *rreq,\n static void netfs_read_to_pagecache(struct netfs_io_request *rreq,\n \t\t\t\t struct readahead_control *ractl)\n {\n+\tstruct fscache_occupancy _occ = {\n+\t\t.query_from\t= rreq->start,\n+\t\t.query_to\t= rreq->start + rreq->len,\n+\t\t.cached_from[0]\t= 0,\n+\t\t.cached_to[0]\t= 0,\n+\t\t.cached_from[1]\t= ULLONG_MAX,\n+\t\t.cached_to[1]\t= ULLONG_MAX,\n+\t};\n+\tstruct fscache_occupancy *occ = &_occ;\n \tstruct netfs_inode *ictx = netfs_inode(rreq->inode);\n \tunsigned long long start = rreq->start;\n \tssize_t size = rreq->len;\n \tint ret = 0;\n \n \tdo {\n+\t\tint (*prepare_read)(struct netfs_io_subrequest *subreq) = NULL;\n \t\tstruct netfs_io_subrequest *subreq;\n-\t\tenum netfs_io_source source = NETFS_SOURCE_UNKNOWN;\n+\t\tunsigned long long hole_to, cache_to;\n \t\tssize_t slice;\n \n+\t\t/* If we don't have any, find out the next couple of data\n+\t\t * extents from the cache, containing or following the\n+\t\t * specified start offset. 
Holes have to be fetched from the\n+\t\t * server; data regions from the cache.\n+\t\t */\n+\t\thole_to = occ->cached_from[0];\n+\t\tcache_to = occ->cached_to[0];\n+\t\tif (start >= cache_to) {\n+\t\t\t/* Extent exhausted; shuffle down. */\n+\t\t\tint i;\n+\n+\t\t\tfor (i = 0; i < ARRAY_SIZE(occ->cached_from) - 1; i++) {\n+\t\t\t\tocc->cached_from[i] = occ->cached_from[i + 1];\n+\t\t\t\tocc->cached_to[i] = occ->cached_to[i + 1];\n+\t\t\t\tocc->cached_type[i] = occ->cached_type[i + 1];\n+\t\t\t}\n+\t\t\tocc->cached_from[i] = ULLONG_MAX;\n+\t\t\tocc->cached_to[i] = ULLONG_MAX;\n+\n+\t\t\tif (occ->cached_from[0] != ULLONG_MAX)\n+\t\t\t\tcontinue;\n+\n+\t\t\t/* Get new extents */\n+\t\t\tret = netfs_read_query_cache(rreq, occ);\n+\t\t\tif (ret < 0)\n+\t\t\t\tbreak;\n+\t\t\tcontinue;\n+\t\t}\n+\n \t\tsubreq = netfs_alloc_subrequest(rreq);\n \t\tif (!subreq) {\n \t\t\tret = -ENOMEM;\n@@ -233,65 +270,81 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq,\n \t\tsubreq->start\t= start;\n \t\tsubreq->len\t= size;\n \n-\t\tsource = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);\n-\t\tsubreq->source = source;\n-\t\tif (source == NETFS_DOWNLOAD_FROM_SERVER) {\n-\t\t\tunsigned long long zp = umin(ictx->zero_point, rreq->i_size);\n-\t\t\tsize_t len = subreq->len;\n-\n-\t\t\tif (unlikely(rreq->origin == NETFS_READ_SINGLE))\n-\t\t\t\tzp = rreq->i_size;\n-\t\t\tif (subreq->start >= zp) {\n-\t\t\t\tsubreq->source = source = NETFS_FILL_WITH_ZEROES;\n-\t\t\t\tgoto fill_with_zeroes;\n+\t\t_debug(\"rsub %llx %llx-%llx\", subreq->start, hole_to, cache_to);\n+\n+\t\tif (start >= hole_to && start < cache_to) {\n+\t\t\t/* Overlap with a cached region, where the cache may\n+\t\t\t * record a block of zeroes.\n+\t\t\t */\n+\t\t\t_debug(\"cached s=%llx c=%llx l=%zx\", start, cache_to, size);\n+\t\t\tsubreq->len = umin(cache_to - start, size);\n+\t\t\tsubreq->len = round_up(subreq->len, occ->granularity);\n+\t\t\tif (occ->cached_type[0] == FSCACHE_EXTENT_ZERO) 
{\n+\t\t\t\tsubreq->source = NETFS_FILL_WITH_ZEROES;\n+\t\t\t\tnetfs_stat(&netfs_n_rh_zero);\n+\t\t\t} else {\n+\t\t\t\tsubreq->source = NETFS_READ_FROM_CACHE;\n+\t\t\t\tprepare_read = rreq->cache_resources.ops->prepare_read;\n \t\t\t}\n \n-\t\t\tif (len > zp - subreq->start)\n-\t\t\t\tlen = zp - subreq->start;\n-\t\t\tif (len == 0) {\n-\t\t\t\tpr_err(\"ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx\",\n-\t\t\t\t rreq->debug_id, subreq->debug_index,\n-\t\t\t\t subreq->len, size,\n-\t\t\t\t subreq->start, ictx->zero_point, rreq->i_size);\n-\t\t\t\tbreak;\n-\t\t\t}\n-\t\t\tsubreq->len = len;\n-\n-\t\t\tnetfs_stat(&netfs_n_rh_download);\n-\t\t\tif (rreq->netfs_ops->prepare_read) {\n-\t\t\t\tret = rreq->netfs_ops->prepare_read(subreq);\n-\t\t\t\tif (ret < 0) {\n-\t\t\t\t\tsubreq->error = ret;\n-\t\t\t\t\t/* Not queued - release both refs. */\n-\t\t\t\t\tnetfs_put_subrequest(subreq,\n-\t\t\t\t\t\t\t netfs_sreq_trace_put_cancel);\n-\t\t\t\t\tnetfs_put_subrequest(subreq,\n-\t\t\t\t\t\t\t netfs_sreq_trace_put_cancel);\n-\t\t\t\t\tbreak;\n-\t\t\t\t}\n-\t\t\t\ttrace_netfs_sreq(subreq, netfs_sreq_trace_prepare);\n-\t\t\t}\n-\t\t\tgoto issue;\n-\t\t}\n+\t\t\ttrace_netfs_sreq(subreq, netfs_sreq_trace_prepare);\n \n-\tfill_with_zeroes:\n-\t\tif (source == NETFS_FILL_WITH_ZEROES) {\n+\t\t} else if ((subreq->start >= ictx->zero_point ||\n+\t\t\t subreq->start >= rreq->i_size) &&\n+\t\t\t size > 0) {\n+\t\t\t/* If this range lies beyond the zero-point, that part\n+\t\t\t * can just be cleared locally.\n+\t\t\t */\n+\t\t\t_debug(\"zero %llx-%llx\", start, start + size);\n+\t\t\tsubreq->len = size;\n \t\t\tsubreq->source = NETFS_FILL_WITH_ZEROES;\n-\t\t\ttrace_netfs_sreq(subreq, netfs_sreq_trace_submit);\n+\t\t\tif (rreq->cache_resources.ops)\n+\t\t\t\t__set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);\n \t\t\tnetfs_stat(&netfs_n_rh_zero);\n-\t\t\tgoto issue;\n+\t\t} else {\n+\t\t\t/* Read a cache hole from the server. 
If any part of\n+\t\t\t * this range lies beyond the zero-point or the EOF,\n+\t\t\t * that part can just be cleared locally.\n+\t\t\t */\n+\t\t\tunsigned long long zlimit = umin(rreq->i_size, ictx->zero_point);\n+\t\t\tunsigned long long limit = min3(zlimit, start + size, hole_to);\n+\n+\t\t\t_debug(\"limit %llx %llx\", rreq->i_size, ictx->zero_point);\n+\t\t\t_debug(\"download %llx-%llx\", start, start + size);\n+\t\t\tsubreq->len = umin(limit - subreq->start, ULONG_MAX);\n+\t\t\tsubreq->source = NETFS_DOWNLOAD_FROM_SERVER;\n+\t\t\tif (rreq->cache_resources.ops)\n+\t\t\t\t__set_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);\n+\t\t\tnetfs_stat(&netfs_n_rh_download);\n \t\t}\n \n-\t\tif (source == NETFS_READ_FROM_CACHE) {\n-\t\t\ttrace_netfs_sreq(subreq, netfs_sreq_trace_submit);\n-\t\t\tgoto issue;\n+\t\tif (size == 0) {\n+\t\t\tpr_err(\"ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx\",\n+\t\t\t rreq->debug_id, subreq->debug_index,\n+\t\t\t subreq->len, size,\n+\t\t\t subreq->start, ictx->zero_point, rreq->i_size);\n+\t\t\ttrace_netfs_sreq(subreq, netfs_sreq_trace_cancel);\n+\t\t\t/* Not queued - release both refs. */\n+\t\t\tnetfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);\n+\t\t\tnetfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);\n+\t\t\tbreak;\n \t\t}\n \n-\t\tpr_err(\"Unexpected read source %u\\n\", source);\n-\t\tWARN_ON_ONCE(1);\n-\t\tbreak;\n+\t\trreq->io_streams[0].sreq_max_len = MAX_RW_COUNT;\n+\t\trreq->io_streams[0].sreq_max_segs = INT_MAX;\n+\n+\t\tif (prepare_read) {\n+\t\t\tret = prepare_read(subreq);\n+\t\t\tif (ret < 0) {\n+\t\t\t\tsubreq->error = ret;\n+\t\t\t\t/* Not queued - release both refs. 
*/\n+\t\t\t\tnetfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);\n+\t\t\t\tnetfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t\ttrace_netfs_sreq(subreq, netfs_sreq_trace_prepare);\n+\t\t}\n \n-\tissue:\n \t\tslice = netfs_prepare_read_iterator(subreq, ractl);\n \t\tif (slice < 0) {\n \t\t\tret = slice;\n@@ -305,6 +358,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq,\n \t\tsize -= slice;\n \t\tstart += slice;\n \n+\t\ttrace_netfs_sreq(subreq, netfs_sreq_trace_submit);\n+\n \t\tnetfs_queue_read(rreq, subreq, size <= 0);\n \t\tnetfs_issue_read(rreq, subreq);\n \t\tcond_resched();\ndiff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c\nindex 22a4d61631c9..bce3e7109ec1 100644\n--- a/fs/netfs/buffered_write.c\n+++ b/fs/netfs/buffered_write.c\n@@ -73,9 +73,6 @@ void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,\n \ti_size = i_size_read(inode);\n \tif (end > i_size) {\n \t\ti_size_write(inode, end);\n-#if IS_ENABLED(CONFIG_FSCACHE)\n-\t\tfscache_update_cookie(ctx->cache, NULL, &end);\n-#endif\n \n \t\tgap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1));\n \t\tif (copied > gap) {\ndiff --git a/fs/netfs/internal.h b/fs/netfs/internal.h\nindex d436e20d3418..2fcf31de5f2c 100644\n--- a/fs/netfs/internal.h\n+++ b/fs/netfs/internal.h\n@@ -23,6 +23,8 @@\n /*\n * buffered_read.c\n */\n+int netfs_read_query_cache(struct netfs_io_request *rreq,\n+\t\t\t struct fscache_occupancy *occ);\n void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error);\n int netfs_prefetch_for_write(struct file *file, struct folio *folio,\n \t\t\t size_t offset, size_t len);\ndiff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c\nindex d0e23bc42445..d87a03859ebd 100644\n--- a/fs/netfs/read_single.c\n+++ b/fs/netfs/read_single.c\n@@ -58,20 +58,6 @@ static int netfs_single_begin_cache_read(struct netfs_io_request *rreq, struct n\n \treturn 
fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));\n }\n \n-static void netfs_single_cache_prepare_read(struct netfs_io_request *rreq,\n-\t\t\t\t\t struct netfs_io_subrequest *subreq)\n-{\n-\tstruct netfs_cache_resources *cres = &rreq->cache_resources;\n-\n-\tif (!cres->ops) {\n-\t\tsubreq->source = NETFS_DOWNLOAD_FROM_SERVER;\n-\t\treturn;\n-\t}\n-\tsubreq->source = cres->ops->prepare_read(subreq, rreq->i_size);\n-\ttrace_netfs_sreq(subreq, netfs_sreq_trace_prepare);\n-\n-}\n-\n static void netfs_single_read_cache(struct netfs_io_request *rreq,\n \t\t\t\t struct netfs_io_subrequest *subreq)\n {\n@@ -90,6 +76,14 @@ static void netfs_single_read_cache(struct netfs_io_request *rreq,\n static int netfs_single_dispatch_read(struct netfs_io_request *rreq)\n {\n \tstruct netfs_io_stream *stream = &rreq->io_streams[0];\n+\tstruct fscache_occupancy occ = {\n+\t\t.query_from\t= 0,\n+\t\t.query_to\t= rreq->len,\n+\t\t.cached_from[0]\t= ULLONG_MAX,\n+\t\t.cached_to[0]\t= ULLONG_MAX,\n+\t\t.cached_from[1]\t= ULLONG_MAX,\n+\t\t.cached_to[1]\t= ULLONG_MAX,\n+\t};\n \tstruct netfs_io_subrequest *subreq;\n \tint ret = 0;\n \n@@ -97,11 +91,19 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq)\n \tif (!subreq)\n \t\treturn -ENOMEM;\n \n-\tsubreq->source\t= NETFS_SOURCE_UNKNOWN;\n+\tsubreq->source\t= NETFS_DOWNLOAD_FROM_SERVER;\n \tsubreq->start\t= 0;\n \tsubreq->len\t= rreq->len;\n \tsubreq->io_iter\t= rreq->buffer.iter;\n \n+\t/* Try to use the cache if the cache content matches the size of the\n+\t * remote file.\n+\t */\n+\tnetfs_read_query_cache(rreq, &occ);\n+\tif (occ.cached_from[0] == 0 &&\n+\t occ.cached_to[0] == rreq->len)\n+\t\tsubreq->source = NETFS_READ_FROM_CACHE;\n+\n \t__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);\n \n \tspin_lock(&rreq->lock);\n@@ -111,7 +113,6 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq)\n \tsmp_store_release(&stream->active, true);\n \tspin_unlock(&rreq->lock);\n 
\n-\tnetfs_single_cache_prepare_read(rreq, subreq);\n \tswitch (subreq->source) {\n \tcase NETFS_DOWNLOAD_FROM_SERVER:\n \t\tnetfs_stat(&netfs_n_rh_download);\n@@ -125,6 +126,12 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq)\n \t\trreq->submitted += subreq->len;\n \t\tbreak;\n \tcase NETFS_READ_FROM_CACHE:\n+\t\tif (rreq->cache_resources.ops->prepare_read) {\n+\t\t\tret = rreq->cache_resources.ops->prepare_read(subreq);\n+\t\t\tif (ret < 0)\n+\t\t\t\tgoto cancel;\n+\t\t}\n+\n \t\ttrace_netfs_sreq(subreq, netfs_sreq_trace_submit);\n \t\tnetfs_single_read_cache(rreq, subreq);\n \t\trreq->submitted += subreq->len;\ndiff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c\nindex b194447f4b11..a839735d5675 100644\n--- a/fs/netfs/write_collect.c\n+++ b/fs/netfs/write_collect.c\n@@ -185,6 +185,16 @@ static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,\n \twreq->buffer.first_tail_slot = slot;\n }\n \n+static void netfs_cache_collect(struct netfs_io_request *wreq,\n+\t\t\t\tstruct netfs_io_stream *stream)\n+{\n+\tstruct netfs_cache_resources *cres = &wreq->cache_resources;\n+\n+\tif (cres->ops && cres->ops->collect_write)\n+\t\tcres->ops->collect_write(wreq, wreq->cache_coll_to,\n+\t\t\t\t\t stream->collected_to - wreq->cache_coll_to);\n+}\n+\n /*\n * Collect and assess the results of various write subrequests. 
We may need to\n * retry some of the results - or even do an RMW cycle for content crypto.\n@@ -238,6 +248,11 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)\n \t\t\tif (stream->collected_to < front->start) {\n \t\t\t\ttrace_netfs_collect_gap(wreq, stream, issued_to, 'F');\n \t\t\t\tstream->collected_to = front->start;\n+\t\t\t\tif (stream->source == NETFS_WRITE_TO_CACHE) {\n+\t\t\t\t\tif (wreq->cache_coll_to < stream->collected_to)\n+\t\t\t\t\t\tnetfs_cache_collect(wreq, stream);\n+\t\t\t\t\twreq->cache_coll_to = stream->collected_to;\n+\t\t\t\t}\n \t\t\t}\n \n \t\t\t/* Stall if the front is still undergoing I/O. */\n@@ -261,8 +276,19 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)\n \t\t\tif (test_bit(NETFS_SREQ_FAILED, &front->flags)) {\n \t\t\t\tstream->failed = true;\n \t\t\t\tstream->error = front->error;\n-\t\t\t\tif (stream->source == NETFS_UPLOAD_TO_SERVER)\n+\t\t\t\tswitch (stream->source) {\n+\t\t\t\tcase NETFS_UPLOAD_TO_SERVER:\n \t\t\t\t\tmapping_set_error(wreq->mapping, front->error);\n+\t\t\t\t\tbreak;\n+\t\t\t\tcase NETFS_WRITE_TO_CACHE:\n+\t\t\t\t\tif (wreq->cache_coll_to < stream->collected_to)\n+\t\t\t\t\t\tnetfs_cache_collect(wreq, stream);\n+\t\t\t\t\twreq->cache_coll_to = stream->collected_to + front->len;\n+\t\t\t\t\tbreak;\n+\t\t\t\tdefault:\n+\t\t\t\t\tWARN_ON(1);\n+\t\t\t\t\tbreak;\n+\t\t\t\t}\n \t\t\t\tnotes |= NEED_REASSESS | SAW_FAILURE;\n \t\t\t\tbreak;\n \t\t\t}\n@@ -355,6 +381,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)\n */\n bool netfs_write_collection(struct netfs_io_request *wreq)\n {\n+\tstruct netfs_io_stream *cstream = &wreq->io_streams[1];\n \tstruct netfs_inode *ictx = netfs_inode(wreq->inode);\n \tsize_t transferred;\n \tbool transferred_valid = false;\n@@ -390,13 +417,19 @@ bool netfs_write_collection(struct netfs_io_request *wreq)\n \t\twreq->transferred = transferred;\n \ttrace_netfs_rreq(wreq, netfs_rreq_trace_write_done);\n \n-\tif 
(wreq->io_streams[1].active &&\n-\t wreq->io_streams[1].failed &&\n-\t ictx->ops->invalidate_cache) {\n-\t\t/* Cache write failure doesn't prevent writeback completion\n-\t\t * unless we're in disconnected mode.\n-\t\t */\n-\t\tictx->ops->invalidate_cache(wreq);\n+\tif (cstream->active) {\n+\t\tif (cstream->failed) {\n+\t\t\tif (ictx->ops->invalidate_cache)\n+\t\t\t\t/* Cache write failure doesn't prevent\n+\t\t\t\t * writeback completion unless we're in\n+\t\t\t\t * disconnected mode.\n+\t\t\t\t */\n+\t\t\t\tictx->ops->invalidate_cache(wreq);\n+\t\t} else {\n+\t\t\tif (wreq->cache_coll_to < cstream->collected_to)\n+\t\t\t\tnetfs_cache_collect(wreq, cstream);\n+\t\t\twreq->cache_coll_to = cstream->collected_to;\n+\t\t}\n \t}\n \n \t_debug(\"finished\");\ndiff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c\nindex 2db688f94125..2de6b8621e11 100644\n--- a/fs/netfs/write_issue.c\n+++ b/fs/netfs/write_issue.c\n@@ -112,6 +112,8 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,\n \t\tgoto nomem;\n \n \twreq->cleaned_to = wreq->start;\n+\tif (wreq->cache_resources.dio_size > 1)\n+\t\twreq->cache_coll_to = round_down(wreq->start, wreq->cache_resources.dio_size);\n \n \twreq->io_streams[0].stream_nr\t\t= 0;\n \twreq->io_streams[0].source\t\t= NETFS_UPLOAD_TO_SERVER;\n@@ -263,6 +265,7 @@ void netfs_issue_write(struct netfs_io_request *wreq,\n \n \tif (!subreq)\n \t\treturn;\n+\n \tstream->construct = NULL;\n \tsubreq->io_iter.count = subreq->len;\n \tnetfs_do_issue_write(stream, subreq);\ndiff --git a/include/linux/fscache.h b/include/linux/fscache.h\nindex 58fdb9605425..850d20241075 100644\n--- a/include/linux/fscache.h\n+++ b/include/linux/fscache.h\n@@ -147,6 +147,23 @@ struct fscache_cookie {\n \t};\n };\n \n+enum fscache_extent_type {\n+\tFSCACHE_EXTENT_DATA,\n+\tFSCACHE_EXTENT_ZERO,\n+} __mode(byte);\n+\n+/*\n+ * Cache occupancy information.\n+ */\n+struct fscache_occupancy {\n+\tunsigned long long\tquery_from;\t/* Point to query 
from */\n+\tunsigned long long\tquery_to;\t/* Point to query to */\n+\tunsigned long long\tcached_from[2];\t/* Point at which cache extents start */\n+\tunsigned long long\tcached_to[2];\t/* Point at which cache extents end */\n+\tunsigned int\t\tgranularity;\t/* Granularity desired */\n+\tenum fscache_extent_type cached_type[2];\t/* Type of cache extent */\n+};\n+\n /*\n * slow-path functions for when there is actually caching available, and the\n * netfs does actually have a valid token\ndiff --git a/include/linux/netfs.h b/include/linux/netfs.h\nindex ba17ac5bf356..77238bc4a712 100644\n--- a/include/linux/netfs.h\n+++ b/include/linux/netfs.h\n@@ -22,6 +22,7 @@\n \n enum netfs_sreq_ref_trace;\n typedef struct mempool mempool_t;\n+struct fscache_occupancy;\n struct folio_queue;\n \n /**\n@@ -159,8 +160,10 @@ struct netfs_cache_resources {\n \tconst struct netfs_cache_ops\t*ops;\n \tvoid\t\t\t\t*cache_priv;\n \tvoid\t\t\t\t*cache_priv2;\n+\tunsigned long long\t\tcache_i_size;\t/* Initial size of cache file */\n \tunsigned int\t\t\tdebug_id;\t/* Cookie debug ID */\n \tunsigned int\t\t\tinval_counter;\t/* object->inval_counter at begin_op */\n+\tunsigned int\t\t\tdio_size;\t/* DIO block size */\n };\n \n /*\n@@ -250,6 +253,7 @@ struct netfs_io_request {\n \tunsigned long long\tstart;\t\t/* Start position */\n \tatomic64_t\t\tissued_to;\t/* Write issuer folio cursor */\n \tunsigned long long\tcollected_to;\t/* Point we've collected to */\n+\tunsigned long long\tcache_coll_to;\t/* Point the cache has collected to */\n \tunsigned long long\tcleaned_to;\t/* Position we've cleaned folios to */\n \tunsigned long long\tabandon_to;\t/* Position to abandon folios to */\n \tpgoff_t\t\t\tno_unlock_folio; /* Don't unlock this folio after read */\n@@ -354,8 +358,7 @@ struct netfs_cache_ops {\n \t/* Prepare a read operation, shortening it to a cached/uncached\n \t * boundary as appropriate.\n \t */\n-\tenum netfs_io_source (*prepare_read)(struct netfs_io_subrequest 
*subreq,\n-\t\t\t\t\t unsigned long long i_size);\n+\tint (*prepare_read)(struct netfs_io_subrequest *subreq);\n \n \t/* Prepare a write subrequest, working out if we're allowed to do it\n \t * and finding out the maximum amount of data to gather before\n@@ -383,8 +386,13 @@ struct netfs_cache_ops {\n \t * next chunk of data starts and how long it is.\n \t */\n \tint (*query_occupancy)(struct netfs_cache_resources *cres,\n-\t\t\t loff_t start, size_t len, size_t granularity,\n-\t\t\t loff_t *_data_start, size_t *_data_len);\n+\t\t\t struct fscache_occupancy *occ);\n+\n+\t/* Collect the result of buffered writeback to the cache.\n+\t * This includes copying a read to the cache.\n+\t */\n+\tvoid (*collect_write)(struct netfs_io_request *wreq,\n+\t\t\t unsigned long long start, size_t len);\n };\n \n /* High-level read API. */\ndiff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h\nindex a743b2a35ea7..4bba6fda1f8b 100644\n--- a/include/trace/events/cachefiles.h\n+++ b/include/trace/events/cachefiles.h\n@@ -56,6 +56,7 @@ enum cachefiles_coherency_trace {\n \tcachefiles_coherency_check_ok,\n \tcachefiles_coherency_check_type,\n \tcachefiles_coherency_check_xattr,\n+\tcachefiles_coherency_remove,\n \tcachefiles_coherency_set_fail,\n \tcachefiles_coherency_set_ok,\n \tcachefiles_coherency_vol_check_cmp,\n@@ -67,6 +68,7 @@ enum cachefiles_coherency_trace {\n };\n \n enum cachefiles_trunc_trace {\n+\tcachefiles_trunc_clear_padding,\n \tcachefiles_trunc_dio_adjust,\n \tcachefiles_trunc_expand_tmpfile,\n \tcachefiles_trunc_shrink,\n@@ -84,6 +86,7 @@ enum cachefiles_prepare_read_trace {\n };\n \n enum cachefiles_error_trace {\n+\tcachefiles_trace_alignment_error,\n \tcachefiles_trace_fallocate_error,\n \tcachefiles_trace_getxattr_error,\n \tcachefiles_trace_link_error,\n@@ -144,6 +147,7 @@ enum cachefiles_error_trace {\n \tEM(cachefiles_coherency_check_ok,\t\"OK \")\t\t\\\n \tEM(cachefiles_coherency_check_type,\t\"BAD type\")\t\t\\\n 
\tEM(cachefiles_coherency_check_xattr,\t\"BAD xatt\")\t\t\\\n+\tEM(cachefiles_coherency_remove,\t\t\"REMOVE \")\t\t\\\n \tEM(cachefiles_coherency_set_fail,\t\"SET fail\")\t\t\\\n \tEM(cachefiles_coherency_set_ok,\t\t\"SET ok \")\t\t\\\n \tEM(cachefiles_coherency_vol_check_cmp,\t\"VOL BAD cmp \")\t\t\\\n@@ -154,6 +158,7 @@ enum cachefiles_error_trace {\n \tE_(cachefiles_coherency_vol_set_ok,\t\"VOL SET ok \")\n \n #define cachefiles_trunc_traces\t\t\t\t\t\t\\\n+\tEM(cachefiles_trunc_clear_padding,\t\"CLRPAD\")\t\t\\\n \tEM(cachefiles_trunc_dio_adjust,\t\t\"DIOADJ\")\t\t\\\n \tEM(cachefiles_trunc_expand_tmpfile,\t\"EXPTMP\")\t\t\\\n \tE_(cachefiles_trunc_shrink,\t\t\"SHRINK\")\n@@ -169,6 +174,7 @@ enum cachefiles_error_trace {\n \tE_(cachefiles_trace_read_seek_nxio,\t\"seek-enxio\")\n \n #define cachefiles_error_traces\t\t\t\t\t\t\\\n+\tEM(cachefiles_trace_alignment_error,\t\"align\")\t\t\\\n \tEM(cachefiles_trace_fallocate_error,\t\"fallocate\")\t\t\\\n \tEM(cachefiles_trace_getxattr_error,\t\"getxattr\")\t\t\\\n \tEM(cachefiles_trace_link_error,\t\t\"link\")\t\t\t\\\n@@ -379,12 +385,12 @@ TRACE_EVENT(cachefiles_rename,\n \n TRACE_EVENT(cachefiles_coherency,\n \t TP_PROTO(struct cachefiles_object *obj,\n-\t\t ino_t ino,\n+\t\t ino_t ino, unsigned long long obj_size,\n \t\t u64 disk_aux,\n \t\t enum cachefiles_content content,\n \t\t enum cachefiles_coherency_trace why),\n \n-\t TP_ARGS(obj, ino, disk_aux, content, why),\n+\t TP_ARGS(obj, ino, obj_size, disk_aux, content, why),\n \n \t /* Note that obj may be NULL */\n \t TP_STRUCT__entry(\n@@ -392,6 +398,7 @@ TRACE_EVENT(cachefiles_coherency,\n \t\t __field(enum cachefiles_coherency_trace,\twhy)\n \t\t __field(enum cachefiles_content,\t\tcontent)\n \t\t __field(u64,\t\t\t\tino)\n+\t\t __field(u64,\t\t\t\tobj_size)\n \t\t __field(u64,\t\t\t\taux)\n \t\t __field(u64,\t\t\t\tdisk_aux)\n \t\t\t ),\n@@ -401,14 +408,16 @@ TRACE_EVENT(cachefiles_coherency,\n \t\t __entry->why\t= why;\n \t\t __entry->content\t= content;\n 
\t\t __entry->ino\t= ino;\n+\t\t __entry->obj_size\t= obj_size;\n \t\t __entry->aux\t= be64_to_cpup((__be64 *)obj->cookie->inline_aux);\n \t\t __entry->disk_aux\t= disk_aux;\n \t\t\t ),\n \n-\t TP_printk(\"o=%08x %s B=%llx c=%u aux=%llx dsk=%llx\",\n+\t TP_printk(\"o=%08x %s B=%llx oz=%llx c=%u aux=%llx dsk=%llx\",\n \t\t __entry->obj,\n \t\t __print_symbolic(__entry->why, cachefiles_coherency_traces),\n \t\t __entry->ino,\n+\t\t __entry->obj_size,\n \t\t __entry->content,\n \t\t __entry->aux,\n \t\t __entry->disk_aux)\n", "prefixes": [ "08/26" ] }