Patch Detail
get:
Show a patch.
patch:
Partially update a patch (only the fields supplied are changed).
put:
Fully update a patch (replaces the writable fields).
GET /api/1.2/patches/2226018/?format=api
{ "id": 2226018, "url": "http://patchwork.ozlabs.org/api/1.2/patches/2226018/?format=api", "web_url": "http://patchwork.ozlabs.org/project/linux-ext4/patch/20260422021042.4157510-10-yi.zhang@huaweicloud.com/", "project": { "id": 8, "url": "http://patchwork.ozlabs.org/api/1.2/projects/8/?format=api", "name": "Linux ext4 filesystem development", "link_name": "linux-ext4", "list_id": "linux-ext4.vger.kernel.org", "list_email": "linux-ext4@vger.kernel.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20260422021042.4157510-10-yi.zhang@huaweicloud.com>", "list_archive_url": null, "date": "2026-04-22T02:10:29", "name": "[v3,09/22] ext4: implement writeback path using iomap", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "c78c341c489e5beec784c1baa8105970c0ff3c9b", "submitter": { "id": 85428, "url": "http://patchwork.ozlabs.org/api/1.2/people/85428/?format=api", "name": "Zhang Yi", "email": "yi.zhang@huaweicloud.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/linux-ext4/patch/20260422021042.4157510-10-yi.zhang@huaweicloud.com/mbox/", "series": [ { "id": 500911, "url": "http://patchwork.ozlabs.org/api/1.2/series/500911/?format=api", "web_url": "http://patchwork.ozlabs.org/project/linux-ext4/list/?series=500911", "date": "2026-04-22T02:10:23", "name": "ext4: use iomap for regular file's buffered I/O path", "version": 3, "mbox": "http://patchwork.ozlabs.org/series/500911/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2226018/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2226018/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "\n <SRS0=4K98=CV=vger.kernel.org=linux-ext4+bounces-15976-patchwork-incoming=ozlabs.org@ozlabs.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "linux-ext4@vger.kernel.org" ], "Delivered-To": [ 
"patchwork-incoming@legolas.ozlabs.org", "patchwork-incoming@ozlabs.org" ], "Authentication-Results": [ "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=ozlabs.org\n (client-ip=2404:9400:2221:ea00::3; helo=mail.ozlabs.org;\n envelope-from=srs0=4k98=cv=vger.kernel.org=linux-ext4+bounces-15976-patchwork-incoming=ozlabs.org@ozlabs.org;\n receiver=patchwork.ozlabs.org)", "gandalf.ozlabs.org;\n arc=pass smtp.remote-ip=172.234.253.10 arc.chain=subspace.kernel.org", "gandalf.ozlabs.org;\n dmarc=none (p=none dis=none) header.from=huaweicloud.com", "gandalf.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=vger.kernel.org\n (client-ip=172.234.253.10; helo=sea.lore.kernel.org;\n envelope-from=linux-ext4+bounces-15976-patchwork-incoming=ozlabs.org@vger.kernel.org;\n receiver=ozlabs.org)", "smtp.subspace.kernel.org;\n arc=none smtp.client-ip=45.249.212.51", "smtp.subspace.kernel.org;\n dmarc=none (p=none dis=none) header.from=huaweicloud.com", "smtp.subspace.kernel.org;\n spf=pass smtp.mailfrom=huaweicloud.com" ], "Received": [ "from mail.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4g0jgp0nfcz1yD5\n\tfor <incoming@patchwork.ozlabs.org>; Wed, 22 Apr 2026 12:23:18 +1000 (AEST)", "from mail.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3])\n\tby gandalf.ozlabs.org (Postfix) with ESMTP id 4g0jgp0LBZz4wK3\n\tfor <incoming@patchwork.ozlabs.org>; Wed, 22 Apr 2026 12:23:18 +1000 (AEST)", "by gandalf.ozlabs.org (Postfix)\n\tid 4g0jgp0GZ1z4wKJ; Wed, 22 Apr 2026 12:23:18 +1000 (AEST)", "from sea.lore.kernel.org (sea.lore.kernel.org [172.234.253.10])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519)\n\t(No client certificate requested)\n\tby gandalf.ozlabs.org (Postfix) with ESMTPS id 4g0jgk4611z4wK3\n\tfor 
<patchwork-incoming@ozlabs.org>; Wed, 22 Apr 2026 12:23:14 +1000 (AEST)", "from smtp.subspace.kernel.org (conduit.subspace.kernel.org\n [100.90.174.1])\n\tby sea.lore.kernel.org (Postfix) with ESMTP id 975FE30B0791\n\tfor <patchwork-incoming@ozlabs.org>; Wed, 22 Apr 2026 02:17:42 +0000 (UTC)", "from localhost.localdomain (localhost.localdomain [127.0.0.1])\n\tby smtp.subspace.kernel.org (Postfix) with ESMTP id 315563612E2;\n\tWed, 22 Apr 2026 02:17:07 +0000 (UTC)", "from dggsgout11.his.huawei.com (dggsgout11.his.huawei.com\n [45.249.212.51])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n\t(No client certificate requested)\n\tby smtp.subspace.kernel.org (Postfix) with ESMTPS id DD17B345CCE;\n\tWed, 22 Apr 2026 02:17:02 +0000 (UTC)", "from mail.maildlp.com (unknown [172.19.163.198])\n\tby dggsgout11.his.huawei.com (SkyGuard) with ESMTPS id 4g0jWM1WtBzYQtqs;\n\tWed, 22 Apr 2026 10:15:59 +0800 (CST)", "from mail02.huawei.com (unknown [10.116.40.252])\n\tby mail.maildlp.com (Postfix) with ESMTP id 6818340601;\n\tWed, 22 Apr 2026 10:16:56 +0800 (CST)", "from huaweicloud.com (unknown [10.50.85.155])\n\tby APP3 (Coremail) with SMTP id _Ch0CgB3JL6PL+hpqkgUBQ--.2635S13;\n\tWed, 22 Apr 2026 10:16:56 +0800 (CST)" ], "ARC-Seal": [ "i=2; a=rsa-sha256; d=ozlabs.org; s=201707; t=1776824598; cv=pass;\n\tb=n5dU6Am+zZweF3VIIYoGzAvUeYqjg8+5wvNUg3+NGZuLKWfrNvirZN/QwvcZAAVaI+xpeKGPKDgQFLMgNCW8DD7OEMKwQRtOAUpynO+GQQOPxTAFeljtrWqNqNhYijc1jr4qul8Cf7wudAnYMIg0OiyOsvTcmoLTV+SCpfZjYBiWKBfIJuebW4JRh7pfQmJewNCGR6ED30U/q8HHv70a0g92yWT+edJicUK549HDhSSW7xCX02+zbC15qc4SUmY73aC7otsSmKd+0hC2+X0KRORq7yBfXpZlv/emCyFexK2OHqfDlBflmst+U8Fff+QQFORKjLIpnlT5j0BZcnvUtA==", "i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;\n\tt=1776824226; cv=none;\n b=mku+I8Dm3IVdwa7EFgeE4ba7tMdePFBpc07FnmaJV/e2j+couafUoNmIb7ykpVk07hhlXqA+oRDfB1FdH35H9H0ATo6sQWYdFfpHESbZ0bxgtFYtTRnT8mkUPuoYEbutzrxb6etoIy4HNDP58MUsAxcEmY5l06lL3ITgUhciZ6o=" ], "ARC-Message-Signature": [ "i=2; a=rsa-sha256; 
d=ozlabs.org; s=201707;\n\tt=1776824598; c=relaxed/relaxed;\n\tbh=Q8bquLJikpaGyVWdh1kLmCUXLMfMj/EhuX64sT1MNwM=;\n\th=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:\n\t MIME-Version;\n b=VHzD+ENR/SoV8Kjo+dknv9WhBcn8eCkGGH85Z6CVUgHj6zKabCkmCFvIUInaiB3YzdthjofqJNZq1+HoXZUIFZcoyuJrZG6pgvEInqMhg1XLrqal45wqp9RUcebXdjSWbiIyV45TO3IvslxJv3et7CB12VGv3AGk+1MTnpZuFE3+6v5St7x9i7k4RVX+H/eaCW/axS5fKPDEu59eeVQzqz8M4Cpu784k4a1xVdRNDRk0X0aC0iy6+qM2AD1eZc3c+Mq4zlFp3441HCaGyqr+WZYv5nbA1UIJB3dEqPED2fhYMgcRHNP/8NAHgKPO34w3qF56qrvVkqsUR5PsiIZAwQ==", "i=1; a=rsa-sha256; d=subspace.kernel.org;\n\ts=arc-20240116; t=1776824226; c=relaxed/simple;\n\tbh=4uuxHvJ0MnSuwgpqC0GALTHuP3B7gXA7d1fSBtbxNhM=;\n\th=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:\n\t MIME-Version;\n b=n8q3Vo/qsZvMcTWceqS35o7LxRely2hQ5toBiZ51BGnNb01jy3OtDNh8i+tV9d6w/5Hswvcpv1cFF7QXtSM28fmFVhxsn9en21ANkKbHeLmi/fuQ7FeyKrWUN4+DSuvogyzWgcTqM4uc3iSgii1aCxnfCxhk3DVrzzHklwadZZw=" ], "ARC-Authentication-Results": [ "i=2; gandalf.ozlabs.org;\n dmarc=none (p=none dis=none) header.from=huaweicloud.com;\n spf=pass (client-ip=172.234.253.10; helo=sea.lore.kernel.org;\n envelope-from=linux-ext4+bounces-15976-patchwork-incoming=ozlabs.org@vger.kernel.org;\n receiver=ozlabs.org) smtp.mailfrom=vger.kernel.org", "i=1; smtp.subspace.kernel.org;\n dmarc=none (p=none dis=none) header.from=huaweicloud.com;\n spf=pass smtp.mailfrom=huaweicloud.com; arc=none smtp.client-ip=45.249.212.51" ], "From": "Zhang Yi <yi.zhang@huaweicloud.com>", "To": "linux-ext4@vger.kernel.org,\n\tlinux-fsdevel@vger.kernel.org", "Cc": "linux-kernel@vger.kernel.org,\n\ttytso@mit.edu,\n\tadilger.kernel@dilger.ca,\n\tlibaokun@linux.alibaba.com,\n\tjack@suse.cz,\n\tojaswin@linux.ibm.com,\n\tritesh.list@gmail.com,\n\tdjwong@kernel.org,\n\thch@infradead.org,\n\tyi.zhang@huawei.com,\n\tyi.zhang@huaweicloud.com,\n\tyizhang089@gmail.com,\n\tyangerkun@huawei.com,\n\tyukuai@fnnas.com", "Subject": "[PATCH v3 09/22] ext4: implement writeback path using iomap", 
"Date": "Wed, 22 Apr 2026 10:10:29 +0800", "Message-ID": "<20260422021042.4157510-10-yi.zhang@huaweicloud.com>", "X-Mailer": "git-send-email 2.52.0", "In-Reply-To": "<20260422021042.4157510-1-yi.zhang@huaweicloud.com>", "References": "<20260422021042.4157510-1-yi.zhang@huaweicloud.com>", "Precedence": "bulk", "X-Mailing-List": "linux-ext4@vger.kernel.org", "List-Id": "<linux-ext4.vger.kernel.org>", "List-Subscribe": "<mailto:linux-ext4+subscribe@vger.kernel.org>", "List-Unsubscribe": "<mailto:linux-ext4+unsubscribe@vger.kernel.org>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "X-CM-TRANSID": "_Ch0CgB3JL6PL+hpqkgUBQ--.2635S13", "X-Coremail-Antispam": "1UD129KBjvAXoW3ury7GFykuw18KFy7Gw4Uurg_yoW8Xw48Ao\n\tWaqa13Xr48Jry5t3yrCr1ftFyUuan7Gw4rJr45ursFvF9xJa4Yyw4xGw43W3W7Xw4FkFWf\n\tZrWxJ3WrGr4xJF1rn29KB7ZKAUJUUUU8529EdanIXcx71UUUUU7v73VFW2AGmfu7bjvjm3\n\tAaLaJ3UjIYCTnIWjp_UUUOV7AC8VAFwI0_Wr0E3s1l1xkIjI8I6I8E6xAIw20EY4v20xva\n\tj40_Wr0E3s1l1IIY67AEw4v_Jr0_Jr4l82xGYIkIc2x26280x7IE14v26r126s0DM28Irc\n\tIa0xkI8VCY1x0267AKxVW5JVCq3wA2ocxC64kIII0Yj41l84x0c7CEw4AK67xGY2AK021l\n\t84ACjcxK6xIIjxv20xvE14v26w1j6s0DM28EF7xvwVC0I7IYx2IY6xkF7I0E14v26r4UJV\n\tWxJr1l84ACjcxK6I8E87Iv67AKxVW0oVCq3wA2z4x0Y4vEx4A2jsIEc7CjxVAFwI0_GcCE\n\t3s1le2I262IYc4CY6c8Ij28IcVAaY2xG8wAqx4xG64xvF2IEw4CE5I8CrVC2j2WlYx0E2I\n\tx0cI8IcVAFwI0_JrI_JrylYx0Ex4A2jsIE14v26r1j6r4UMcvjeVCFs4IE7xkEbVWUJVW8\n\tJwACjcxG0xvY0x0EwIxGrwACjI8F5VA0II8E6IAqYI8I648v4I1lFIxGxcIEc7CjxVA2Y2\n\tka0xkIwI1lc7CjxVAaw2AFwI0_Jw0_GFyl42xK82IYc2Ij64vIr41l4I8I3I0E4IkC6x0Y\n\tz7v_Jr0_Gr1lx2IqxVAqx4xG67AKxVWUJVWUGwC20s026x8GjcxK67AKxVWUGVWUWwC2zV\n\tAF1VAY17CE14v26r4a6rW5MIIYrxkI7VAKI48JMIIF0xvE2Ix0cI8IcVAFwI0_JFI_Gr1l\n\tIxAIcVC0I7IYx2IY6xkF7I0E14v26r4UJVWxJr1lIxAIcVCF04k26cxKx2IYs7xG6r1j6r\n\t1xMIIF0xvEx4A2jsIE14v26r4j6F4UMIIF0xvEx4A2jsIEc7CjxVAFwI0_Gr1j6F4UJbIY\n\tCTnIWIevJa73UjIFyTuYvjfUriihUUUUU", "X-CM-SenderInfo": "d1lo6xhdqjqx5xdzvxpfor3voofrz/", "X-Spam-Status": "No, score=-1.1 required=5.0 
tests=ARC_SIGNED,ARC_VALID,\n\tDMARC_MISSING,HEADER_FROM_DIFFERENT_DOMAINS,MAILING_LIST_MULTI,\n\tSPF_HELO_NONE,SPF_PASS autolearn=disabled version=4.0.1", "X-Spam-Checker-Version": "SpamAssassin 4.0.1 (2024-03-25) on gandalf.ozlabs.org" }, "content": "From: Zhang Yi <yi.zhang@huawei.com>\n\nImplement the iomap writeback path for ext4. It implements\next4_iomap_writepages(), introduces a new iomap_writeback_ops instance,\next4_writeback_ops, and creates a new end I/O extent conversion worker\nto convert unwritten extents after the I/O is completed.\n\nIn the ->writeback_range() callback, it first calls\next4_iomap_map_writeback_range() to query the longest range of existing\nmapped extents. For performance considerations, if the block range has\nnot been allocated, it attempts to allocate a range of the longest\nblocks which is based on the writeback length and the delalloc extent\nlength, rather than allocating for a single folio length at a time.\nThen, it adds the folio to the iomap_ioend instance.\n\nIn the ->writeback_submit() callback, it registers a special end bio\ncallback, ext4_iomap_end_bio(), which will start a worker if we need to\nconvert unwritten extents or need to update i_disksize after the data\nhas been written back, and if we need to abort the journal when the I/O\nfailed to write back.\n\nKey changes:\n\n - Since we don't use data=ordered mode to prevent exposing stale data\n during append writebacks, we always allocate unwritten extents for\n new blocks and postpone updating the i_disksize until the I/O is\n done. In addition, the deadlock problem that was expected to be\n resolved through the reserve handle does not exist here. 
Therefore,\n we also do not need to use the reserve handle when converting the\n unwritten extent in the end I/O worker; we can start a normal\n journal handle instead.\n\n - Since ->writeback_range() is always executed under the folio lock,\n this means we need to start the handle under the folio lock as well.\n This is opposite to the order in the buffer_head writeback path. The\n lock ordering documentation in super.c has been updated accordingly.\n\nSigned-off-by: Zhang Yi <yi.zhang@huawei.com>\n---\n fs/ext4/ext4.h | 4 +\n fs/ext4/inode.c | 202 +++++++++++++++++++++++++++++++++++++++++++++-\n fs/ext4/page-io.c | 129 +++++++++++++++++++++++++++++\n fs/ext4/super.c | 7 +-\n 4 files changed, 340 insertions(+), 2 deletions(-)", "diff": "diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h\nindex be92ff648362..0ffa81f86bc5 100644\n--- a/fs/ext4/ext4.h\n+++ b/fs/ext4/ext4.h\n@@ -1173,6 +1173,8 @@ struct ext4_inode_info {\n \t */\n \tstruct list_head i_rsv_conversion_list;\n \tstruct work_struct i_rsv_conversion_work;\n+\tstruct list_head i_iomap_ioend_list;\n+\tstruct work_struct i_iomap_ioend_work;\n \n \t/*\n \t * Transactions that contain inode's metadata needed to complete\n@@ -3887,6 +3889,8 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *page,\n \t\tsize_t len);\n extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end);\n extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end);\n+extern void ext4_iomap_end_io(struct work_struct *work);\n+extern void ext4_iomap_end_bio(struct bio *bio);\n \n /* mmp.c */\n extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);\ndiff --git a/fs/ext4/inode.c b/fs/ext4/inode.c\nindex 0ca303a90249..76ce43c64c30 100644\n--- a/fs/ext4/inode.c\n+++ b/fs/ext4/inode.c\n@@ -44,6 +44,7 @@\n #include <linux/iversion.h>\n \n #include \"ext4_jbd2.h\"\n+#include \"ext4_extents.h\"\n #include \"xattr.h\"\n #include \"acl.h\"\n #include \"truncate.h\"\n@@ -4119,10 +4120,209 @@ 
static void ext4_iomap_readahead(struct readahead_control *rac)\n \tiomap_bio_readahead(rac, &ext4_iomap_buffered_read_ops);\n }\n \n+static int ext4_iomap_map_one_extent(struct inode *inode,\n+\t\t\t\t struct ext4_map_blocks *map)\n+{\n+\tstruct extent_status es;\n+\thandle_t *handle = NULL;\n+\tint credits, map_flags;\n+\tint retval;\n+\n+\tcredits = ext4_chunk_trans_blocks(inode, map->m_len);\n+\thandle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, credits);\n+\tif (IS_ERR(handle))\n+\t\treturn PTR_ERR(handle);\n+\n+\tmap->m_flags = 0;\n+\t/*\n+\t * It is necessary to look up extent and map blocks under i_data_sem\n+\t * in write mode, otherwise, the delalloc extent may become stale\n+\t * during concurrent truncate operations.\n+\t */\n+\text4_fc_track_inode(handle, inode);\n+\tdown_write(&EXT4_I(inode)->i_data_sem);\n+\tif (ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es, &map->m_seq)) {\n+\t\tretval = es.es_len - (map->m_lblk - es.es_lblk);\n+\t\tmap->m_len = min_t(unsigned int, retval, map->m_len);\n+\n+\t\tif (ext4_es_is_delayed(&es)) {\n+\t\t\tmap->m_flags |= EXT4_MAP_DELAYED;\n+\t\t\ttrace_ext4_da_write_pages_extent(inode, map);\n+\t\t\t/*\n+\t\t\t * Call ext4_map_create_blocks() to allocate any\n+\t\t\t * delayed allocation blocks. It is possible that\n+\t\t\t * we're going to need more metadata blocks, however\n+\t\t\t * we must not fail because we're in writeback and\n+\t\t\t * there is nothing we can do so it might result in\n+\t\t\t * data loss. 
So use reserved blocks to allocate\n+\t\t\t * metadata if possible.\n+\t\t\t */\n+\t\t\tmap_flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |\n+\t\t\t\t EXT4_GET_BLOCKS_METADATA_NOFAIL |\n+\t\t\t\t EXT4_EX_NOCACHE;\n+\n+\t\t\tretval = ext4_map_create_blocks(handle, inode, map,\n+\t\t\t\t\t\t\tmap_flags);\n+\t\t\tif (retval > 0)\n+\t\t\t\text4_fc_track_range(handle, inode, map->m_lblk,\n+\t\t\t\t\t\tmap->m_lblk + map->m_len - 1);\n+\t\t\tgoto out;\n+\t\t} else if (unlikely(ext4_es_is_hole(&es)))\n+\t\t\tgoto out;\n+\n+\t\t/* Found written or unwritten extent. */\n+\t\tmap->m_pblk = ext4_es_pblock(&es) + map->m_lblk - es.es_lblk;\n+\t\tmap->m_flags = ext4_es_is_written(&es) ?\n+\t\t\t EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN;\n+\t\tgoto out;\n+\t}\n+\n+\tretval = ext4_map_query_blocks(handle, inode, map, EXT4_EX_NOCACHE);\n+out:\n+\tup_write(&EXT4_I(inode)->i_data_sem);\n+\text4_journal_stop(handle);\n+\treturn retval < 0 ? retval : 0;\n+}\n+\n+static int ext4_iomap_map_writeback_range(struct iomap_writepage_ctx *wpc,\n+\t\t\t\t\t loff_t offset, unsigned int dirty_len)\n+{\n+\tstruct inode *inode = wpc->inode;\n+\tstruct super_block *sb = inode->i_sb;\n+\tstruct journal_s *journal = EXT4_SB(sb)->s_journal;\n+\tstruct ext4_map_blocks map;\n+\tunsigned int blkbits = inode->i_blkbits;\n+\tunsigned int index = offset >> blkbits;\n+\tunsigned int blk_end, blk_len;\n+\tint ret;\n+\n+\tret = ext4_emergency_state(sb);\n+\tif (unlikely(ret))\n+\t\treturn ret;\n+\n+\t/* Check validity of the cached writeback mapping. 
*/\n+\tif (offset >= wpc->iomap.offset &&\n+\t offset < wpc->iomap.offset + wpc->iomap.length &&\n+\t ext4_iomap_valid(inode, &wpc->iomap))\n+\t\treturn 0;\n+\n+\tblk_len = dirty_len >> blkbits;\n+\tblk_end = min_t(unsigned int, (wpc->wbc->range_end >> blkbits),\n+\t\t\t\t (UINT_MAX - 1));\n+\tif (blk_end > index + blk_len)\n+\t\tblk_len = blk_end - index + 1;\n+\n+retry:\n+\tmap.m_lblk = index;\n+\tmap.m_len = min_t(unsigned int, MAX_WRITEPAGES_EXTENT_LEN, blk_len);\n+\tret = ext4_map_blocks(NULL, inode, &map,\n+\t\t\t EXT4_GET_BLOCKS_IO_SUBMIT | EXT4_EX_NOCACHE);\n+\tif (ret < 0)\n+\t\treturn ret;\n+\n+\t/*\n+\t * The map is not a delalloc extent, it must either be a hole\n+\t * or an extent which have already been allocated.\n+\t */\n+\tif (!(map.m_flags & EXT4_MAP_DELAYED))\n+\t\tgoto out;\n+\n+\t/* Map one delalloc extent. */\n+\tret = ext4_iomap_map_one_extent(inode, &map);\n+\tif (ret < 0) {\n+\t\tif (ext4_emergency_state(sb))\n+\t\t\treturn ret;\n+\n+\t\t/*\n+\t\t * Retry transient ENOSPC errors, if\n+\t\t * ext4_count_free_blocks() is non-zero, a commit\n+\t\t * should free up blocks.\n+\t\t */\n+\t\tif (ret == -ENOSPC && journal && ext4_count_free_clusters(sb)) {\n+\t\t\tjbd2_journal_force_commit_nested(journal);\n+\t\t\tgoto retry;\n+\t\t}\n+\n+\t\text4_msg(sb, KERN_CRIT,\n+\t\t\t \"Delayed block allocation failed for inode %llu at logical offset %llu with max blocks %u with error %d\",\n+\t\t\t inode->i_ino, (unsigned long long)map.m_lblk,\n+\t\t\t (unsigned int)map.m_len, -ret);\n+\t\text4_msg(sb, KERN_CRIT,\n+\t\t\t \"This should not happen!! 
Data will be lost\\n\");\n+\t\tif (ret == -ENOSPC)\n+\t\t\text4_print_free_blocks(inode);\n+\t\treturn ret;\n+\t}\n+out:\n+\text4_set_iomap(inode, &wpc->iomap, &map, offset, dirty_len, 0);\n+\treturn 0;\n+}\n+\n+static void ext4_iomap_discard_folio(struct folio *folio, loff_t pos)\n+{\n+\tstruct inode *inode = folio->mapping->host;\n+\tloff_t length = folio_pos(folio) + folio_size(folio) - pos;\n+\n+\text4_iomap_punch_delalloc(inode, pos, length, NULL);\n+}\n+\n+static ssize_t ext4_iomap_writeback_range(struct iomap_writepage_ctx *wpc,\n+\t\t\t\t\t struct folio *folio, u64 offset,\n+\t\t\t\t\t unsigned int len, u64 end_pos)\n+{\n+\tssize_t ret;\n+\n+\tret = ext4_iomap_map_writeback_range(wpc, offset, len);\n+\tif (!ret)\n+\t\tret = iomap_add_to_ioend(wpc, folio, offset, end_pos, len);\n+\tif (ret < 0)\n+\t\text4_iomap_discard_folio(folio, offset);\n+\treturn ret;\n+}\n+\n+static int ext4_iomap_writeback_submit(struct iomap_writepage_ctx *wpc,\n+\t\t\t\t int error)\n+{\n+\tstruct iomap_ioend *ioend = wpc->wb_ctx;\n+\tstruct ext4_inode_info *ei = EXT4_I(ioend->io_inode);\n+\n+\t/* Need to convert unwritten extents when I/Os are completed. 
*/\n+\tif ((ioend->io_flags & IOMAP_IOEND_UNWRITTEN) ||\n+\t ioend->io_offset + ioend->io_size > READ_ONCE(ei->i_disksize))\n+\t\tioend->io_bio.bi_end_io = ext4_iomap_end_bio;\n+\n+\treturn iomap_ioend_writeback_submit(wpc, error);\n+}\n+\n+static const struct iomap_writeback_ops ext4_writeback_ops = {\n+\t.writeback_range = ext4_iomap_writeback_range,\n+\t.writeback_submit = ext4_iomap_writeback_submit,\n+};\n+\n static int ext4_iomap_writepages(struct address_space *mapping,\n \t\t\t\t struct writeback_control *wbc)\n {\n-\treturn 0;\n+\tstruct inode *inode = mapping->host;\n+\tstruct super_block *sb = inode->i_sb;\n+\tlong nr = wbc->nr_to_write;\n+\tint alloc_ctx, ret;\n+\tstruct iomap_writepage_ctx wpc = {\n+\t\t.inode = inode,\n+\t\t.wbc = wbc,\n+\t\t.ops = &ext4_writeback_ops,\n+\t};\n+\n+\tret = ext4_emergency_state(sb);\n+\tif (unlikely(ret))\n+\t\treturn ret;\n+\n+\talloc_ctx = ext4_writepages_down_read(sb);\n+\ttrace_ext4_writepages(inode, wbc);\n+\tret = iomap_writepages(&wpc);\n+\ttrace_ext4_writepages_result(inode, wbc, ret, nr - wbc->nr_to_write);\n+\text4_writepages_up_read(sb, alloc_ctx);\n+\n+\treturn ret;\n }\n \n /*\ndiff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c\nindex dc82e7b57e75..07978e2cd9c8 100644\n--- a/fs/ext4/page-io.c\n+++ b/fs/ext4/page-io.c\n@@ -22,6 +22,7 @@\n #include <linux/bio.h>\n #include <linux/workqueue.h>\n #include <linux/kernel.h>\n+#include <linux/iomap.h>\n #include <linux/slab.h>\n #include <linux/mm.h>\n #include <linux/sched/mm.h>\n@@ -611,3 +612,131 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio,\n \n \treturn 0;\n }\n+\n+static int ext4_iomap_wb_update_disksize(handle_t *handle, struct inode *inode,\n+\t\t\t\t\t loff_t end)\n+{\n+\tloff_t new_disksize = end;\n+\tstruct ext4_inode_info *ei = EXT4_I(inode);\n+\tint ret;\n+\n+\tif (new_disksize <= READ_ONCE(ei->i_disksize))\n+\t\treturn 0;\n+\n+\t/*\n+\t * Update on-disk size after IO is completed. 
Races with truncate\n+\t * are avoided by checking i_size under i_data_sem.\n+\t */\n+\tdown_write(&ei->i_data_sem);\n+\tnew_disksize = min(new_disksize, i_size_read(inode));\n+\tif (new_disksize > ei->i_disksize)\n+\t\tei->i_disksize = new_disksize;\n+\tup_write(&ei->i_data_sem);\n+\tret = ext4_mark_inode_dirty(handle, inode);\n+\tif (ret)\n+\t\tEXT4_ERROR_INODE_ERR(inode, -ret, \"Failed to mark inode dirty\");\n+\n+\treturn ret;\n+}\n+\n+static void ext4_iomap_finish_ioend(struct iomap_ioend *ioend)\n+{\n+\tstruct inode *inode = ioend->io_inode;\n+\tstruct super_block *sb = inode->i_sb;\n+\tloff_t pos = ioend->io_offset;\n+\tsize_t size = ioend->io_size;\n+\thandle_t *handle;\n+\tint credits;\n+\tint ret, err;\n+\n+\tret = blk_status_to_errno(ioend->io_bio.bi_status);\n+\tif (unlikely(ret)) {\n+\t\tif (test_opt(sb, DATA_ERR_ABORT))\n+\t\t\tjbd2_journal_abort(EXT4_SB(sb)->s_journal, ret);\n+\t\tgoto out;\n+\t}\n+\n+\t/* We may need to convert one extent and dirty the inode. */\n+\tcredits = ext4_chunk_trans_blocks(inode,\n+\t\t\tEXT4_MAX_BLOCKS(size, pos, inode->i_blkbits));\n+\thandle = ext4_journal_start(inode, EXT4_HT_EXT_CONVERT, credits);\n+\tif (IS_ERR(handle)) {\n+\t\tret = PTR_ERR(handle);\n+\t\tgoto out_err;\n+\t}\n+\n+\tif (ioend->io_flags & IOMAP_IOEND_UNWRITTEN) {\n+\t\tret = ext4_convert_unwritten_extents(handle, inode, pos, size);\n+\t\tif (ret)\n+\t\t\tgoto out_journal;\n+\t}\n+\n+\tret = ext4_iomap_wb_update_disksize(handle, inode, pos + size);\n+out_journal:\n+\terr = ext4_journal_stop(handle);\n+\tif (!ret)\n+\t\tret = err;\n+out_err:\n+\tif (ret < 0 && !ext4_emergency_state(sb)) {\n+\t\text4_msg(sb, KERN_EMERG,\n+\t\t\t \"failed to convert unwritten extents to written extents or update inode size -- potential data loss! 
(inode %llu, error %d)\",\n+\t\t\t inode->i_ino, ret);\n+\t}\n+out:\n+\tiomap_finish_ioends(ioend, ret);\n+}\n+\n+/*\n+ * Work on buffered iomap completed IO, to convert unwritten extents to\n+ * mapped extents\n+ */\n+void ext4_iomap_end_io(struct work_struct *work)\n+{\n+\tstruct ext4_inode_info *ei = container_of(work, struct ext4_inode_info,\n+\t\t\t\t\t\t i_iomap_ioend_work);\n+\tstruct iomap_ioend *ioend;\n+\tstruct list_head ioend_list;\n+\tunsigned long flags;\n+\n+\tspin_lock_irqsave(&ei->i_completed_io_lock, flags);\n+\tlist_replace_init(&ei->i_iomap_ioend_list, &ioend_list);\n+\tspin_unlock_irqrestore(&ei->i_completed_io_lock, flags);\n+\n+\tiomap_sort_ioends(&ioend_list);\n+\twhile (!list_empty(&ioend_list)) {\n+\t\tioend = list_entry(ioend_list.next, struct iomap_ioend, io_list);\n+\t\tlist_del_init(&ioend->io_list);\n+\t\tiomap_ioend_try_merge(ioend, &ioend_list);\n+\t\text4_iomap_finish_ioend(ioend);\n+\t}\n+}\n+\n+void ext4_iomap_end_bio(struct bio *bio)\n+{\n+\tstruct iomap_ioend *ioend = iomap_ioend_from_bio(bio);\n+\tstruct inode *inode = ioend->io_inode;\n+\tstruct ext4_inode_info *ei = EXT4_I(inode);\n+\tstruct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);\n+\tunsigned long flags;\n+\tint ret;\n+\n+\t/* Needs to convert unwritten extents or update the i_disksize. */\n+\tif ((ioend->io_flags & IOMAP_IOEND_UNWRITTEN) ||\n+\t ioend->io_offset + ioend->io_size > READ_ONCE(ei->i_disksize))\n+\t\tgoto defer;\n+\n+\t/* Needs to abort the journal on data_err=abort. 
*/\n+\tret = blk_status_to_errno(ioend->io_bio.bi_status);\n+\tif (unlikely(ret) && test_opt(inode->i_sb, DATA_ERR_ABORT) &&\n+\t !ext4_emergency_state(inode->i_sb))\n+\t\tgoto defer;\n+\n+\tiomap_finish_ioends(ioend, ret);\n+\treturn;\n+defer:\n+\tspin_lock_irqsave(&ei->i_completed_io_lock, flags);\n+\tif (list_empty(&ei->i_iomap_ioend_list))\n+\t\tqueue_work(sbi->rsv_conversion_wq, &ei->i_iomap_ioend_work);\n+\tlist_add_tail(&ioend->io_list, &ei->i_iomap_ioend_list);\n+\tspin_unlock_irqrestore(&ei->i_completed_io_lock, flags);\n+}\ndiff --git a/fs/ext4/super.c b/fs/ext4/super.c\nindex 9bc294b769db..51d87db53543 100644\n--- a/fs/ext4/super.c\n+++ b/fs/ext4/super.c\n@@ -123,7 +123,10 @@ static const struct fs_parameter_spec ext4_param_specs[];\n * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)\n *\n * writepages:\n- * transaction start -> page lock(s) -> i_data_sem (rw)\n+ * - buffer_head path:\n+ * transaction start -> folio lock(s) -> i_data_sem (rw)\n+ * - iomap path:\n+ * folio lock -> transaction start -> i_data_sem (rw)\n */\n \n static const struct fs_context_operations ext4_context_ops = {\n@@ -1428,10 +1431,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)\n #endif\n \tei->jinode = NULL;\n \tINIT_LIST_HEAD(&ei->i_rsv_conversion_list);\n+\tINIT_LIST_HEAD(&ei->i_iomap_ioend_list);\n \tspin_lock_init(&ei->i_completed_io_lock);\n \tei->i_sync_tid = 0;\n \tei->i_datasync_tid = 0;\n \tINIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);\n+\tINIT_WORK(&ei->i_iomap_ioend_work, ext4_iomap_end_io);\n \text4_fc_init_inode(&ei->vfs_inode);\n \tspin_lock_init(&ei->i_fc_lock);\n \tmmb_init(&ei->i_metadata_bhs, &ei->vfs_inode.i_data);\n", "prefixes": [ "v3", "09/22" ] }