Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/2223090/?format=api
{ "id": 2223090, "url": "http://patchwork.ozlabs.org/api/patches/2223090/?format=api", "web_url": "http://patchwork.ozlabs.org/project/linux-ext4/patch/20260414122647.15686-1-changfengnan@bytedance.com/", "project": { "id": 8, "url": "http://patchwork.ozlabs.org/api/projects/8/?format=api", "name": "Linux ext4 filesystem development", "link_name": "linux-ext4", "list_id": "linux-ext4.vger.kernel.org", "list_email": "linux-ext4@vger.kernel.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20260414122647.15686-1-changfengnan@bytedance.com>", "list_archive_url": null, "date": "2026-04-14T12:26:47", "name": "[RFC] iomap: add fast read path for small direct I/O", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "6c28d9e00ac15962b04d9fb8ae55208e352572aa", "submitter": { "id": 80323, "url": "http://patchwork.ozlabs.org/api/people/80323/?format=api", "name": "Fengnan Chang", "email": "fengnanchang@gmail.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/linux-ext4/patch/20260414122647.15686-1-changfengnan@bytedance.com/mbox/", "series": [ { "id": 499829, "url": "http://patchwork.ozlabs.org/api/series/499829/?format=api", "web_url": "http://patchwork.ozlabs.org/project/linux-ext4/list/?series=499829", "date": "2026-04-14T12:26:47", "name": "[RFC] iomap: add fast read path for small direct I/O", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/499829/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2223090/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2223090/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "\n <SRS0=Zu+I=CN=vger.kernel.org=linux-ext4+bounces-15831-patchwork-incoming=ozlabs.org@ozlabs.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "linux-ext4@vger.kernel.org" ], "Delivered-To": [ 
"patchwork-incoming@legolas.ozlabs.org", "patchwork-incoming@ozlabs.org" ], "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=pass (2048-bit key;\n unprotected) header.d=gmail.com header.i=@gmail.com header.a=rsa-sha256\n header.s=20251104 header.b=EfN8c0ye;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=ozlabs.org\n (client-ip=2404:9400:2221:ea00::3; helo=mail.ozlabs.org;\n envelope-from=srs0=zu+i=cn=vger.kernel.org=linux-ext4+bounces-15831-patchwork-incoming=ozlabs.org@ozlabs.org;\n receiver=patchwork.ozlabs.org)", "gandalf.ozlabs.org;\n arc=pass smtp.remote-ip=\"2600:3c0a:e001:db::12fc:5321\"\n arc.chain=subspace.kernel.org", "gandalf.ozlabs.org;\n dmarc=pass (p=none dis=none) header.from=gmail.com", "gandalf.ozlabs.org;\n\tdkim=pass (2048-bit key;\n unprotected) header.d=gmail.com header.i=@gmail.com header.a=rsa-sha256\n header.s=20251104 header.b=EfN8c0ye;\n\tdkim-atps=neutral", "gandalf.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=vger.kernel.org\n (client-ip=2600:3c0a:e001:db::12fc:5321; helo=sea.lore.kernel.org;\n envelope-from=linux-ext4+bounces-15831-patchwork-incoming=ozlabs.org@vger.kernel.org;\n receiver=ozlabs.org)", "smtp.subspace.kernel.org;\n\tdkim=pass (2048-bit key) header.d=gmail.com header.i=@gmail.com\n header.b=\"EfN8c0ye\"", "smtp.subspace.kernel.org;\n arc=none smtp.client-ip=209.85.214.182", "smtp.subspace.kernel.org;\n dmarc=pass (p=none dis=none) header.from=gmail.com", "smtp.subspace.kernel.org;\n spf=pass smtp.mailfrom=gmail.com" ], "Received": [ "from mail.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1 raw public key)\n server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4fw3Sb6psCz1xtJ\n\tfor <incoming@patchwork.ozlabs.org>; Tue, 14 Apr 2026 22:27:27 +1000 (AEST)", "from 
mail.ozlabs.org (mail.ozlabs.org [IPv6:2404:9400:2221:ea00::3])\n\tby gandalf.ozlabs.org (Postfix) with ESMTP id 4fw3SV1Q2Lz4wJD\n\tfor <incoming@patchwork.ozlabs.org>; Tue, 14 Apr 2026 22:27:22 +1000 (AEST)", "by gandalf.ozlabs.org (Postfix)\n\tid 4fw3SV1G3Kz4wJJ; Tue, 14 Apr 2026 22:27:22 +1000 (AEST)", "from sea.lore.kernel.org (sea.lore.kernel.org\n [IPv6:2600:3c0a:e001:db::12fc:5321])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519)\n\t(No client certificate requested)\n\tby gandalf.ozlabs.org (Postfix) with ESMTPS id 4fw3SQ418Mz4wJD\n\tfor <patchwork-incoming@ozlabs.org>; Tue, 14 Apr 2026 22:27:18 +1000 (AEST)", "from smtp.subspace.kernel.org (conduit.subspace.kernel.org\n [100.90.174.1])\n\tby sea.lore.kernel.org (Postfix) with ESMTP id 6E0AE301F9F0\n\tfor <patchwork-incoming@ozlabs.org>; Tue, 14 Apr 2026 12:27:16 +0000 (UTC)", "from localhost.localdomain (localhost.localdomain [127.0.0.1])\n\tby smtp.subspace.kernel.org (Postfix) with ESMTP id 068913A4502;\n\tTue, 14 Apr 2026 12:27:15 +0000 (UTC)", "from mail-pl1-f182.google.com (mail-pl1-f182.google.com\n [209.85.214.182])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits))\n\t(No client certificate requested)\n\tby smtp.subspace.kernel.org (Postfix) with ESMTPS id 5903F3043CF\n\tfor <linux-ext4@vger.kernel.org>; Tue, 14 Apr 2026 12:27:13 +0000 (UTC)", "by mail-pl1-f182.google.com with SMTP id\n d9443c01a7336-2a8fba3f769so24626245ad.2\n for <linux-ext4@vger.kernel.org>;\n Tue, 14 Apr 2026 05:27:13 -0700 (PDT)", "from 192.168.5.4 ([2409:8a28:f32:86e4:9161:fb54:c7b3:5567])\n by smtp.googlemail.com with ESMTPSA id\n d9443c01a7336-2b468273ccfsm62273745ad.43.2026.04.14.05.27.07\n (version=TLS1_3 cipher=TLS_CHACHA20_POLY1305_SHA256 bits=256/256);\n Tue, 14 Apr 2026 05:27:11 -0700 (PDT)" ], "ARC-Seal": [ "i=2; a=rsa-sha256; d=ozlabs.org; s=201707; t=1776169642; 
cv=pass;\n\tb=YXmqv6KqW9uzZu7YDgY1Zii2S8PqLxQYhRooa2zHlG+EEh5k33ZsqcfXA4xelWDhi3rT+0sdKOGSyUO19uHN7SbHzngCRBTYG4/Cv431Zxw8Is87njYq+yMymwt9RpJ5o0WB3YLZSmFpldh4ibvB/Kvqd6lJChA8ljg1egE0halZMBlaI0kaBidhkfNocjGUfQabsbZEqw52ekq0vhKq6k86L/xG1t5EcveefNvbNefsvnOvTjMnOTSQDwE5Uyym0QqI0DCmgLC3LaurVDJPtapERZLK7d6o4Sy9/IY7BZaQPsoyjLVm0iZScczO1+sdUKXZY58DMxFopf3uudC0HQ==", "i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116;\n\tt=1776169634; cv=none;\n b=LaT1C1uK7cW0uysz5B2SIJxhd16inxm8FrnQ6HQY71rHjrcgJLe3vZUXNkIf+ZhXudW5q1b7najDWe7YFTHRIVL2v/eLqGeY6jfwqkf0rfvHmWslX40SlHwPfWyYqoDZWMoEYRO3gOcWHeHwN5ZB9aW1VbQivMCjlYS0ZdnKJww=" ], "ARC-Message-Signature": [ "i=2; a=rsa-sha256; d=ozlabs.org; s=201707;\n\tt=1776169642; c=relaxed/relaxed;\n\tbh=bEDuvXTH5Zn2XT2X16UjfO2ntfyWS9qDU8ZPsqL80os=;\n\th=From:To:Cc:Subject:Date:Message-Id:MIME-Version;\n b=Gel+k1kuz0h4EHsjfM62WLL9vChtsyNwyS6J+LVRp5nWcZf4HvCcjXwX3wxgCHWTFcKKZUMxJuBU+YiWyO4fKTHGGQO7prLEP4SWKEkv39kZxA8vUUc9zOU9RSvYpOBikjybFsBSKEXK0b+7D+f0KQbvHdX4b+VvwCXpzqWKP70UlKy4uYGiyUMkqPXO/8rO0VFZNb1kFoLRDoEHuxE7PGFv9FvKETarNztWNTHZLfo1UQ3id7SY9gCFrf5iFwNiYSI2FOCsTSLyGi2NncvIO4Zmchje/tLna+wYBT+nSDUb+Yyk861FuvDj5BvZCn53JyFjTdQw8Pr8/BAgJKP5+w==", "i=1; a=rsa-sha256; d=subspace.kernel.org;\n\ts=arc-20240116; t=1776169634; c=relaxed/simple;\n\tbh=lInK4xg5OPiqmqMrMqo3ff+cEQrecsEeB4QLUNhSHm4=;\n\th=From:To:Cc:Subject:Date:Message-Id:MIME-Version;\n b=JqMo63tbtuPulukGjSJPvPgnoauolKA3ReLVhDqYimBjZa/8KZxPWdSs1ioRRJDHxU3OUZYQuOSW2MTVQ7GStnjqs3icMvpnAyC8NAbCb/MepCaXzxOckS/W8Qz4zeJQyeXjGo7wP2x71NuQjPvhm3mciEYaexLXAiQbzgoILrQ=" ], "ARC-Authentication-Results": [ "i=2; gandalf.ozlabs.org;\n dmarc=pass (p=none dis=none) header.from=gmail.com; dkim=pass (2048-bit key;\n unprotected) header.d=gmail.com header.i=@gmail.com header.a=rsa-sha256\n header.s=20251104 header.b=EfN8c0ye; dkim-atps=neutral;\n spf=pass (client-ip=2600:3c0a:e001:db::12fc:5321; helo=sea.lore.kernel.org;\n 
envelope-from=linux-ext4+bounces-15831-patchwork-incoming=ozlabs.org@vger.kernel.org;\n receiver=ozlabs.org) smtp.mailfrom=vger.kernel.org", "i=1; smtp.subspace.kernel.org;\n dmarc=pass (p=none dis=none) header.from=gmail.com;\n spf=pass smtp.mailfrom=gmail.com;\n dkim=pass (2048-bit key) header.d=gmail.com header.i=@gmail.com\n header.b=EfN8c0ye; arc=none smtp.client-ip=209.85.214.182" ], "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=gmail.com; s=20251104; t=1776169633; x=1776774433;\n darn=vger.kernel.org;\n h=content-transfer-encoding:mime-version:message-id:date:subject:cc\n :to:from:from:to:cc:subject:date:message-id:reply-to;\n bh=bEDuvXTH5Zn2XT2X16UjfO2ntfyWS9qDU8ZPsqL80os=;\n b=EfN8c0yet70Tc5H7N3Bs8BIwx1TZ5Sq4ByBNOO39p+trvs3vJhc72sRmk5eAes8b9G\n /QjhQfs8oGr3032yYM1eAVQj/p8BuuvSHGb7dt4OWRJveyMUnN+sJ6OwXCwto6O+RK6M\n 52BuqJ7eH7U0f7hiHq8dI3TUcUYywowO559Lyfnvgqqxdj1E73dwG5yJ+DAuiWjHU+OU\n Nvmzo8K5qFRDoQYDPCEsd5QDkTsdExHHogCCvR52M6flw5saMmfPrYWoDLdl432o1wtH\n y/ipTCfZukeu27M8PgSIkeuNKuUP76V9JUYpj5AkP7Y6v3Q4xy60lHbnqQ9SVm/RbfT7\n FyrA==", "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=1e100.net; s=20251104; t=1776169633; x=1776774433;\n h=content-transfer-encoding:mime-version:message-id:date:subject:cc\n :to:from:x-gm-gg:x-gm-message-state:from:to:cc:subject:date\n :message-id:reply-to;\n bh=bEDuvXTH5Zn2XT2X16UjfO2ntfyWS9qDU8ZPsqL80os=;\n b=XyrSg3JsJoKoHHwkjiXeYIpYlljcaBIp/AwihtU/CU3NksjmjbWj3KYU0OKufRA55q\n N9/u47JHAvvl20IOYuTjD07V+gLAM5BpxgWazk6HuY9LQzS3CMhyt+eILD+Xsa30u+oi\n dFICMLyvFP7S1iYyGwmW7Wzd+ptog5RulftEohWPigtemssw9jxlBlz0mdwEMfrHe4Us\n gItiFqcnQcxeHtXIoZapZ+dFlTNb4+4Qvr85gUcUASmae197yzkj2YdXAaa6RJVQTrYC\n E152jLdtWRkMJFlMya+OlYp++6VstSg9NW+REiSV/+ugAJMX9hJqDLBshmuGqKUTPYJd\n lUCg==", "X-Forwarded-Encrypted": "i=1;\n AFNElJ+YrWI8F1kNm3OtfnW4ozxNY9NzWYsCobKFdI9xkt9LFleiyVbv86HlA1Mrl1/lcgXaBbwd1ZMN0Au+@vger.kernel.org", "X-Gm-Message-State": 
"AOJu0YzmIvtdMebl3ETyEwGcJLCmnEIAVGdEDc+Shcrw7uUc0Gc0efqV\n\tv8OabLoHbZO+hgN8vFdEL8ZiRvjSONR7Utq7jaTIR+J8o4RtlNSm2PeW", "X-Gm-Gg": "AeBDieutYtq8cjbNhLgRTz0L/iRCEQ6qVQC/G2PKtmuKhk0UDOO+8XfhDaBo+SAmNOx\n\thGIKVyLCvErh/5vbEUoXuF+G0cFPLsEMchMlzzdQJd7UHR4YRFcYJJJifgOKNUl6obghAk6ehK5\n\tUdJQA4fMzZ65lVOa7rQqOOT/DtuZhmjfbDOXgAWyz8sPGgV+iAP5kgsHtEBB4AqCvrx6AveKMO1\n\tvlUdkWgbv6C/imFPLEHNb5doz95QDzEQv6VOjE9u7NNs4dMs6JG3QJH9fMePtheq7WQt4fyJOZB\n\tin9PRV9M6WM6SRwYm4xUvkQMOpI4bf/u2iASLlrF84sztGr2LuNWwvk26iDATGO7mLf0AL9HV4T\n\tYToN9rE0WqTdofQD7jvlGMYdl0sPuxOkf4oTmgHL67Pj71tIl11ofx2r4JO2ox6KByvYktaHa9Y\n\tT19K1M7ZLr5hlueOuC+xy2NgTNW5szcdD3aGyH2Vn2NDynkP27itPNShJVeBRoqdnqUkA=", "X-Received": "by 2002:a17:903:1250:b0:2ae:54b2:27c7 with SMTP id\n d9443c01a7336-2b2d5a88d3dmr176156215ad.39.1776169632604;\n Tue, 14 Apr 2026 05:27:12 -0700 (PDT)", "From": "Fengnan Chang <fengnanchang@gmail.com>", "X-Google-Original-From": "Fengnan Chang <changfengnan@bytedance.com>", "To": "brauner@kernel.org,\n\tdjwong@kernel.org,\n\tlinux-xfs@vger.kernel.org,\n\tlinux-fsdevel@vger.kernel.org,\n\tlinux-ext4@vger.kernel.org", "Cc": "lidiangang@bytedance.com,\n\tFengnan Chang <changfengnan@bytedance.com>", "Subject": "[RFC PATCH] iomap: add fast read path for small direct I/O", "Date": "Tue, 14 Apr 2026 20:26:47 +0800", "Message-Id": "<20260414122647.15686-1-changfengnan@bytedance.com>", "X-Mailer": "git-send-email 2.39.5 (Apple Git-154)", "Precedence": "bulk", "X-Mailing-List": "linux-ext4@vger.kernel.org", "List-Id": "<linux-ext4.vger.kernel.org>", "List-Subscribe": "<mailto:linux-ext4+subscribe@vger.kernel.org>", "List-Unsubscribe": "<mailto:linux-ext4+unsubscribe@vger.kernel.org>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "X-Spam-Status": "No, score=-1.2 required=5.0 tests=ARC_SIGNED,ARC_VALID,\n\tDKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DMARC_PASS,\n\tFREEMAIL_FORGED_FROMDOMAIN,FREEMAIL_FROM,HEADER_FROM_DIFFERENT_DOMAINS,\n\tMAILING_LIST_MULTI,SPF_HELO_NONE,SPF_PASS 
autolearn=disabled\n\tversion=4.0.1", "X-Spam-Checker-Version": "SpamAssassin 4.0.1 (2024-03-25) on gandalf.ozlabs.org" }, "content": "When running 4K random read workloads on high-performance Gen5 NVMe\nSSDs, the software overhead in the iomap direct I/O path\n(__iomap_dio_rw) becomes a significant bottleneck.\n\nUsing io_uring with poll mode for a 4K randread test on a raw block\ndevice:\ntaskset -c 30 ./t/io_uring -p1 -d512 -b4096 -s32 -c32 -F1 -B1 -R1 -X1\n-n1 -P1 /dev/nvme10n1\nResult: ~3.2M IOPS\n\nRunning the exact same workload on ext4 and XFS:\ntaskset -c 30 ./t/io_uring -p1 -d512 -b4096 -s32 -c32 -F1 -B1 -R1 -X1\n-n1 -P1 /mnt/testfile\nResult: ~1.9M IOPS\n\nProfiling the ext4 workload reveals that a significant portion of CPU\ntime is spent on memory allocation and the iomap state machine\niteration:\n 5.33% [kernel] [k] __iomap_dio_rw\n 3.26% [kernel] [k] iomap_iter\n 2.37% [kernel] [k] iomap_dio_bio_iter\n 2.35% [kernel] [k] kfree\n 1.33% [kernel] [k] iomap_dio_complete\n\nI attempted several incremental optimizations in the __iomap_dio_rw()\npath to close the gap:\n1. Allocating the `bio` and `struct iomap_dio` together to avoid a\n separate kmalloc. However, because `struct iomap_dio` is relatively\n large and the main path is complex, this yielded almost no\n performance improvement.\n2. Reducing unnecessary state resets in the iomap state machine (e.g.,\n skipping `iomap_iter_reset_iomap` where safe). 
This provided a ~5%\n IOPS boost, which is helpful but still falls far short of closing\n the gap with the raw block device.\n\nSince optimizing the heavy generic path did not yield the desired\nresults for this specific, highly-demanding Gen5 SSD scenario, this\nRFC patch introduces a dedicated asynchronous fast path.\n\nThe fast path is triggered when the request satisfies:\n- Asynchronous READ request only for now.\n- I/O size is <= inode blocksize (fits in a single block, no splits).\n- Aligned to the block device's logical block size.\n- No bounce buffering, fscrypt, or fsverity involved.\n- No custom `iomap_dio_ops` (dops) registered by the filesystem.\n\nBy using a dedicated bio_set (`iomap_dio_fast_read_pool`) to embed a\nmuch smaller completion state (`struct iomap_dio_fast_read`) directly\nin the bio's front padding, we completely eliminate kmalloc/kfree and\ndrastically shorten the execution path.\n\nAfter this optimization, the heavy generic functions disappear from the\nprofile, replaced by a single streamlined execution path:\n 4.83% [kernel] [k] iomap_dio_fast_read_async.isra.31\n\nWith this patch, 4K random read IOPS on ext4 increases from 1.9M to\n2.3M.\n\nI am aware that adding a completely separate fast path introduces\nduplicate code and may result in iomap_begin being called twice; this\nis likely unacceptable for merging in its current form.\n\nHowever, I am submitting this patch to validate whether this\noptimization direction is correct and worth pursuing. 
I would appreciate\nfeedback on how to better integrate these ideas into the main iomap\nexecution path.\n\nSigned-off-by: Fengnan Chang <changfengnan@bytedance.com>\n---\n fs/iomap/direct-io.c | 275 +++++++++++++++++++++++++++++++++++++++++++\n 1 file changed, 275 insertions(+)", "diff": "diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c\nindex e911daedff65a..e4183f7c2f962 100644\n--- a/fs/iomap/direct-io.c\n+++ b/fs/iomap/direct-io.c\n@@ -5,10 +5,14 @@\n */\n #include <linux/blk-crypto.h>\n #include <linux/fscrypt.h>\n+#include <linux/fsverity.h>\n #include <linux/pagemap.h>\n #include <linux/iomap.h>\n #include <linux/task_io_accounting_ops.h>\n #include <linux/fserror.h>\n+#include <linux/kobject.h>\n+#include <linux/sysfs.h>\n+#include <linux/init.h>\n #include \"internal.h\"\n #include \"trace.h\"\n \n@@ -880,12 +884,231 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,\n }\n EXPORT_SYMBOL_GPL(__iomap_dio_rw);\n \n+static bool iomap_dio_fast_read_enabled = true;\n+\n+struct iomap_dio_fast_read {\n+\tstruct kiocb\t*iocb;\n+\tsize_t\t\tsize;\n+\tbool\t\tshould_dirty;\n+\tstruct work_struct\twork;\n+\tstruct bio\tbio ____cacheline_aligned_in_smp;\n+};\n+\n+static struct bio_set iomap_dio_fast_read_pool;\n+\n+static void iomap_dio_fast_read_complete_work(struct work_struct *work)\n+{\n+\tstruct iomap_dio_fast_read *fr =\n+\t\tcontainer_of(work, struct iomap_dio_fast_read, work);\n+\tstruct kiocb *iocb = fr->iocb;\n+\tstruct inode *inode = file_inode(iocb->ki_filp);\n+\tbool should_dirty = fr->should_dirty;\n+\tstruct bio *bio = &fr->bio;\n+\tssize_t ret;\n+\n+\tWRITE_ONCE(iocb->private, NULL);\n+\n+\tif (likely(!bio->bi_status)) {\n+\t\tret = fr->size;\n+\t\tiocb->ki_pos += ret;\n+\t} else {\n+\t\tret = blk_status_to_errno(bio->bi_status);\n+\t\tfserror_report_io(inode, FSERR_DIRECTIO_READ, iocb->ki_pos,\n+\t\t\t\t fr->size, ret, GFP_NOFS);\n+\t}\n+\n+\tif (should_dirty) {\n+\t\tbio_check_pages_dirty(bio);\n+\t} else 
{\n+\t\tbio_release_pages(bio, false);\n+\t\tbio_put(bio);\n+\t}\n+\n+\tinode_dio_end(inode);\n+\n+\ttrace_iomap_dio_complete(iocb, ret < 0 ? ret : 0, ret > 0 ? ret : 0);\n+\tiocb->ki_complete(iocb, ret);\n+}\n+\n+static void iomap_dio_fast_read_end_io(struct bio *bio)\n+{\n+\tstruct iomap_dio_fast_read *fr = bio->bi_private;\n+\tstruct kiocb *iocb = fr->iocb;\n+\n+\tif (unlikely(bio->bi_status)) {\n+\t\tstruct inode *inode = file_inode(iocb->ki_filp);\n+\n+\t\tINIT_WORK(&fr->work, iomap_dio_fast_read_complete_work);\n+\t\tqueue_work(inode->i_sb->s_dio_done_wq, &fr->work);\n+\t\treturn;\n+\t}\n+\n+\tiomap_dio_fast_read_complete_work(&fr->work);\n+}\n+\n+static inline bool iomap_dio_fast_read_supported(struct kiocb *iocb,\n+\t\t\t\t\t struct iov_iter *iter,\n+\t\t\t\t\t unsigned int dio_flags,\n+\t\t\t\t\t size_t done_before)\n+{\n+\tstruct inode *inode = file_inode(iocb->ki_filp);\n+\tsize_t count = iov_iter_count(iter);\n+\tunsigned int alignment;\n+\n+\tif (!iomap_dio_fast_read_enabled)\n+\t\treturn false;\n+\tif (iov_iter_rw(iter) != READ)\n+\t\treturn false;\n+\n+\t/*\n+\t * Fast read is an optimization for small IO. 
Filter out large IO early\n+\t * as it's the most common case to fail for typical direct IO workloads.\n+\t */\n+\tif (count > inode->i_sb->s_blocksize)\n+\t\treturn false;\n+\n+\tif (is_sync_kiocb(iocb) || done_before)\n+\t\treturn false;\n+\tif (dio_flags & (IOMAP_DIO_FORCE_WAIT | IOMAP_DIO_BOUNCE))\n+\t\treturn false;\n+\tif (iocb->ki_pos + count > i_size_read(inode))\n+\t\treturn false;\n+\tif (IS_ENCRYPTED(inode) || fsverity_active(inode))\n+\t\treturn false;\n+\n+\tif (count < bdev_logical_block_size(inode->i_sb->s_bdev))\n+\t\treturn false;\n+\n+\tif (dio_flags & IOMAP_DIO_FSBLOCK_ALIGNED)\n+\t\talignment = i_blocksize(inode);\n+\telse\n+\t\talignment = bdev_logical_block_size(inode->i_sb->s_bdev);\n+\n+\tif ((iocb->ki_pos | count) & (alignment - 1))\n+\t\treturn false;\n+\n+\treturn true;\n+}\n+\n+static ssize_t iomap_dio_fast_read_async(struct kiocb *iocb,\n+\t\t\t\t\t struct iov_iter *iter,\n+\t\t\t\t\t const struct iomap_ops *ops,\n+\t\t\t\t\t void *private)\n+{\n+\tstruct inode *inode = file_inode(iocb->ki_filp);\n+\tsize_t count = iov_iter_count(iter);\n+\tint nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);\n+\tbool should_dirty = user_backed_iter(iter);\n+\tstruct iomap_dio_fast_read *fr;\n+\tstruct iomap_iter iomi = {\n+\t\t.inode\t\t= inode,\n+\t\t.pos\t\t= iocb->ki_pos,\n+\t\t.len\t\t= count,\n+\t\t.flags\t\t= IOMAP_DIRECT,\n+\t\t.private\t= private,\n+\t};\n+\tstruct bio *bio;\n+\tssize_t ret;\n+\n+\tif (iocb->ki_flags & IOCB_NOWAIT)\n+\t\tiomi.flags |= IOMAP_NOWAIT;\n+\n+\tret = kiocb_write_and_wait(iocb, count);\n+\tif (ret)\n+\t\treturn ret;\n+\n+\tinode_dio_begin(inode);\n+\n+\tret = ops->iomap_begin(inode, iomi.pos, count, iomi.flags,\n+\t\t\t &iomi.iomap, &iomi.srcmap);\n+\tif (ret) {\n+\t\tinode_dio_end(inode);\n+\t\treturn ret;\n+\t}\n+\n+\tif (iomi.iomap.type != IOMAP_MAPPED ||\n+\t iomi.iomap.offset > iomi.pos ||\n+\t iomi.iomap.offset + iomi.iomap.length < iomi.pos + count ||\n+\t (iomi.iomap.flags & IOMAP_F_ANON_WRITE)) 
{\n+\t\tret = -EAGAIN;\n+\t\tgoto out_iomap_end;\n+\t}\n+\n+\tif (!inode->i_sb->s_dio_done_wq) {\n+\t\tret = sb_init_dio_done_wq(inode->i_sb);\n+\t\tif (ret < 0)\n+\t\t\tgoto out_iomap_end;\n+\t}\n+\n+\ttrace_iomap_dio_rw_begin(iocb, iter, 0, 0);\n+\n+\tbio = bio_alloc_bioset(iomi.iomap.bdev, nr_pages,\n+\t\t\t REQ_OP_READ | REQ_SYNC | REQ_IDLE,\n+\t\t\t GFP_KERNEL, &iomap_dio_fast_read_pool);\n+\tfr = container_of(bio, struct iomap_dio_fast_read, bio);\n+\tfr->iocb = iocb;\n+\tfr->should_dirty = should_dirty;\n+\n+\tbio->bi_iter.bi_sector = iomap_sector(&iomi.iomap, iomi.pos);\n+\tbio->bi_ioprio = iocb->ki_ioprio;\n+\tbio->bi_private = fr;\n+\tbio->bi_end_io = iomap_dio_fast_read_end_io;\n+\n+\tret = bio_iov_iter_get_pages(bio, iter,\n+\t\t\t\t bdev_logical_block_size(iomi.iomap.bdev) - 1);\n+\tif (unlikely(ret)) {\n+\t\tbio_put(bio);\n+\t\tgoto out_iomap_end;\n+\t}\n+\n+\tif (bio->bi_iter.bi_size != count) {\n+\t\tiov_iter_revert(iter, bio->bi_iter.bi_size);\n+\t\tbio_release_pages(bio, false);\n+\t\tbio_put(bio);\n+\t\tret = -EAGAIN;\n+\t\tgoto out_iomap_end;\n+\t}\n+\n+\tfr->size = bio->bi_iter.bi_size;\n+\n+\tif (should_dirty)\n+\t\tbio_set_pages_dirty(bio);\n+\n+\tif (iocb->ki_flags & IOCB_NOWAIT)\n+\t\tbio->bi_opf |= REQ_NOWAIT;\n+\tif (iocb->ki_flags & IOCB_HIPRI) {\n+\t\tbio->bi_opf |= REQ_POLLED;\n+\t\tbio_set_polled(bio, iocb);\n+\t\tWRITE_ONCE(iocb->private, bio);\n+\t}\n+\tsubmit_bio(bio);\n+\n+\tif (ops->iomap_end)\n+\t\tops->iomap_end(inode, iomi.pos, count, count, iomi.flags,\n+\t\t\t &iomi.iomap);\n+\treturn -EIOCBQUEUED;\n+\n+out_iomap_end:\n+\tif (ops->iomap_end)\n+\t\tops->iomap_end(inode, iomi.pos, count, 0, iomi.flags,\n+\t\t\t &iomi.iomap);\n+\tinode_dio_end(inode);\n+\treturn ret;\n+}\n+\n ssize_t\n iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,\n \t\tconst struct iomap_ops *ops, const struct iomap_dio_ops *dops,\n \t\tunsigned int dio_flags, void *private, size_t done_before)\n {\n \tstruct iomap_dio *dio;\n+\tssize_t 
ret;\n+\n+\tif (!dops && iomap_dio_fast_read_supported(iocb, iter, dio_flags, done_before)) {\n+\t\tret = iomap_dio_fast_read_async(iocb, iter, ops, private);\n+\t\tif (ret != -EAGAIN)\n+\t\t\treturn ret;\n+\t}\n \n \tdio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, private,\n \t\t\t done_before);\n@@ -894,3 +1117,55 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,\n \treturn iomap_dio_complete(dio);\n }\n EXPORT_SYMBOL_GPL(iomap_dio_rw);\n+\n+static ssize_t fast_read_enable_show(struct kobject *kobj,\n+\t\t\t\t struct kobj_attribute *attr, char *buf)\n+{\n+\treturn sysfs_emit(buf, \"%d\\n\", iomap_dio_fast_read_enabled);\n+}\n+\n+static ssize_t fast_read_enable_store(struct kobject *kobj,\n+\t\t\t\t struct kobj_attribute *attr,\n+\t\t\t\t const char *buf, size_t count)\n+{\n+\tbool enable;\n+\tint ret;\n+\n+\tret = kstrtobool(buf, &enable);\n+\tif (ret)\n+\t\treturn ret;\n+\n+\tiomap_dio_fast_read_enabled = enable;\n+\treturn count;\n+}\n+\n+static struct kobj_attribute fast_read_enable_attr =\n+\t__ATTR(fast_read_enable, 0644, fast_read_enable_show, fast_read_enable_store);\n+\n+static struct kobject *iomap_kobj;\n+\n+static int __init iomap_dio_sysfs_init(void)\n+{\n+\tint ret;\n+\n+\tret = bioset_init(&iomap_dio_fast_read_pool, 4,\n+\t\t\t offsetof(struct iomap_dio_fast_read, bio),\n+\t\t\t BIOSET_NEED_BVECS | BIOSET_PERCPU_CACHE);\n+\tif (ret)\n+\t\treturn ret;\n+\n+\tiomap_kobj = kobject_create_and_add(\"iomap\", fs_kobj);\n+\tif (!iomap_kobj) {\n+\t\tbioset_exit(&iomap_dio_fast_read_pool);\n+\t\treturn -ENOMEM;\n+\t}\n+\n+\tif (sysfs_create_file(iomap_kobj, &fast_read_enable_attr.attr)) {\n+\t\tkobject_put(iomap_kobj);\n+\t\tbioset_exit(&iomap_dio_fast_read_pool);\n+\t\treturn -ENOMEM;\n+\t}\n+\n+\treturn 0;\n+}\n+fs_initcall(iomap_dio_sysfs_init);\n", "prefixes": [ "RFC" ] }