get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/2216650/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 2216650,
    "url": "http://patchwork.ozlabs.org/api/patches/2216650/?format=api",
    "web_url": "http://patchwork.ozlabs.org/project/linuxppc-dev/patch/20260327021403.214713-18-usama.arif@linux.dev/",
    "project": {
        "id": 2,
        "url": "http://patchwork.ozlabs.org/api/projects/2/?format=api",
        "name": "Linux PPC development",
        "link_name": "linuxppc-dev",
        "list_id": "linuxppc-dev.lists.ozlabs.org",
        "list_email": "linuxppc-dev@lists.ozlabs.org",
        "web_url": "https://github.com/linuxppc/wiki/wiki",
        "scm_url": "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git",
        "webscm_url": "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/",
        "list_archive_url": "https://lore.kernel.org/linuxppc-dev/",
        "list_archive_url_format": "https://lore.kernel.org/linuxppc-dev/{}/",
        "commit_url_format": "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/commit/?id={}"
    },
    "msgid": "<20260327021403.214713-18-usama.arif@linux.dev>",
    "list_archive_url": "https://lore.kernel.org/linuxppc-dev/20260327021403.214713-18-usama.arif@linux.dev/",
    "date": "2026-03-27T02:08:59",
    "name": "[v3,17/24] mm: thp: allocate PTE page tables lazily at split time",
    "commit_ref": null,
    "pull_url": null,
    "state": "handled-elsewhere",
    "archived": false,
    "hash": "90e33aab5e2f4f7ed8548db90e3334cb6d7e2df4",
    "submitter": {
        "id": 92719,
        "url": "http://patchwork.ozlabs.org/api/people/92719/?format=api",
        "name": "Usama Arif",
        "email": "usama.arif@linux.dev"
    },
    "delegate": null,
    "mbox": "http://patchwork.ozlabs.org/project/linuxppc-dev/patch/20260327021403.214713-18-usama.arif@linux.dev/mbox/",
    "series": [
        {
            "id": 497678,
            "url": "http://patchwork.ozlabs.org/api/series/497678/?format=api",
            "web_url": "http://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=497678",
            "date": "2026-03-27T02:08:43",
            "name": "[v3,01/24] mm: thp: make split_huge_pmd functions return int for error propagation",
            "version": 3,
            "mbox": "http://patchwork.ozlabs.org/series/497678/mbox/"
        }
    ],
    "comments": "http://patchwork.ozlabs.org/api/patches/2216650/comments/",
    "check": "pending",
    "checks": "http://patchwork.ozlabs.org/api/patches/2216650/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "\n <linuxppc-dev+bounces-18860-incoming=patchwork.ozlabs.org@lists.ozlabs.org>",
        "X-Original-To": [
            "incoming@patchwork.ozlabs.org",
            "linuxppc-dev@lists.ozlabs.org"
        ],
        "Delivered-To": "patchwork-incoming@legolas.ozlabs.org",
        "Authentication-Results": [
            "legolas.ozlabs.org;\n\tdkim=pass (1024-bit key;\n unprotected) header.d=linux.dev header.i=@linux.dev header.a=rsa-sha256\n header.s=key1 header.b=w30DGB4U;\n\tdkim-atps=neutral",
            "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=lists.ozlabs.org\n (client-ip=2404:9400:21b9:f100::1; helo=lists.ozlabs.org;\n envelope-from=linuxppc-dev+bounces-18860-incoming=patchwork.ozlabs.org@lists.ozlabs.org;\n receiver=patchwork.ozlabs.org)",
            "lists.ozlabs.org;\n arc=none smtp.remote-ip=95.215.58.173",
            "lists.ozlabs.org;\n dmarc=pass (p=none dis=none) header.from=linux.dev",
            "lists.ozlabs.org;\n\tdkim=pass (1024-bit key;\n unprotected) header.d=linux.dev header.i=@linux.dev header.a=rsa-sha256\n header.s=key1 header.b=w30DGB4U;\n\tdkim-atps=neutral",
            "lists.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=linux.dev\n (client-ip=95.215.58.173; helo=out-173.mta1.migadu.com;\n envelope-from=usama.arif@linux.dev; receiver=lists.ozlabs.org)"
        ],
        "Received": [
            "from lists.ozlabs.org (lists.ozlabs.org\n [IPv6:2404:9400:21b9:f100::1])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4fhkm83X6Qz1y1j\n\tfor <incoming@patchwork.ozlabs.org>; Fri, 27 Mar 2026 13:16:40 +1100 (AEDT)",
            "from boromir.ozlabs.org (localhost [127.0.0.1])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 4fhklf1Bz7z30g6;\n\tFri, 27 Mar 2026 13:16:14 +1100 (AEDT)",
            "from out-173.mta1.migadu.com (out-173.mta1.migadu.com\n [95.215.58.173])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature RSA-PSS (2048 bits) server-digest\n SHA256)\n\t(No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 4fhklc5PNPz2xmX\n\tfor <linuxppc-dev@lists.ozlabs.org>; Fri, 27 Mar 2026 13:16:12 +1100 (AEDT)"
        ],
        "ARC-Seal": "i=1; a=rsa-sha256; d=lists.ozlabs.org; s=201707; t=1774577774;\n\tcv=none;\n b=fkOgAiMnGt47Ge8282YaLEMk/dCGV/qtJpWdxMeOc9IGnNSfTeeHnm7hmwaiwlQKOXNAMnTXbPa7VW/Exg3up4zSR3fQOZzmI1MFH61C/EBPRWpoyMY3uyw+LDnG9s9mXFOvAw2wegXioY/2tPHdPF3ZctdZ1x5QzjDbRRr7ROMKYe1wdtudzPfVI/NPNLUpcxZR0c1T+2WqFxrKyENrn86gxJxAI/f75jPQac96SjDIbhIqvxXGD4r5yaFbwlTRDCwZ4jn+pkbWd4k/eDoxRzZyfkRzbAuHygJj5ic0XKvQMAVZeQPAQE7EYWlvo131lOcgckU5oEXRG0W7CajFFw==",
        "ARC-Message-Signature": "i=1; a=rsa-sha256; d=lists.ozlabs.org; s=201707;\n\tt=1774577774; c=relaxed/relaxed;\n\tbh=39bZVeDLCZMGYPUr+EAc/EnS1HXbPGaYLlzIdJWQkDw=;\n\th=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References:\n\t MIME-Version:Content-Type;\n b=WmfSnfFCmpy2nwCz4fCKBBtJxEEnfqXuzzdlPBsTBoOhTQRRO+0YumrF8+NIEG5P/M9cgOnpAD09PJveMqPFRtzZcwJX3yWLJ94hg38h5L260wpfhQBE0feKGcpKe2+/dencDr3/ZugCZvfnZIFKwh5dW3q/sNZ42A03TZiH6SVw91JudcMtHyGJhYJxwEKx5xZaI8wfPiG6qu6wHvPkz6zGxOvks4GIDC119ouXDkK8nDRXt5AH99vU5QSjInHLIo2+C84sNtf/RR0iZsaKBuglQt1voUhf70qmmRqDHYhTgniPhyI6KWtXwJBtngRF5oGs513oBWXvaxy4uu5fQg==",
        "ARC-Authentication-Results": "i=1; lists.ozlabs.org;\n dmarc=pass (p=none dis=none) header.from=linux.dev; dkim=pass (1024-bit key;\n unprotected) header.d=linux.dev header.i=@linux.dev header.a=rsa-sha256\n header.s=key1 header.b=w30DGB4U; dkim-atps=neutral;\n spf=pass (client-ip=95.215.58.173; helo=out-173.mta1.migadu.com;\n envelope-from=usama.arif@linux.dev;\n receiver=lists.ozlabs.org) smtp.mailfrom=linux.dev",
        "X-Report-Abuse": "Please report any abuse attempt to abuse@migadu.com and\n include these headers.",
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.dev; s=key1;\n\tt=1774577753;\n\th=from:from:reply-to:subject:subject:date:date:message-id:message-id:\n\t to:to:cc:cc:mime-version:mime-version:content-type:content-type:\n\t content-transfer-encoding:content-transfer-encoding:\n\t in-reply-to:in-reply-to:references:references;\n\tbh=39bZVeDLCZMGYPUr+EAc/EnS1HXbPGaYLlzIdJWQkDw=;\n\tb=w30DGB4UyGXLzwfa4i4fkjpzxE6aN3Qb/p4ZKaRsPTAiSqe7v4evtfAT7/BH1HS7bkR3ee\n\tTPIhwTi3UpYkcx8TXWvhJiBwvNzLsYzefVS4KGsbhlTKHaRYF6+xMyROiJO8FjJH7HcP1c\n\tVHlLz8MpZzAFWrBdFHiUuP22FNKzK2o=",
        "From": "Usama Arif <usama.arif@linux.dev>",
        "To": "Andrew Morton <akpm@linux-foundation.org>,\n\tdavid@kernel.org,\n\tLorenzo Stoakes <ljs@kernel.org>,\n\twilly@infradead.org,\n\tlinux-mm@kvack.org",
        "Cc": "fvdl@google.com,\n\thannes@cmpxchg.org,\n\triel@surriel.com,\n\tshakeel.butt@linux.dev,\n\tkas@kernel.org,\n\tbaohua@kernel.org,\n\tdev.jain@arm.com,\n\tbaolin.wang@linux.alibaba.com,\n\tnpache@redhat.com,\n\tLiam.Howlett@oracle.com,\n\tryan.roberts@arm.com,\n\tVlastimil Babka <vbabka@kernel.org>,\n\tlance.yang@linux.dev,\n\tlinux-kernel@vger.kernel.org,\n\tkernel-team@meta.com,\n\tmaddy@linux.ibm.com,\n\tmpe@ellerman.id.au,\n\tlinuxppc-dev@lists.ozlabs.org,\n\thca@linux.ibm.com,\n\tgor@linux.ibm.com,\n\tagordeev@linux.ibm.com,\n\tborntraeger@linux.ibm.com,\n\tsvens@linux.ibm.com,\n\tlinux-s390@vger.kernel.org,\n\tUsama Arif <usama.arif@linux.dev>",
        "Subject": "[v3 17/24] mm: thp: allocate PTE page tables lazily at split time",
        "Date": "Thu, 26 Mar 2026 19:08:59 -0700",
        "Message-ID": "<20260327021403.214713-18-usama.arif@linux.dev>",
        "In-Reply-To": "<20260327021403.214713-1-usama.arif@linux.dev>",
        "References": "<20260327021403.214713-1-usama.arif@linux.dev>",
        "X-Mailing-List": "linuxppc-dev@lists.ozlabs.org",
        "List-Id": "<linuxppc-dev.lists.ozlabs.org>",
        "List-Help": "<mailto:linuxppc-dev+help@lists.ozlabs.org>",
        "List-Owner": "<mailto:linuxppc-dev+owner@lists.ozlabs.org>",
        "List-Post": "<mailto:linuxppc-dev@lists.ozlabs.org>",
        "List-Archive": "<https://lore.kernel.org/linuxppc-dev/>,\n  <https://lists.ozlabs.org/pipermail/linuxppc-dev/>",
        "List-Subscribe": "<mailto:linuxppc-dev+subscribe@lists.ozlabs.org>,\n  <mailto:linuxppc-dev+subscribe-digest@lists.ozlabs.org>,\n  <mailto:linuxppc-dev+subscribe-nomail@lists.ozlabs.org>",
        "List-Unsubscribe": "<mailto:linuxppc-dev+unsubscribe@lists.ozlabs.org>",
        "Precedence": "list",
        "MIME-Version": "1.0",
        "Content-Type": "text/plain; charset=UTF-8",
        "Content-Transfer-Encoding": "8bit",
        "X-Migadu-Flow": "FLOW_OUT",
        "X-Spam-Status": "No, score=-0.9 required=3.0 tests=DKIM_SIGNED,DKIM_VALID,\n\tDKIM_VALID_AU,DKIM_VALID_EF,RCVD_IN_DNSWL_LOW,SPF_HELO_PASS,SPF_PASS\n\tautolearn=disabled version=4.0.1 OzLabs 8",
        "X-Spam-Checker-Version": "SpamAssassin 4.0.1 (2024-03-25) on lists.ozlabs.org"
    },
    "content": "When the kernel creates a PMD-level THP mapping for anonymous pages,\nit pre-allocates a PTE page table and deposits it via\npgtable_trans_huge_deposit(). This deposited table is withdrawn during\nPMD split or zap. The rationale was that split must not fail—if the\nkernel decides to split a THP, it needs a PTE table to populate.\n\nHowever, every anon THP wastes 4KB (one page table page) that sits\nunused in the deposit list for the lifetime of the mapping. On systems\nwith many THPs, this adds up to significant memory waste. The original\nrationale is also not an issue. It is ok for split to fail, and if the\nkernel can't find an order 0 allocation for split, there are much bigger\nproblems. On large servers where you can easily have 100s of GBs of THPs,\nthe memory usage for these tables is 200M per 100G. This memory could be\nused for any other usecase, which include allocating the pagetables\nrequired during split.\n\nThis patch removes the pre-deposit for anonymous pages on architectures\nwhere arch_needs_pgtable_deposit() returns false (every arch apart from\npowerpc, and only when radix hash tables are not enabled) and allocates\nthe PTE table lazily—only when a split actually occurs. The split path\nis modified to accept a caller-provided page table.\n\nPowerPC exception:\n\nIt would have been great if we can completely remove the pagetable\ndeposit code and this commit would mostly have been a code cleanup patch,\nunfortunately PowerPC has hash MMU, it stores hash slot information in\nthe deposited page table and pre-deposit is necessary. All deposit/\nwithdraw paths are guarded by arch_needs_pgtable_deposit(), so PowerPC\nbehavior is unchanged with this patch. On a better note,\narch_needs_pgtable_deposit will always evaluate to false at compile time\non non PowerPC architectures and the pre-deposit code will not be\ncompiled in.\n\nSuggested-by: David Hildenbrand <david@kernel.org>\nSigned-off-by: Usama Arif <usama.arif@linux.dev>\n---\n include/linux/huge_mm.h |   4 +-\n mm/huge_memory.c        | 158 ++++++++++++++++++++++++++--------------\n mm/khugepaged.c         |   7 +-\n mm/migrate_device.c     |  15 ++--\n mm/rmap.c               |  39 +++++++++-\n 5 files changed, 158 insertions(+), 65 deletions(-)",
    "diff": "diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h\nindex c4d0badc4ce27..c02ba9c4b8d5b 100644\n--- a/include/linux/huge_mm.h\n+++ b/include/linux/huge_mm.h\n@@ -561,7 +561,7 @@ static inline bool thp_migration_supported(void)\n }\n \n void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,\n-\t\t\t   pmd_t *pmd, bool freeze);\n+\t\t\t   pmd_t *pmd, bool freeze, pgtable_t pgtable);\n bool unmap_huge_pmd_locked(struct vm_area_struct *vma, unsigned long addr,\n \t\t\t   pmd_t *pmdp, struct folio *folio);\n void map_anon_folio_pmd_nopf(struct folio *folio, pmd_t *pmd,\n@@ -660,7 +660,7 @@ static inline int split_huge_pmd_address(struct vm_area_struct *vma,\n \t\tunsigned long address, bool freeze) { return 0; }\n static inline void split_huge_pmd_locked(struct vm_area_struct *vma,\n \t\t\t\t\t unsigned long address, pmd_t *pmd,\n-\t\t\t\t\t bool freeze) {}\n+\t\t\t\t\t bool freeze, pgtable_t pgtable) {}\n \n static inline bool unmap_huge_pmd_locked(struct vm_area_struct *vma,\n \t\t\t\t\t unsigned long addr, pmd_t *pmdp,\ndiff --git a/mm/huge_memory.c b/mm/huge_memory.c\nindex 9f4be707c8cb0..2acedb1de7404 100644\n--- a/mm/huge_memory.c\n+++ b/mm/huge_memory.c\n@@ -1309,17 +1309,19 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf)\n \tunsigned long haddr = vmf->address & HPAGE_PMD_MASK;\n \tstruct vm_area_struct *vma = vmf->vma;\n \tstruct folio *folio;\n-\tpgtable_t pgtable;\n+\tpgtable_t pgtable = NULL;\n \tvm_fault_t ret = 0;\n \n \tfolio = vma_alloc_anon_folio_pmd(vma, vmf->address);\n \tif (unlikely(!folio))\n \t\treturn VM_FAULT_FALLBACK;\n \n-\tpgtable = pte_alloc_one(vma->vm_mm);\n-\tif (unlikely(!pgtable)) {\n-\t\tret = VM_FAULT_OOM;\n-\t\tgoto release;\n+\tif (arch_needs_pgtable_deposit()) {\n+\t\tpgtable = pte_alloc_one(vma->vm_mm);\n+\t\tif (unlikely(!pgtable)) {\n+\t\t\tret = VM_FAULT_OOM;\n+\t\t\tgoto release;\n+\t\t}\n \t}\n \n \tvmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);\n@@ -1334,14 +1336,18 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf)\n \t\tif (userfaultfd_missing(vma)) {\n \t\t\tspin_unlock(vmf->ptl);\n \t\t\tfolio_put(folio);\n-\t\t\tpte_free(vma->vm_mm, pgtable);\n+\t\t\tif (pgtable)\n+\t\t\t\tpte_free(vma->vm_mm, pgtable);\n \t\t\tret = handle_userfault(vmf, VM_UFFD_MISSING);\n \t\t\tVM_BUG_ON(ret & VM_FAULT_FALLBACK);\n \t\t\treturn ret;\n \t\t}\n-\t\tpgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);\n+\t\tif (pgtable) {\n+\t\t\tpgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd,\n+\t\t\t\t\t\t   pgtable);\n+\t\t\tmm_inc_nr_ptes(vma->vm_mm);\n+\t\t}\n \t\tmap_anon_folio_pmd_pf(folio, vmf->pmd, vma, haddr);\n-\t\tmm_inc_nr_ptes(vma->vm_mm);\n \t\tspin_unlock(vmf->ptl);\n \t}\n \n@@ -1437,9 +1443,11 @@ static void set_huge_zero_folio(pgtable_t pgtable, struct mm_struct *mm,\n \tpmd_t entry;\n \tentry = folio_mk_pmd(zero_folio, vma->vm_page_prot);\n \tentry = pmd_mkspecial(entry);\n-\tpgtable_trans_huge_deposit(mm, pmd, pgtable);\n+\tif (pgtable) {\n+\t\tpgtable_trans_huge_deposit(mm, pmd, pgtable);\n+\t\tmm_inc_nr_ptes(mm);\n+\t}\n \tset_pmd_at(mm, haddr, pmd, entry);\n-\tmm_inc_nr_ptes(mm);\n }\n \n vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)\n@@ -1458,16 +1466,19 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)\n \tif (!(vmf->flags & FAULT_FLAG_WRITE) &&\n \t\t\t!mm_forbids_zeropage(vma->vm_mm) &&\n \t\t\ttransparent_hugepage_use_zero_page()) {\n-\t\tpgtable_t pgtable;\n+\t\tpgtable_t pgtable = NULL;\n \t\tstruct folio *zero_folio;\n \t\tvm_fault_t ret;\n \n-\t\tpgtable = pte_alloc_one(vma->vm_mm);\n-\t\tif (unlikely(!pgtable))\n-\t\t\treturn VM_FAULT_OOM;\n+\t\tif (arch_needs_pgtable_deposit()) {\n+\t\t\tpgtable = pte_alloc_one(vma->vm_mm);\n+\t\t\tif (unlikely(!pgtable))\n+\t\t\t\treturn VM_FAULT_OOM;\n+\t\t}\n \t\tzero_folio = mm_get_huge_zero_folio(vma->vm_mm);\n \t\tif (unlikely(!zero_folio)) {\n-\t\t\tpte_free(vma->vm_mm, pgtable);\n+\t\t\tif (pgtable)\n+\t\t\t\tpte_free(vma->vm_mm, pgtable);\n \t\t\tcount_vm_event(THP_FAULT_FALLBACK);\n \t\t\treturn VM_FAULT_FALLBACK;\n \t\t}\n@@ -1477,10 +1488,12 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)\n \t\t\tret = check_stable_address_space(vma->vm_mm);\n \t\t\tif (ret) {\n \t\t\t\tspin_unlock(vmf->ptl);\n-\t\t\t\tpte_free(vma->vm_mm, pgtable);\n+\t\t\t\tif (pgtable)\n+\t\t\t\t\tpte_free(vma->vm_mm, pgtable);\n \t\t\t} else if (userfaultfd_missing(vma)) {\n \t\t\t\tspin_unlock(vmf->ptl);\n-\t\t\t\tpte_free(vma->vm_mm, pgtable);\n+\t\t\t\tif (pgtable)\n+\t\t\t\t\tpte_free(vma->vm_mm, pgtable);\n \t\t\t\tret = handle_userfault(vmf, VM_UFFD_MISSING);\n \t\t\t\tVM_BUG_ON(ret & VM_FAULT_FALLBACK);\n \t\t\t} else {\n@@ -1491,7 +1504,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)\n \t\t\t}\n \t\t} else {\n \t\t\tspin_unlock(vmf->ptl);\n-\t\t\tpte_free(vma->vm_mm, pgtable);\n+\t\t\tif (pgtable)\n+\t\t\t\tpte_free(vma->vm_mm, pgtable);\n \t\t}\n \t\treturn ret;\n \t}\n@@ -1823,8 +1837,10 @@ static void copy_huge_non_present_pmd(\n \t}\n \n \tadd_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);\n-\tmm_inc_nr_ptes(dst_mm);\n-\tpgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);\n+\tif (pgtable) {\n+\t\tmm_inc_nr_ptes(dst_mm);\n+\t\tpgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);\n+\t}\n \tif (!userfaultfd_wp(dst_vma))\n \t\tpmd = pmd_swp_clear_uffd_wp(pmd);\n \tset_pmd_at(dst_mm, addr, dst_pmd, pmd);\n@@ -1864,9 +1880,11 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,\n \tif (!vma_is_anonymous(dst_vma))\n \t\treturn 0;\n \n-\tpgtable = pte_alloc_one(dst_mm);\n-\tif (unlikely(!pgtable))\n-\t\tgoto out;\n+\tif (arch_needs_pgtable_deposit()) {\n+\t\tpgtable = pte_alloc_one(dst_mm);\n+\t\tif (unlikely(!pgtable))\n+\t\t\tgoto out;\n+\t}\n \n \tdst_ptl = pmd_lock(dst_mm, dst_pmd);\n \tsrc_ptl = pmd_lockptr(src_mm, src_pmd);\n@@ -1884,7 +1902,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,\n \t}\n \n \tif (unlikely(!pmd_trans_huge(pmd))) {\n-\t\tpte_free(dst_mm, pgtable);\n+\t\tif (pgtable)\n+\t\t\tpte_free(dst_mm, pgtable);\n \t\tgoto out_unlock;\n \t}\n \t/*\n@@ -1910,7 +1929,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,\n \tif (unlikely(folio_try_dup_anon_rmap_pmd(src_folio, src_page, dst_vma, src_vma))) {\n \t\t/* Page maybe pinned: split and retry the fault on PTEs. */\n \t\tfolio_put(src_folio);\n-\t\tpte_free(dst_mm, pgtable);\n+\t\tif (pgtable)\n+\t\t\tpte_free(dst_mm, pgtable);\n \t\tspin_unlock(src_ptl);\n \t\tspin_unlock(dst_ptl);\n \t\t/*\n@@ -1924,8 +1944,10 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,\n \t}\n \tadd_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);\n out_zero_page:\n-\tmm_inc_nr_ptes(dst_mm);\n-\tpgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);\n+\tif (pgtable) {\n+\t\tmm_inc_nr_ptes(dst_mm);\n+\t\tpgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);\n+\t}\n \tpmdp_set_wrprotect(src_mm, addr, src_pmd);\n \tif (!userfaultfd_wp(dst_vma))\n \t\tpmd = pmd_clear_uffd_wp(pmd);\n@@ -2376,22 +2398,13 @@ static struct folio *normal_or_softleaf_folio_pmd(struct vm_area_struct *vma,\n static bool has_deposited_pgtable(struct vm_area_struct *vma, pmd_t pmdval,\n \t\tstruct folio *folio)\n {\n-\t/* Some architectures require unconditional depositing. */\n-\tif (arch_needs_pgtable_deposit())\n-\t\treturn true;\n-\n-\t/*\n-\t * Huge zero always deposited except for DAX which handles itself, see\n-\t * set_huge_zero_folio().\n-\t */\n-\tif (is_huge_zero_pmd(pmdval))\n-\t\treturn !vma_is_dax(vma);\n-\n \t/*\n-\t * Otherwise, only anonymous folios are deposited, see\n-\t * __do_huge_pmd_anonymous_page().\n+\t * With lazy PTE page table allocation, only architectures that\n+\t * require unconditional depositing (powerpc hash MMU) will have\n+\t * deposited page tables. All other architectures allocate PTE\n+\t * page tables lazily at split time.\n \t */\n-\treturn folio && folio_test_anon(folio);\n+\treturn arch_needs_pgtable_deposit();\n }\n \n /**\n@@ -2514,7 +2527,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,\n \t\t\tforce_flush = true;\n \t\tVM_BUG_ON(!pmd_none(*new_pmd));\n \n-\t\tif (pmd_move_must_withdraw(new_ptl, old_ptl, vma)) {\n+\t\tif (pmd_move_must_withdraw(new_ptl, old_ptl, vma) &&\n+\t\t    arch_needs_pgtable_deposit()) {\n \t\t\tpgtable_t pgtable;\n \t\t\tpgtable = pgtable_trans_huge_withdraw(mm, old_pmd);\n \t\t\tpgtable_trans_huge_deposit(mm, new_pmd, pgtable);\n@@ -2823,8 +2837,10 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm\n \t}\n \tset_pmd_at(mm, dst_addr, dst_pmd, _dst_pmd);\n \n-\tsrc_pgtable = pgtable_trans_huge_withdraw(mm, src_pmd);\n-\tpgtable_trans_huge_deposit(mm, dst_pmd, src_pgtable);\n+\tif (arch_needs_pgtable_deposit()) {\n+\t\tsrc_pgtable = pgtable_trans_huge_withdraw(mm, src_pmd);\n+\t\tpgtable_trans_huge_deposit(mm, dst_pmd, src_pgtable);\n+\t}\n unlock_ptls:\n \tdouble_pt_unlock(src_ptl, dst_ptl);\n \t/* unblock rmap walks */\n@@ -2966,10 +2982,9 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,\n #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */\n \n static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,\n-\t\tunsigned long haddr, pmd_t *pmd)\n+\t\tunsigned long haddr, pmd_t *pmd, pgtable_t pgtable)\n {\n \tstruct mm_struct *mm = vma->vm_mm;\n-\tpgtable_t pgtable;\n \tpmd_t _pmd, old_pmd;\n \tunsigned long addr;\n \tpte_t *pte;\n@@ -2985,7 +3000,16 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,\n \t */\n \told_pmd = pmdp_huge_clear_flush(vma, haddr, pmd);\n \n-\tpgtable = pgtable_trans_huge_withdraw(mm, pmd);\n+\tif (arch_needs_pgtable_deposit()) {\n+\t\tpgtable = pgtable_trans_huge_withdraw(mm, pmd);\n+\t} else {\n+\t\tVM_BUG_ON(!pgtable);\n+\t\t/*\n+\t\t * Account for the freshly allocated (in __split_huge_pmd) pgtable\n+\t\t * being used in mm.\n+\t\t */\n+\t\tmm_inc_nr_ptes(mm);\n+\t}\n \tpmd_populate(mm, &_pmd, pgtable);\n \n \tpte = pte_offset_map(&_pmd, haddr);\n@@ -3007,12 +3031,11 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,\n }\n \n static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,\n-\t\tunsigned long haddr, bool freeze)\n+\t\tunsigned long haddr, bool freeze, pgtable_t pgtable)\n {\n \tstruct mm_struct *mm = vma->vm_mm;\n \tstruct folio *folio;\n \tstruct page *page;\n-\tpgtable_t pgtable;\n \tpmd_t old_pmd, _pmd;\n \tbool soft_dirty, uffd_wp = false, young = false, write = false;\n \tbool anon_exclusive = false, dirty = false;\n@@ -3036,6 +3059,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,\n \t\t */\n \t\tif (arch_needs_pgtable_deposit())\n \t\t\tzap_deposited_table(mm, pmd);\n+\t\tif (pgtable)\n+\t\t\tpte_free(mm, pgtable);\n \t\tif (vma_is_special_huge(vma))\n \t\t\treturn;\n \t\tif (unlikely(pmd_is_migration_entry(old_pmd))) {\n@@ -3068,7 +3093,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,\n \t\t * small page also write protected so it does not seems useful\n \t\t * to invalidate secondary mmu at this time.\n \t\t */\n-\t\treturn __split_huge_zero_page_pmd(vma, haddr, pmd);\n+\t\treturn __split_huge_zero_page_pmd(vma, haddr, pmd, pgtable);\n \t}\n \n \tif (pmd_is_migration_entry(*pmd)) {\n@@ -3192,7 +3217,16 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,\n \t * Withdraw the table only after we mark the pmd entry invalid.\n \t * This's critical for some architectures (Power).\n \t */\n-\tpgtable = pgtable_trans_huge_withdraw(mm, pmd);\n+\tif (arch_needs_pgtable_deposit()) {\n+\t\tpgtable = pgtable_trans_huge_withdraw(mm, pmd);\n+\t} else {\n+\t\tVM_BUG_ON(!pgtable);\n+\t\t/*\n+\t\t * Account for the freshly allocated (in __split_huge_pmd) pgtable\n+\t\t * being used in mm.\n+\t\t */\n+\t\tmm_inc_nr_ptes(mm);\n+\t}\n \tpmd_populate(mm, &_pmd, pgtable);\n \n \tpte = pte_offset_map(&_pmd, haddr);\n@@ -3288,11 +3322,13 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,\n }\n \n void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,\n-\t\t\t   pmd_t *pmd, bool freeze)\n+\t\t\t   pmd_t *pmd, bool freeze, pgtable_t pgtable)\n {\n \tVM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE));\n \tif (pmd_trans_huge(*pmd) || pmd_is_valid_softleaf(*pmd))\n-\t\t__split_huge_pmd_locked(vma, pmd, address, freeze);\n+\t\t__split_huge_pmd_locked(vma, pmd, address, freeze, pgtable);\n+\telse if (pgtable)\n+\t\tpte_free(vma->vm_mm, pgtable);\n }\n \n int __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,\n@@ -3300,13 +3336,24 @@ int __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,\n {\n \tspinlock_t *ptl;\n \tstruct mmu_notifier_range range;\n+\tpgtable_t pgtable = NULL;\n \n \tmmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm,\n \t\t\t\taddress & HPAGE_PMD_MASK,\n \t\t\t\t(address & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE);\n \tmmu_notifier_invalidate_range_start(&range);\n+\n+\t/* allocate pagetable before acquiring pmd lock */\n+\tif (vma_is_anonymous(vma) && !arch_needs_pgtable_deposit()) {\n+\t\tpgtable = pte_alloc_one(vma->vm_mm);\n+\t\tif (!pgtable) {\n+\t\t\tmmu_notifier_invalidate_range_end(&range);\n+\t\t\treturn -ENOMEM;\n+\t\t}\n+\t}\n+\n \tptl = pmd_lock(vma->vm_mm, pmd);\n-\tsplit_huge_pmd_locked(vma, range.start, pmd, freeze);\n+\tsplit_huge_pmd_locked(vma, range.start, pmd, freeze, pgtable);\n \tspin_unlock(ptl);\n \tmmu_notifier_invalidate_range_end(&range);\n \n@@ -3442,7 +3489,8 @@ static bool __discard_anon_folio_pmd_locked(struct vm_area_struct *vma,\n \t}\n \n \tfolio_remove_rmap_pmd(folio, pmd_page(orig_pmd), vma);\n-\tzap_deposited_table(mm, pmdp);\n+\tif (arch_needs_pgtable_deposit())\n+\t\tzap_deposited_table(mm, pmdp);\n \tadd_mm_counter(mm, MM_ANONPAGES, -HPAGE_PMD_NR);\n \tif (vma->vm_flags & VM_LOCKED)\n \t\tmlock_drain_local();\ndiff --git a/mm/khugepaged.c b/mm/khugepaged.c\nindex d06d84219e1b8..40b33263f6135 100644\n--- a/mm/khugepaged.c\n+++ b/mm/khugepaged.c\n@@ -1239,7 +1239,12 @@ static enum scan_result collapse_huge_page(struct mm_struct *mm, unsigned long a\n \n \tspin_lock(pmd_ptl);\n \tBUG_ON(!pmd_none(*pmd));\n-\tpgtable_trans_huge_deposit(mm, pmd, pgtable);\n+\tif (arch_needs_pgtable_deposit()) {\n+\t\tpgtable_trans_huge_deposit(mm, pmd, pgtable);\n+\t} else {\n+\t\tmm_dec_nr_ptes(mm);\n+\t\tpte_free(mm, pgtable);\n+\t}\n \tmap_anon_folio_pmd_nopf(folio, pmd, vma, address);\n \tspin_unlock(pmd_ptl);\n \ndiff --git a/mm/migrate_device.c b/mm/migrate_device.c\nindex 00003fbe803df..b9242217a81b6 100644\n--- a/mm/migrate_device.c\n+++ b/mm/migrate_device.c\n@@ -829,9 +829,13 @@ static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate,\n \n \t__folio_mark_uptodate(folio);\n \n-\tpgtable = pte_alloc_one(vma->vm_mm);\n-\tif (unlikely(!pgtable))\n-\t\tgoto abort;\n+\tif (arch_needs_pgtable_deposit()) {\n+\t\tpgtable = pte_alloc_one(vma->vm_mm);\n+\t\tif (unlikely(!pgtable))\n+\t\t\tgoto abort;\n+\t} else {\n+\t\tpgtable = NULL;\n+\t}\n \n \tif (folio_is_device_private(folio)) {\n \t\tswp_entry_t swp_entry;\n@@ -879,10 +883,11 @@ static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate,\n \tfolio_get(folio);\n \n \tif (flush) {\n-\t\tpte_free(vma->vm_mm, pgtable);\n+\t\tif (pgtable)\n+\t\t\tpte_free(vma->vm_mm, pgtable);\n \t\tflush_cache_page(vma, addr, addr + HPAGE_PMD_SIZE);\n \t\tpmdp_invalidate(vma, addr, pmdp);\n-\t} else {\n+\t} else if (pgtable) {\n \t\tpgtable_trans_huge_deposit(vma->vm_mm, pmdp, pgtable);\n \t\tmm_inc_nr_ptes(vma->vm_mm);\n \t}\ndiff --git a/mm/rmap.c b/mm/rmap.c\nindex 78b7fb5f367ce..efbcdd3b32632 100644\n--- a/mm/rmap.c\n+++ b/mm/rmap.c\n@@ -76,6 +76,7 @@\n #include <linux/mm_inline.h>\n #include <linux/oom.h>\n \n+#include <asm/pgalloc.h>\n #include <asm/tlb.h>\n \n #define CREATE_TRACE_POINTS\n@@ -1995,6 +1996,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,\n \tunsigned long pfn;\n \tunsigned long hsz = 0;\n \tint ptes = 0;\n+\tpgtable_t prealloc_pte = NULL;\n \n \t/*\n \t * When racing against e.g. zap_pte_range() on another cpu,\n@@ -2029,6 +2031,10 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,\n \t}\n \tmmu_notifier_invalidate_range_start(&range);\n \n+\tif ((flags & TTU_SPLIT_HUGE_PMD) && vma_is_anonymous(vma) &&\n+\t    !arch_needs_pgtable_deposit())\n+\t\tprealloc_pte = pte_alloc_one(mm);\n+\n \twhile (page_vma_mapped_walk(&pvmw)) {\n \t\t/*\n \t\t * If the folio is in an mlock()d vma, we must not swap it out.\n@@ -2078,12 +2084,21 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,\n \t\t\t}\n \n \t\t\tif (flags & TTU_SPLIT_HUGE_PMD) {\n+\t\t\t\tpgtable_t pgtable = prealloc_pte;\n+\n+\t\t\t\tprealloc_pte = NULL;\n+\t\t\t\tif (!arch_needs_pgtable_deposit() && !pgtable &&\n+\t\t\t\t    vma_is_anonymous(vma)) {\n+\t\t\t\t\tpage_vma_mapped_walk_done(&pvmw);\n+\t\t\t\t\tret = false;\n+\t\t\t\t\tbreak;\n+\t\t\t\t}\n \t\t\t\t/*\n \t\t\t\t * We temporarily have to drop the PTL and\n \t\t\t\t * restart so we can process the PTE-mapped THP.\n \t\t\t\t */\n \t\t\t\tsplit_huge_pmd_locked(vma, pvmw.address,\n-\t\t\t\t\t\t      pvmw.pmd, false);\n+\t\t\t\t\t\t      pvmw.pmd, false, pgtable);\n \t\t\t\tflags &= ~TTU_SPLIT_HUGE_PMD;\n \t\t\t\tpage_vma_mapped_walk_restart(&pvmw);\n \t\t\t\tcontinue;\n@@ -2363,6 +2378,9 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,\n \t\tbreak;\n \t}\n \n+\tif (prealloc_pte)\n+\t\tpte_free(mm, prealloc_pte);\n+\n \tmmu_notifier_invalidate_range_end(&range);\n \n \treturn ret;\n@@ -2422,6 +2440,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,\n \tenum ttu_flags flags = (enum ttu_flags)(long)arg;\n \tunsigned long pfn;\n \tunsigned long hsz = 0;\n+\tpgtable_t prealloc_pte = NULL;\n \n \t/*\n \t * When racing against e.g. zap_pte_range() on another cpu,\n@@ -2456,6 +2475,10 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,\n \t}\n \tmmu_notifier_invalidate_range_start(&range);\n \n+\tif ((flags & TTU_SPLIT_HUGE_PMD) && vma_is_anonymous(vma) &&\n+\t    !arch_needs_pgtable_deposit())\n+\t\tprealloc_pte = pte_alloc_one(mm);\n+\n \twhile (page_vma_mapped_walk(&pvmw)) {\n \t\t/* PMD-mapped THP migration entry */\n \t\tif (!pvmw.pte) {\n@@ -2463,6 +2486,15 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,\n \t\t\t__maybe_unused pmd_t pmdval;\n \n \t\t\tif (flags & TTU_SPLIT_HUGE_PMD) {\n+\t\t\t\tpgtable_t pgtable = prealloc_pte;\n+\n+\t\t\t\tprealloc_pte = NULL;\n+\t\t\t\tif (!arch_needs_pgtable_deposit() && !pgtable &&\n+\t\t\t\t    vma_is_anonymous(vma)) {\n+\t\t\t\t\tpage_vma_mapped_walk_done(&pvmw);\n+\t\t\t\t\tret = false;\n+\t\t\t\t\tbreak;\n+\t\t\t\t}\n \t\t\t\t/*\n \t\t\t\t * split_huge_pmd_locked() might leave the\n \t\t\t\t * folio mapped through PTEs. Retry the walk\n@@ -2470,7 +2502,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,\n \t\t\t\t * abort the walk.\n \t\t\t\t */\n \t\t\t\tsplit_huge_pmd_locked(vma, pvmw.address,\n-\t\t\t\t\t\t      pvmw.pmd, true);\n+\t\t\t\t\t\t      pvmw.pmd, true, pgtable);\n \t\t\t\tflags &= ~TTU_SPLIT_HUGE_PMD;\n \t\t\t\tpage_vma_mapped_walk_restart(&pvmw);\n \t\t\t\tcontinue;\n@@ -2721,6 +2753,9 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,\n \t\tfolio_put(folio);\n \t}\n \n+\tif (prealloc_pte)\n+\t\tpte_free(mm, prealloc_pte);\n+\n \tmmu_notifier_invalidate_range_end(&range);\n \n \treturn ret;\n",
    "prefixes": [
        "v3",
        "17/24"
    ]
}