diff mbox series

[v2,61/69] mm/hugetlb: Drop boot-time HVO handling for gigantic folios

Message ID 20260513132044.41690-15-songmuchun@bytedance.com (mailing list archive)
State Handled Elsewhere
Headers show
Series mm: Generalize HVO for HugeTLB and device DAX | expand

Commit Message

Muchun Song May 13, 2026, 1:20 p.m. UTC
HugeTLB HVO is currently supported on x86-64, riscv64, and LoongArch.
On x86-64 and riscv64, gigantic HugeTLB pages are larger than the
section size, so the existing section-based vmemmap optimization
infrastructure is already sufficient to cover the whole folio.  On
LoongArch, HugeTLB HVO is supported without gigantic HugeTLB pages.

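Therefore, gigantic HugeTLB folios allocated at boot time can rely
directly on the section-based vmemmap optimization infrastructure.
Drop the extra bulk optimization pass (hugetlb_vmemmap_optimize_folios())
and the fallback initialization of tail struct pages from
prep_and_add_bootmem_folios(), and let gather_bootmem_prealloc_node()
size the vmemmap initialization with vmemmap_nr_struct_pages(), which
moves from mm/sparse.c to mm/internal.h as a static inline.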

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 mm/hugetlb.c         | 25 ++++++-------------------
 mm/hugetlb_vmemmap.c | 21 ++++++---------------
 mm/internal.h        | 25 +++++++++++++++++++++++--
 mm/sparse.c          | 23 -----------------------
 4 files changed, 35 insertions(+), 59 deletions(-)
diff mbox series

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bd136fc6aec0..3cb8fffb9e3e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3201,21 +3201,7 @@  static void __init prep_and_add_bootmem_folios(struct hstate *h,
 	unsigned long flags;
 	struct folio *folio, *tmp_f;
 
-	/* Send list for bulk vmemmap optimization processing */
-	hugetlb_vmemmap_optimize_folios(h, folio_list);
-
 	list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
-		if (!folio_test_hugetlb_vmemmap_optimized(folio)) {
-			/*
-			 * If HVO fails, initialize all tail struct pages
-			 * We do not worry about potential long lock hold
-			 * time as this is early in boot and there should
-			 * be no contention.
-			 */
-			hugetlb_folio_init_tail_vmemmap(folio, h,
-					OPTIMIZED_FOLIO_VMEMMAP_NR_STRUCT_PAGES,
-					pages_per_huge_page(h));
-		}
 		hugetlb_bootmem_init_migratetype(folio, h);
 		/* Subdivide locks to achieve better parallel performance */
 		spin_lock_irqsave(&hugetlb_lock, flags);
@@ -3238,6 +3224,8 @@  static void __init gather_bootmem_prealloc_node(unsigned long nid)
 	list_for_each_entry_safe(m, tm, &huge_boot_pages[nid], list) {
 		struct page *page = virt_to_page(m);
 		struct folio *folio = (void *)page;
+		unsigned long pfn = PHYS_PFN(__pa(m));
+		unsigned long nr_pages = pages_per_huge_page(m->hstate);
 
 		h = m->hstate;
 		/*
@@ -3251,13 +3239,12 @@  static void __init gather_bootmem_prealloc_node(unsigned long nid)
 		VM_BUG_ON(!hstate_is_gigantic(h));
 		WARN_ON(folio_ref_count(folio) != 1);
 
-		hugetlb_folio_init_vmemmap(folio, h,
-					   OPTIMIZED_FOLIO_VMEMMAP_NR_STRUCT_PAGES);
+		hugetlb_folio_init_vmemmap(folio, h, vmemmap_nr_struct_pages(pfn, nr_pages));
 		init_new_hugetlb_folio(folio);
 
-		if (order_vmemmap_optimizable(pfn_to_section_order(folio_pfn(folio)))) {
+		if (order_vmemmap_optimizable(pfn_to_section_order(pfn))) {
 			folio_set_hugetlb_vmemmap_optimized(folio);
-			section_set_order_range(folio_pfn(folio), folio_nr_pages(folio), 0);
+			section_set_order_range(pfn, nr_pages, 0);
 		}
 
 		if (hugetlb_early_cma(h))
@@ -3274,7 +3261,7 @@  static void __init gather_bootmem_prealloc_node(unsigned long nid)
 		 * (via hugetlb_bootmem_init_migratetype), so skip it here.
 		 */
 		if (!folio_test_hugetlb_cma(folio))
-			adjust_managed_page_count(page, pages_per_huge_page(h));
+			adjust_managed_page_count(page, nr_pages);
 		cond_resched();
 	}
 
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 1305bee1195a..d20d2ce13906 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -599,23 +599,17 @@  static int hugetlb_vmemmap_split_folio(const struct hstate *h, struct folio *fol
 void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
 {
 	struct folio *folio;
-	unsigned long nr_to_optimize = 0;
 	LIST_HEAD(vmemmap_pages);
 	unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH;
 
-	list_for_each_entry(folio, folio_list, lru) {
-		int ret;
-
-		/*
-		 * Bootmem gigantic folios may already be marked optimized when
-		 * their vmemmap layout was prepared earlier, so skip them here.
-		 */
-		if (folio_test_hugetlb_vmemmap_optimized(folio))
-			continue;
+	if (!vmemmap_should_optimize(h))
+		return;
 
-		nr_to_optimize++;
+	if (list_empty(folio_list))
+		return;
 
-		ret = hugetlb_vmemmap_split_folio(h, folio);
+	list_for_each_entry(folio, folio_list, lru) {
+		int ret = hugetlb_vmemmap_split_folio(h, folio);
 
 		/*
 		 * Splitting the PMD requires allocating a page, thus let's fail
@@ -627,9 +621,6 @@  void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
 			break;
 	}
 
-	if (!nr_to_optimize)
-		return;
-
 	flush_tlb_all();
 
 	list_for_each_entry(folio, folio_list, lru) {
diff --git a/mm/internal.h b/mm/internal.h
index aff7cebb1da4..416afdf7b2ec 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -949,6 +949,29 @@  void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
 		unsigned long, enum meminit_context, struct vmem_altmap *, int,
 		bool);
 
+static inline int vmemmap_nr_struct_pages(unsigned long pfn, unsigned long nr_pages)
+{
+	const unsigned int order = pfn_to_section_order(pfn);
+	const unsigned long pages_per_compound = 1UL << order;
+
+	if (!order_vmemmap_optimizable(order))
+		return nr_pages;
+
+	if (order < PFN_SECTION_SHIFT) {
+		VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, pages_per_compound));
+		return OPTIMIZED_FOLIO_VMEMMAP_NR_STRUCT_PAGES * nr_pages / pages_per_compound;
+	}
+
+	VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, PAGES_PER_SECTION));
+	/* Ensure the requested range does not cross a compound page boundary. */
+	VM_WARN_ON_ONCE((pfn % pages_per_compound) + nr_pages > pages_per_compound);
+
+	if (IS_ALIGNED(pfn, pages_per_compound))
+		return OPTIMIZED_FOLIO_VMEMMAP_NR_STRUCT_PAGES;
+
+	return 0;
+}
+
 /*
  * mm/sparse.c
  */
@@ -988,8 +1011,6 @@  static inline void __section_mark_present(struct mem_section *ms,
 	ms->section_mem_map |= SECTION_MARKED_PRESENT;
 }
 
-int vmemmap_nr_struct_pages(unsigned long pfn, unsigned long nr_pages);
-
 static inline int section_nr_vmemmap_pages(unsigned long pfn, unsigned long nr_pages)
 {
 	VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, PAGES_PER_SUBSECTION));
diff --git a/mm/sparse.c b/mm/sparse.c
index 598da1651e49..21a0eb636fea 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -236,29 +236,6 @@  void __weak __meminit vmemmap_populate_print_last(void)
 {
 }
 
-int __meminit vmemmap_nr_struct_pages(unsigned long pfn, unsigned long nr_pages)
-{
-	const unsigned int order = pfn_to_section_order(pfn);
-	const unsigned long pages_per_compound = 1UL << order;
-
-	if (!order_vmemmap_optimizable(order))
-		return nr_pages;
-
-	if (order < PFN_SECTION_SHIFT) {
-		VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, pages_per_compound));
-		return OPTIMIZED_FOLIO_VMEMMAP_NR_STRUCT_PAGES * nr_pages / pages_per_compound;
-	}
-
-	VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | nr_pages, PAGES_PER_SECTION));
-	/* Ensure the requested range does not cross a compound page boundary. */
-	VM_WARN_ON_ONCE((pfn % pages_per_compound) + nr_pages > pages_per_compound);
-
-	if (IS_ALIGNED(pfn, pages_per_compound))
-		return OPTIMIZED_FOLIO_VMEMMAP_NR_STRUCT_PAGES;
-
-	return 0;
-}
-
 /*
  * Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end)
  * And number of present sections in this node is map_count.