diff mbox series

[v2,24/69] mm/mm_init: Skip initializing shared vmemmap tail pages

Message ID 20260513130542.35604-25-songmuchun@bytedance.com (mailing list archive)
State Handled Elsewhere
Headers show
Series mm: Generalize HVO for HugeTLB and device DAX | expand

Commit Message

Muchun Song May 13, 2026, 1:04 p.m. UTC
memmap_init_range() initializes every struct page in the target range.
For compound pages with vmemmap optimization, every struct page past the
first OPTIMIZED_FOLIO_VMEMMAP_NR_STRUCT_PAGES is a tail struct page
backed by a shared vmemmap page.

Initializing those tail struct pages would overwrite the shared
vmemmap page contents, so users such as HugeTLB have to open-code
follow-up handling to restore the metadata afterwards.

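Use the section's compound page order to detect struct pages that fall
into the shared tail vmemmap range and skip their initialization in
memmap_init_range().  The skipped range may still contain pageblock
boundaries, so initialize its pageblock migratetypes with
pageblock_migratetype_init_range().  To allow that, build the helper
unconditionally and let callers pass the isolate flag instead of
hard-coding false.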

This is a preparatory change for consolidating handling across users of
vmemmap optimization, and it also avoids redundant initialization of
shared tail vmemmap pages during early boot.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 include/linux/mmzone.h |  9 +++++++++
 mm/internal.h          | 16 ++++++++++++++++
 mm/mm_init.c           | 19 +++++++++++++------
 3 files changed, 38 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6f112e6f42bb..5fc968bac1f7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -2264,6 +2264,11 @@  static inline unsigned int section_order(const struct mem_section *section)
 }
 #endif
 
+static inline unsigned int pfn_to_section_order(unsigned long pfn)
+{
+	return section_order(__pfn_to_section(pfn));
+}
+
 void sparse_init_early_section(int nid, struct page *map, unsigned long pnum,
 			       unsigned long flags);
 
@@ -2404,6 +2409,10 @@  static inline unsigned long next_present_section_nr(unsigned long section_nr)
 #else
 #define sparse_vmemmap_init_nid_early(_nid) do {} while (0)
 #define pfn_in_present_section pfn_valid
+static inline unsigned int pfn_to_section_order(unsigned long pfn)
+{
+	return 0;
+}
 #endif /* CONFIG_SPARSEMEM */
 
 /*
diff --git a/mm/internal.h b/mm/internal.h
index 4a5053368078..1f1c07eb70e2 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1004,10 +1004,26 @@  static inline void sparse_init(void) {}
  */
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 void sparse_init_subsection_map(void);
+
+static inline bool vmemmap_page_optimizable(const struct page *page)
+{
+	unsigned long pfn = page_to_pfn(page);
+	unsigned long nr_pages = 1UL << pfn_to_section_order(pfn);
+
+	if (!is_power_of_2(sizeof(struct page)))
+		return false;
+
+	return (pfn & (nr_pages - 1)) >= OPTIMIZED_FOLIO_VMEMMAP_NR_STRUCT_PAGES;
+}
 #else
 static inline void sparse_init_subsection_map(void)
 {
 }
+
+static inline bool vmemmap_page_optimizable(const struct page *page)
+{
+	return false;
+}
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
diff --git a/mm/mm_init.c b/mm/mm_init.c
index c64e5d63c4ae..3aaee1cf7bf0 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -674,19 +674,17 @@  static inline void fixup_hashdist(void)
 static inline void fixup_hashdist(void) {}
 #endif /* CONFIG_NUMA */
 
-#if defined(CONFIG_ZONE_DEVICE) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
 static __meminit void pageblock_migratetype_init_range(unsigned long pfn,
-		unsigned long nr_pages, int migratetype, bool atomic)
+		unsigned long nr_pages, int migratetype, bool isolate, bool atomic)
 {
 	const unsigned long end = pfn + nr_pages;
 
 	for (pfn = pageblock_align(pfn); pfn < end; pfn += pageblock_nr_pages) {
-		init_pageblock_migratetype(pfn_to_page(pfn), migratetype, false);
+		init_pageblock_migratetype(pfn_to_page(pfn), migratetype, isolate);
 		if (!atomic && IS_ALIGNED(pfn, PAGES_PER_SECTION))
 			cond_resched();
 	}
 }
-#endif
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 /*
@@ -916,6 +914,15 @@  void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
 		}
 
 		page = pfn_to_page(pfn);
+		if (vmemmap_page_optimizable(page)) {
+			unsigned long start = pfn;
+
+			pfn = min(ALIGN(start, 1UL << pfn_to_section_order(pfn)), end_pfn);
+			pageblock_migratetype_init_range(start, pfn - start, migratetype,
+							 isolate_pageblock, false);
+			continue;
+		}
+
 		__init_single_page(page, pfn, zone, nid);
 		if (context == MEMINIT_HOTPLUG) {
 #ifdef CONFIG_ZONE_DEVICE
@@ -1142,7 +1149,7 @@  void __ref memmap_init_zone_device(struct zone *zone,
 				     compound_nr_pages(pfn, altmap, pgmap));
 	}
 
-	pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE, false);
+	pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE, false, false);
 
 	pr_debug("%s initialised %lu pages in %ums\n", __func__,
 		nr_pages, jiffies_to_msecs(jiffies - start));
@@ -1982,7 +1989,7 @@  static void __init deferred_free_pages(unsigned long pfn,
 	if (!nr_pages)
 		return;
 
-	pageblock_migratetype_init_range(pfn, nr_pages, mt, true);
+	pageblock_migratetype_init_range(pfn, nr_pages, mt, false, true);
 
 	page = pfn_to_page(pfn);