diff mbox series

[v2,42/69] mm/sparse-vmemmap: Switch DAX to section-based vmemmap optimization

Message ID 20260513130542.35604-43-songmuchun@bytedance.com (mailing list archive)
State Handled Elsewhere
Headers show
Series mm: Generalize HVO for HugeTLB and device DAX | expand

Commit Message

Muchun Song May 13, 2026, 1:05 p.m. UTC
DAX vmemmap optimization still uses pgmap-specific state to decide
whether a section should use the optimized layout.

Switch DAX to the compound page order recorded in struct mem_section, so
it follows the same section-based optimization state as the rest of
sparse-vmemmap.

This lets the DAX population, initialization, and teardown paths make
their optimization decisions from the section metadata instead of
carrying separate pgmap-specific state.

As a consequence, DAX vmemmap optimization becomes section-granular:
only section-aligned ranges record a compound page order, so mappings
that cover only part of a section (subsection mappings) remain
unoptimized. The resulting loss of vmemmap savings is negligible.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 arch/powerpc/mm/book3s64/radix_pgtable.c |  5 +++--
 mm/memory_hotplug.c                      |  6 +-----
 mm/mm_init.c                             | 13 ++++---------
 mm/sparse-vmemmap.c                      | 24 ++++++++++++++++++------
 mm/sparse.c                              |  2 +-
 5 files changed, 27 insertions(+), 23 deletions(-)
diff mbox series

Patch

diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index fb8738016b30..f0043c57694e 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -1235,8 +1235,9 @@  int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
 	pmd_t *pmd;
 	pte_t *pte;
 	struct page *tail_page;
+	const struct mem_section *ms = __pfn_to_section(start_pfn);
 
-	tail_page = vmemmap_shared_tail_page(pgmap->vmemmap_shift, device_zone(node));
+	tail_page = vmemmap_shared_tail_page(section_order(ms), device_zone(node));
 	if (!tail_page)
 		return -ENOMEM;
 
@@ -1268,7 +1269,7 @@  int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
 			next = addr + PAGE_SIZE;
 			continue;
 		} else {
-			unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
+			unsigned long nr_pages = 1UL << section_order(ms);
 			unsigned long addr_pfn = page_to_pfn((struct page *)addr);
 			unsigned long pfn_offset = addr_pfn - ALIGN_DOWN(addr_pfn, nr_pages);
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9ff830703785..c9c69f827efa 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -551,11 +551,7 @@  void remove_pfn_range_from_zone(struct zone *zone,
 		/* Select all remaining pages up to the next section boundary */
 		cur_nr_pages =
 			min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
-		/*
-		 * This is a temporary workaround to prevent the shared vmemmap
-		 * page from being overwritten; it will be removed later.
-		 */
-		if (!zone_is_zone_device(zone))
+		if (!section_vmemmap_optimizable(__pfn_to_section(pfn)))
 			page_init_poison(pfn_to_page(pfn),
 					 sizeof(struct page) * cur_nr_pages);
 	}
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 35c99e5c215c..2b94115e6dd5 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1071,16 +1071,11 @@  static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
  * of an altmap. See vmemmap_populate_compound_pages().
  */
 static inline unsigned long compound_nr_pages(unsigned long pfn,
-					      struct vmem_altmap *altmap,
 					      struct dev_pagemap *pgmap)
 {
-	/*
-	 * If DAX memory is hot-plugged into an unoccupied subsection
-	 * of an early section, the unoptimized boot memmap is reused.
-	 * See section_activate().
-	 */
-	if (early_section(__pfn_to_section(pfn)) ||
-	    !vmemmap_can_optimize(altmap, pgmap))
+	const struct mem_section *ms = __pfn_to_section(pfn);
+
+	if (!section_vmemmap_optimizable(ms))
 		return pgmap_vmemmap_nr(pgmap);
 
 	return VMEMMAP_RESERVE_NR * (PAGE_SIZE / sizeof(struct page));
@@ -1150,7 +1145,7 @@  void __ref memmap_init_zone_device(struct zone *zone,
 			continue;
 
 		memmap_init_compound(page, pfn, zone_idx, nid, pgmap,
-				     compound_nr_pages(pfn, altmap, pgmap));
+				     compound_nr_pages(pfn, pgmap));
 	}
 
 	pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE, false, false);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index b5c109b8af6f..ad3e5b54abf7 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -455,8 +455,9 @@  static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
 	pte_t *pte;
 	int rc;
 	struct page *page;
+	const struct mem_section *ms = __pfn_to_section(start_pfn);
 
-	page = vmemmap_shared_tail_page(pgmap->vmemmap_shift, device_zone(node));
+	page = vmemmap_shared_tail_page(section_order(ms), device_zone(node));
 	if (!page)
 		return -ENOMEM;
 
@@ -464,7 +465,7 @@  static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
 		return vmemmap_populate_range(start, end, node, NULL,
 					      page_to_pfn(page));
 
-	size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page));
+	size = min(end - start, (1UL << section_order(ms)) * sizeof(struct page));
 	for (addr = start; addr < end; addr += size) {
 		unsigned long next, last = addr + size;
 
@@ -501,7 +502,9 @@  struct page * __meminit __populate_section_memmap(unsigned long pfn,
 		!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
 		return NULL;
 
-	if (vmemmap_can_optimize(altmap, pgmap))
+	/* This may occur in sub-section scenarios. */
+	if (vmemmap_can_optimize(altmap, pgmap) &&
+	    section_vmemmap_optimizable(__pfn_to_section(pfn)))
 		r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
 	else
 		r = vmemmap_populate(start, end, nid, altmap);
@@ -718,8 +721,10 @@  static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
 	else if (memmap)
 		free_map_bootmem(memmap);
 
-	if (empty)
+	if (empty) {
 		ms->section_mem_map = (unsigned long)NULL;
+		section_set_order(ms, 0);
+	}
 }
 
 static struct page * __meminit section_activate(int nid, unsigned long pfn,
@@ -729,8 +734,14 @@  static struct page * __meminit section_activate(int nid, unsigned long pfn,
 	struct mem_section *ms = __pfn_to_section(pfn);
 	struct mem_section_usage *usage = NULL;
 	struct page *memmap;
+	unsigned int order;
 	int rc;
 
+	order = vmemmap_can_optimize(altmap, pgmap) ? pgmap->vmemmap_shift : 0;
+	/* All sub-sections within a section must share the same order. */
+	if (nr_pages < PAGES_PER_SECTION && section_order(ms) && section_order(ms) != order)
+		return ERR_PTR(-ENOTSUPP);
+
 	if (!ms->usage) {
 		usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
 		if (!usage)
@@ -756,6 +767,7 @@  static struct page * __meminit section_activate(int nid, unsigned long pfn,
 	if (nr_pages < PAGES_PER_SECTION && early_section(ms))
 		return pfn_to_page(pfn);
 
+	section_set_order_range(pfn, nr_pages, order);
 	memmap = populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
 	if (!memmap) {
 		section_deactivate(pfn, nr_pages, altmap, pgmap);
@@ -801,14 +813,14 @@  int __meminit sparse_add_section(int nid, unsigned long start_pfn,
 	if (IS_ERR(memmap))
 		return PTR_ERR(memmap);
 
+	ms = __nr_to_section(section_nr);
 	/*
 	 * Poison uninitialized struct pages in order to catch invalid flags
 	 * combinations.
 	 */
-	if (!vmemmap_can_optimize(altmap, pgmap))
+	if (!section_vmemmap_optimizable(ms))
 		page_init_poison(memmap, sizeof(struct page) * nr_pages);
 
-	ms = __nr_to_section(section_nr);
 	__section_mark_present(ms, section_nr);
 
 	/* Align memmap to section boundary in the subsection case */
diff --git a/mm/sparse.c b/mm/sparse.c
index 54c38ea08190..6878f8941b4c 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -251,7 +251,7 @@  int __meminit section_nr_vmemmap_pages(unsigned long pfn, unsigned long nr_pages
 	if (vmemmap_can_optimize(altmap, pgmap))
 		vmemmap_pages = VMEMMAP_RESERVE_NR;
 
-	if (!vmemmap_can_optimize(altmap, pgmap) && !section_vmemmap_optimizable(ms))
+	if (!section_vmemmap_optimizable(ms))
 		return DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE);
 
 	if (order < PFN_SECTION_SHIFT) {