@@ -1235,8 +1235,9 @@ int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
pmd_t *pmd;
pte_t *pte;
struct page *tail_page;
+ const struct mem_section *ms = __pfn_to_section(start_pfn);
- tail_page = vmemmap_shared_tail_page(pgmap->vmemmap_shift, device_zone(node));
+ tail_page = vmemmap_shared_tail_page(section_order(ms), device_zone(node));
if (!tail_page)
return -ENOMEM;
@@ -1268,7 +1269,7 @@ int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
next = addr + PAGE_SIZE;
continue;
} else {
- unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
+ unsigned long nr_pages = 1UL << section_order(ms);
unsigned long addr_pfn = page_to_pfn((struct page *)addr);
unsigned long pfn_offset = addr_pfn - ALIGN_DOWN(addr_pfn, nr_pages);
@@ -551,11 +551,7 @@ void remove_pfn_range_from_zone(struct zone *zone,
/* Select all remaining pages up to the next section boundary */
cur_nr_pages =
min(end_pfn - pfn, SECTION_ALIGN_UP(pfn + 1) - pfn);
- /*
- * This is a temporary workaround to prevent the shared vmemmap
- * page from being overwritten; it will be removed later.
- */
- if (!zone_is_zone_device(zone))
+ if (!section_vmemmap_optimizable(__pfn_to_section(pfn)))
page_init_poison(pfn_to_page(pfn),
sizeof(struct page) * cur_nr_pages);
}
@@ -1071,16 +1071,11 @@ static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
* of an altmap. See vmemmap_populate_compound_pages().
*/
static inline unsigned long compound_nr_pages(unsigned long pfn,
- struct vmem_altmap *altmap,
struct dev_pagemap *pgmap)
{
- /*
- * If DAX memory is hot-plugged into an unoccupied subsection
- * of an early section, the unoptimized boot memmap is reused.
- * See section_activate().
- */
- if (early_section(__pfn_to_section(pfn)) ||
- !vmemmap_can_optimize(altmap, pgmap))
+ const struct mem_section *ms = __pfn_to_section(pfn);
+
+ if (!section_vmemmap_optimizable(ms))
return pgmap_vmemmap_nr(pgmap);
return VMEMMAP_RESERVE_NR * (PAGE_SIZE / sizeof(struct page));
@@ -1150,7 +1145,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
continue;
memmap_init_compound(page, pfn, zone_idx, nid, pgmap,
- compound_nr_pages(pfn, altmap, pgmap));
+ compound_nr_pages(pfn, pgmap));
}
pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE, false, false);
@@ -455,8 +455,9 @@ static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
pte_t *pte;
int rc;
struct page *page;
+ const struct mem_section *ms = __pfn_to_section(start_pfn);
- page = vmemmap_shared_tail_page(pgmap->vmemmap_shift, device_zone(node));
+ page = vmemmap_shared_tail_page(section_order(ms), device_zone(node));
if (!page)
return -ENOMEM;
@@ -464,7 +465,7 @@ static int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
return vmemmap_populate_range(start, end, node, NULL,
page_to_pfn(page));
- size = min(end - start, pgmap_vmemmap_nr(pgmap) * sizeof(struct page));
+ size = min(end - start, (1UL << section_order(ms)) * sizeof(struct page));
for (addr = start; addr < end; addr += size) {
unsigned long next, last = addr + size;
@@ -501,7 +502,9 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn,
!IS_ALIGNED(nr_pages, PAGES_PER_SUBSECTION)))
return NULL;
- if (vmemmap_can_optimize(altmap, pgmap))
+ /* The pgmap may permit optimization while the section does not, e.g. for sub-section ranges. */
+ if (vmemmap_can_optimize(altmap, pgmap) &&
+ section_vmemmap_optimizable(__pfn_to_section(pfn)))
r = vmemmap_populate_compound_pages(pfn, start, end, nid, pgmap);
else
r = vmemmap_populate(start, end, nid, altmap);
@@ -718,8 +721,10 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
else if (memmap)
free_map_bootmem(memmap);
- if (empty)
+ if (empty) {
ms->section_mem_map = (unsigned long)NULL;
+ section_set_order(ms, 0);
+ }
}
static struct page * __meminit section_activate(int nid, unsigned long pfn,
@@ -729,8 +734,14 @@ static struct page * __meminit section_activate(int nid, unsigned long pfn,
struct mem_section *ms = __pfn_to_section(pfn);
struct mem_section_usage *usage = NULL;
struct page *memmap;
+ unsigned int order;
int rc;
+ order = vmemmap_can_optimize(altmap, pgmap) ? pgmap->vmemmap_shift : 0;
+ /* All sub-sections within a section must share the same order. */
+ if (nr_pages < PAGES_PER_SECTION && section_order(ms) && section_order(ms) != order)
+ return ERR_PTR(-ENOTSUPP);
+
if (!ms->usage) {
usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
if (!usage)
@@ -756,6 +767,7 @@ static struct page * __meminit section_activate(int nid, unsigned long pfn,
if (nr_pages < PAGES_PER_SECTION && early_section(ms))
return pfn_to_page(pfn);
+ section_set_order_range(pfn, nr_pages, order);
memmap = populate_section_memmap(pfn, nr_pages, nid, altmap, pgmap);
if (!memmap) {
section_deactivate(pfn, nr_pages, altmap, pgmap);
@@ -801,14 +813,14 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
if (IS_ERR(memmap))
return PTR_ERR(memmap);
+ ms = __nr_to_section(section_nr);
/*
* Poison uninitialized struct pages in order to catch invalid flags
* combinations.
*/
- if (!vmemmap_can_optimize(altmap, pgmap))
+ if (!section_vmemmap_optimizable(ms))
page_init_poison(memmap, sizeof(struct page) * nr_pages);
- ms = __nr_to_section(section_nr);
__section_mark_present(ms, section_nr);
/* Align memmap to section boundary in the subsection case */
@@ -251,7 +251,7 @@ int __meminit section_nr_vmemmap_pages(unsigned long pfn, unsigned long nr_pages
if (vmemmap_can_optimize(altmap, pgmap))
vmemmap_pages = VMEMMAP_RESERVE_NR;
- if (!vmemmap_can_optimize(altmap, pgmap) && !section_vmemmap_optimizable(ms))
+ if (!section_vmemmap_optimizable(ms))
return DIV_ROUND_UP(nr_pages * sizeof(struct page), PAGE_SIZE);
if (order < PFN_SECTION_SHIFT) {
DAX vmemmap optimization still uses pgmap-specific state to decide whether a section should use the optimized layout. Switch DAX to the compound page order recorded in struct mem_section, so it follows the same section-based optimization state as the rest of sparse-vmemmap. This lets the DAX population, initialization, and teardown paths make their optimization decisions from the section metadata instead of carrying separate pgmap-specific state. This makes DAX vmemmap optimization section-granular. Only section-aligned ranges record a compound page order, so subsection mappings remain unoptimized. The resulting loss of vmemmap savings is negligible. Signed-off-by: Muchun Song <songmuchun@bytedance.com> --- arch/powerpc/mm/book3s64/radix_pgtable.c | 5 +++-- mm/memory_hotplug.c | 6 +----- mm/mm_init.c | 13 ++++--------- mm/sparse-vmemmap.c | 24 ++++++++++++++++++------ mm/sparse.c | 2 +- 5 files changed, 27 insertions(+), 23 deletions(-)