diff mbox series

[v2,64/69] mm/mm_init: Factor out compound page initialization

Message ID 20260513132044.41690-18-songmuchun@bytedance.com (mailing list archive)
State Handled Elsewhere
Headers show
Series mm: Generalize HVO for HugeTLB and device DAX | expand

Commit Message

Muchun Song May 13, 2026, 1:20 p.m. UTC
The compound struct page initialization needed by boot-time gigantic hugetlb
folios is currently open-coded in hugetlb_folio_init_vmemmap() in mm/hugetlb.c,
while ZONE_DEVICE has its own separate initialization path
(__init_zone_device_page() and memmap_init_compound()) in mm/mm_init.c.

Factor the common compound memmap setup into memmap_init_compound_page_frozen()
so both paths can share the same frozen page initialization logic. This removes
duplicated open-coded compound page setup and keeps the initialization rules
in one place.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 mm/hugetlb.c  |  25 +-----------
 mm/internal.h |   2 +
 mm/mm_init.c  | 111 +++++++++++++++++++-------------------------------
 3 files changed, 45 insertions(+), 93 deletions(-)
diff mbox series

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 10f04fa95d43..7e9f49882395 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3118,28 +3118,6 @@  static bool __init alloc_bootmem_huge_page(struct hstate *h, int nid)
 	return true;
 }
 
-static void __init hugetlb_folio_init_vmemmap(struct page *head, unsigned long pfn,
-		enum zone_type zone, int nid, unsigned int order, unsigned int nr_pages)
-{
-	/*
-	 * This is an open-coded prep_compound_page() whereby we avoid
-	 * walking pages twice by initializing/preparing+freezing them in the
-	 * same go.
-	 */
-	__init_single_page(head, pfn, zone, nid);
-	set_page_count(head, 0);
-
-	__SetPageHead(head);
-	for (int i = 1; i < nr_pages; i++) {
-		struct page *page = head + i;
-
-		__init_single_page(page, pfn + i, zone, nid);
-		prep_compound_tail(page, head, order);
-		set_page_count(page, 0);
-	}
-	prep_compound_head(head, order);
-}
-
 /*
  * memblock-allocated pageblocks might not have the migrate type set
  * if marked with the 'noinit' flag. Set it to the default (MIGRATE_MOVABLE)
@@ -3210,8 +3188,7 @@  static void __init gather_bootmem_prealloc_node(unsigned long nid)
 
 		VM_BUG_ON(!hstate_is_gigantic(h));
 
-		hugetlb_folio_init_vmemmap(page, pfn, zone, nid, huge_page_order(h),
-					   vmemmap_nr_struct_pages(pfn, nr_pages));
+		memmap_init_compound_page_frozen(page, pfn, zone, nid, huge_page_order(h));
 		init_new_hugetlb_folio(folio);
 
 		if (order_vmemmap_optimizable(pfn_to_section_order(pfn))) {
diff --git a/mm/internal.h b/mm/internal.h
index 416afdf7b2ec..2c67ae25124b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1793,6 +1793,8 @@  static inline bool pte_needs_soft_dirty_wp(struct vm_area_struct *vma, pte_t pte
 
 void __meminit __init_single_page(struct page *page, unsigned long pfn,
 				unsigned long zone, int nid);
+void __meminit memmap_init_compound_page_frozen(struct page *head, unsigned long pfn,
+		enum zone_type zone, int nid, unsigned int order);
 
 /* shrinker related functions */
 unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 95422e92ede8..9b23c31db8c6 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1018,79 +1018,46 @@  static void __init memmap_init(void)
 		init_unavailable_range(hole_pfn, end_pfn, zone_id, nid);
 }
 
-#ifdef CONFIG_ZONE_DEVICE
-static void __ref __init_zone_device_page(struct page *page, unsigned long pfn,
-					  unsigned long zone_idx, int nid,
-					  struct dev_pagemap *pgmap)
+static void __meminit init_single_page_frozen(struct page *page, unsigned long pfn,
+		enum zone_type zone, int nid)
 {
+	__init_single_page(page, pfn, zone, nid);
+	if (zone_is_zone_device(&NODE_DATA(nid)->node_zones[zone])) {
+		/*
+		 * ZONE_DEVICE pages are not managed by the page allocator, mark
+		 * them reserved to prevent them from being touched elsewhere.
+		 *
+		 * We can use the non-atomic __set_bit operation for setting
+		 * the flag as we are still initializing the pages.
+		 */
+		__SetPageReserved(page);
 
-	__init_single_page(page, pfn, zone_idx, nid);
-
-	/*
-	 * Mark page reserved as it will need to wait for onlining
-	 * phase for it to be fully associated with a zone.
-	 *
-	 * We can use the non-atomic __set_bit operation for setting
-	 * the flag as we are still initializing the pages.
-	 */
-	__SetPageReserved(page);
-
-	/*
-	 * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer
-	 * and zone_device_data.  It is a bug if a ZONE_DEVICE page is
-	 * ever freed or placed on a driver-private list.
-	 */
-	page_folio(page)->pgmap = pgmap;
-	page->zone_device_data = NULL;
-
-	/*
-	 * ZONE_DEVICE pages other than MEMORY_TYPE_GENERIC are released
-	 * directly to the driver page allocator which will set the page count
-	 * to 1 when allocating the page.
-	 *
-	 * MEMORY_TYPE_GENERIC and MEMORY_TYPE_FS_DAX pages automatically have
-	 * their refcount reset to one whenever they are freed (ie. after
-	 * their refcount drops to 0).
-	 */
-	switch (pgmap->type) {
-	case MEMORY_DEVICE_FS_DAX:
-	case MEMORY_DEVICE_PRIVATE:
-	case MEMORY_DEVICE_COHERENT:
-	case MEMORY_DEVICE_PCI_P2PDMA:
-		set_page_count(page, 0);
-		break;
-
-	case MEMORY_DEVICE_GENERIC:
-		break;
+		/*
+		 * ZONE_DEVICE pages union ->lru with a ->pgmap back pointer
+		 * and zone_device_data.  It is a bug if a ZONE_DEVICE page is
+		 * ever freed or placed on a driver-private list.
+		 */
+		page->zone_device_data = NULL;
 	}
+	set_page_count(page, 0);
 }
 
-static void __ref memmap_init_compound(struct page *head,
-				       unsigned long head_pfn,
-				       unsigned long zone_idx, int nid,
-				       struct dev_pagemap *pgmap,
-				       unsigned long nr_pages)
+void __meminit memmap_init_compound_page_frozen(struct page *head, unsigned long pfn,
+		enum zone_type zone, int nid, unsigned int order)
 {
-	unsigned long pfn, end_pfn = head_pfn + nr_pages;
-	unsigned int order = pgmap->vmemmap_shift;
+	int nr_pages = vmemmap_nr_struct_pages(pfn, 1UL << order);
 
-	/*
-	 * We have to initialize the pages, including setting up page links.
-	 * prep_compound_page() does not take care of that, so instead we
-	 * open-code prep_compound_page() so we can take care of initializing
-	 * the pages in the same go.
-	 */
-	__SetPageHead(head);
-	for (pfn = head_pfn + 1; pfn < end_pfn; pfn++) {
-		struct page *page = pfn_to_page(pfn);
+	init_single_page_frozen(head, pfn, zone, nid);
 
-		__init_zone_device_page(page, pfn, zone_idx, nid, pgmap);
-		prep_compound_tail(page, head, order);
-		set_page_count(page, 0);
+	__SetPageHead(head);
+	for (int i = 1; i < nr_pages; i++) {
+		init_single_page_frozen(head + i, pfn + i, zone, nid);
+		prep_compound_tail(head + i, head, order);
 	}
 	prep_compound_head(head, order);
 }
 
+#ifdef CONFIG_ZONE_DEVICE
 void __ref memmap_init_zone_device(struct zone *zone,
 				   unsigned long start_pfn,
 				   unsigned long nr_pages,
@@ -1118,18 +1085,24 @@  void __ref memmap_init_zone_device(struct zone *zone,
 	}
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn += pfns_per_compound) {
-		struct page *page = pfn_to_page(pfn);
-
-		__init_zone_device_page(page, pfn, zone_idx, nid, pgmap);
+		struct page *head = pfn_to_page(pfn);
 
 		if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
 			cond_resched();
 
-		if (pfns_per_compound == 1)
-			continue;
-
-		memmap_init_compound(page, pfn, zone_idx, nid, pgmap,
-				     vmemmap_nr_struct_pages(pfn, pfns_per_compound));
+		if (pgmap->vmemmap_shift)
+			memmap_init_compound_page_frozen(head, pfn, zone_idx, nid,
+							 pgmap->vmemmap_shift);
+		else
+			init_single_page_frozen(head, pfn, zone_idx, nid);
+		/*
+		 * ZONE_DEVICE pages other than MEMORY_DEVICE_GENERIC are
+		 * released directly to the driver page allocator, which sets
+		 * the page count to 1 when allocating the page.
+		 */
+		if (pgmap->type == MEMORY_DEVICE_GENERIC)
+			init_page_count(head);
+		((struct folio *)head)->pgmap = pgmap;
 	}
 
 	pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE, false, false);