diff mbox series

[v2,08/69] mm/mm_init: Defer sparse_init() until after zone initialization

Message ID 20260513130542.35604-9-songmuchun@bytedance.com (mailing list archive)
State Handled Elsewhere
Headers show
Series mm: Generalize HVO for HugeTLB and device DAX | expand

Commit Message

Muchun Song May 13, 2026, 1:04 p.m. UTC
free_area_init() is responsible for initializing pgdat and zone state.
Calling sparse_init() from there mixes in later vmemmap and struct page
setup, which makes the initialization flow less clear.

Defer sparse_init(), sparse_vmemmap_init_nid_late(), and memmap_init()
until after free_area_init() completes, when zone initialization is fully
done. This keeps free_area_init() focused on zone setup and ensures that
sparse_init() runs with the relevant zone state already available.

This is also a prerequisite for later hugetlb vmemmap changes that need
zone information during early sparse vmemmap setup.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
v1->v2:
- Restore the set_pageblock_order() change suggested by Mike Rapoport
- Add Mike Rapoport's Reviewed-by
---
 mm/mm_init.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

Comments

Oscar Salvador (SUSE) May 18, 2026, 6:29 a.m. UTC | #1
On Wed, May 13, 2026 at 09:04:36PM +0800, Muchun Song wrote:
>  void __init mm_core_init_early(void)
>  {
> +	int nid;
> +
>  	hugetlb_cma_reserve();
>  	hugetlb_bootmem_alloc();
>  
>  	free_area_init();
> +
> +	sparse_init();
> +	for_each_node_state(nid, N_MEMORY)
> +		sparse_vmemmap_init_nid_late(nid);

Would it not make more sense to hide sparse_vmemmap_init_nid_late()
within sparse_init() and have it called at the end of the function?

The flow would be:

 sparse_init()
  sparse_init_nid
   sparse_vmemmap_init_nid_early
   ...
  sparse_vmemmap_init_nid_late

I think it is better to have sparse stuff all together in one place instead of scattered.
Oscar Salvador (SUSE) May 18, 2026, 6:32 a.m. UTC | #2
On Mon, May 18, 2026 at 08:29:07AM +0200, Oscar Salvador (SUSE) wrote:
> On Wed, May 13, 2026 at 09:04:36PM +0800, Muchun Song wrote:
> >  void __init mm_core_init_early(void)
> >  {
> > +	int nid;
> > +
> >  	hugetlb_cma_reserve();
> >  	hugetlb_bootmem_alloc();
> >  
> >  	free_area_init();
> > +
> > +	sparse_init();
> > +	for_each_node_state(nid, N_MEMORY)
> > +		sparse_vmemmap_init_nid_late(nid);
> 
> Would it not make more sense to hide sparse_vmemmap_init_nid_late()
> within sparse_init() and have it called at the end of the function?
> 
> The flow would be:
> 
>  sparse_init()
>   sparse_init_nid
>    sparse_vmemmap_init_nid_early
>    ...
>   sparse_vmemmap_init_nid_late
> 
> I think it is better to have sparse stuff all together in one place instead of scattered.

Ah, I see that you later remove this stuff, so disregard this please.
Oscar Salvador (SUSE) May 18, 2026, 6:32 a.m. UTC | #3
On Wed, May 13, 2026 at 09:04:36PM +0800, Muchun Song wrote:
> free_area_init() is responsible for initializing pgdat and zone state.
> Calling sparse_init() from there mixes in later vmemmap and struct page
> setup, which makes the initialization flow less clear.
> 
> Defer sparse_init(), sparse_vmemmap_init_nid_late(), and memmap_init()
> until after free_area_init() completes, when zone initialization is fully
> done. This keeps free_area_init() focused on zone setup and ensures that
> sparse_init() runs with the relevant zone state already available.
> 
> This is also a prerequisite for later hugetlb vmemmap changes that need
> zone information during early sparse vmemmap setup.
> 
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>

Reviewed-by: Oscar Salvador (SUSE) <osalvador@kernel.org>

> ---
> v1->v2:
> - Restore the set_pageblock_order() change suggested by Mike Rapoport
> - Add Mike Rapoport's Reviewed-by
> ---
>  mm/mm_init.c | 12 +++++++-----
>  1 file changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 12fe21c4e26c..c14491c2dad3 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -1826,7 +1826,6 @@ static void __init free_area_init(void)
>  	bool descending;
>  
>  	arch_zone_limits_init(max_zone_pfn);
> -	sparse_init();
>  
>  	start_pfn = PHYS_PFN(memblock_start_of_DRAM());
>  	descending = arch_has_descending_max_zone_pfns();
> @@ -1915,11 +1914,7 @@ static void __init free_area_init(void)
>  		}
>  	}
>  
> -	for_each_node_state(nid, N_MEMORY)
> -		sparse_vmemmap_init_nid_late(nid);
> -
>  	calc_nr_kernel_pages();
> -	memmap_init();
>  
>  	/* disable hash distribution for systems with a single node */
>  	fixup_hashdist();
> @@ -2691,10 +2686,17 @@ void __init __weak mem_init(void)
>  
>  void __init mm_core_init_early(void)
>  {
> +	int nid;
> +
>  	hugetlb_cma_reserve();
>  	hugetlb_bootmem_alloc();
>  
>  	free_area_init();
> +
> +	sparse_init();
> +	for_each_node_state(nid, N_MEMORY)
> +		sparse_vmemmap_init_nid_late(nid);
> +	memmap_init();
>  }
>  
>  /*
> -- 
> 2.54.0
>
diff mbox series

Patch

diff --git a/mm/mm_init.c b/mm/mm_init.c
index 12fe21c4e26c..c14491c2dad3 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1826,7 +1826,6 @@  static void __init free_area_init(void)
 	bool descending;
 
 	arch_zone_limits_init(max_zone_pfn);
-	sparse_init();
 
 	start_pfn = PHYS_PFN(memblock_start_of_DRAM());
 	descending = arch_has_descending_max_zone_pfns();
@@ -1915,11 +1914,7 @@  static void __init free_area_init(void)
 		}
 	}
 
-	for_each_node_state(nid, N_MEMORY)
-		sparse_vmemmap_init_nid_late(nid);
-
 	calc_nr_kernel_pages();
-	memmap_init();
 
 	/* disable hash distribution for systems with a single node */
 	fixup_hashdist();
@@ -2691,10 +2686,17 @@  void __init __weak mem_init(void)
 
 void __init mm_core_init_early(void)
 {
+	int nid;
+
 	hugetlb_cma_reserve();
 	hugetlb_bootmem_alloc();
 
 	free_area_init();
+
+	sparse_init();
+	for_each_node_state(nid, N_MEMORY)
+		sparse_vmemmap_init_nid_late(nid);
+	memmap_init();
 }
 
 /*