diff mbox series

[v7,6/7] mm/memory_hotplug: Embed vmem_altmap details in memory block

Message ID 20230801044116.10674-7-aneesh.kumar@linux.ibm.com (mailing list archive)
State Handled Elsewhere
Headers show
Series Add support for memmap on memory feature on ppc64 | expand

Commit Message

Aneesh Kumar K V Aug. 1, 2023, 4:41 a.m. UTC
With memmap on memory, some architecture needs more details w.r.t altmap
such as base_pfn, end_pfn, etc to unmap vmemmap memory. Instead of
computing them again when we remove a memory block, embed vmem_altmap
details in struct memory_block if we are using memmap on memory block
feature.

Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 drivers/base/memory.c  | 27 +++++++++++++--------
 include/linux/memory.h |  8 ++----
 mm/memory_hotplug.c    | 55 ++++++++++++++++++++++++++----------------
 3 files changed, 53 insertions(+), 37 deletions(-)

Comments

Verma, Vishal L Aug. 1, 2023, 11:10 p.m. UTC | #1
On Tue, 2023-08-01 at 10:11 +0530, Aneesh Kumar K.V wrote:
> With memmap on memory, some architecture needs more details w.r.t altmap
> such as base_pfn, end_pfn, etc to unmap vmemmap memory. Instead of
> computing them again when we remove a memory block, embed vmem_altmap
> details in struct memory_block if we are using memmap on memory block
> feature.
> 
> Acked-by: David Hildenbrand <david@redhat.com>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  drivers/base/memory.c  | 27 +++++++++++++--------
>  include/linux/memory.h |  8 ++----
>  mm/memory_hotplug.c    | 55 ++++++++++++++++++++++++++----------------
>  3 files changed, 53 insertions(+), 37 deletions(-)
> 
<snip>

> @@ -2136,10 +2148,10 @@ EXPORT_SYMBOL(try_offline_node);
>  
>  static int __ref try_remove_memory(u64 start, u64 size)
>  {
> -       struct vmem_altmap mhp_altmap = {};
> -       struct vmem_altmap *altmap = NULL;
> -       unsigned long nr_vmemmap_pages;
> +       int ret;

Minor nit - there is already an 'int rc' below - just use that, or
rename it to 'ret' if that's better for consistency.

> +       struct memory_block *mem;
>         int rc = 0, nid = NUMA_NO_NODE;
> +       struct vmem_altmap *altmap = NULL;
>  
>         BUG_ON(check_hotplug_memory_range(start, size));
>  
> @@ -2161,25 +2173,20 @@ static int __ref try_remove_memory(u64 start, u64 size)
>          * the same granularity it was added - a single memory block.
>          */
>         if (mhp_memmap_on_memory()) {
> -               nr_vmemmap_pages = walk_memory_blocks(start, size, NULL,
> -                                                     get_nr_vmemmap_pages_cb);
> -               if (nr_vmemmap_pages) {
> +               ret = walk_memory_blocks(start, size, &mem, test_has_altmap_cb);
> +               if (ret) {
>                         if (size != memory_block_size_bytes()) {
>                                 pr_warn("Refuse to remove %#llx - %#llx,"
>                                         "wrong granularity\n",
>                                         start, start + size);
>                                 return -EINVAL;
>                         }
> -
> +                       altmap = mem->altmap;
>                         /*
> -                        * Let remove_pmd_table->free_hugepage_table do the
> -                        * right thing if we used vmem_altmap when hot-adding
> -                        * the range.
> +                        * Mark altmap NULL so that we can add a debug
> +                        * check on memblock free.
>                          */
> -                       mhp_altmap.base_pfn = PHYS_PFN(start);
> -                       mhp_altmap.free = nr_vmemmap_pages;
> -                       mhp_altmap.alloc = nr_vmemmap_pages;
> -                       altmap = &mhp_altmap;
> +                       mem->altmap = NULL;
>                 }
>         }
>
Aneesh Kumar K V Aug. 2, 2023, 4:47 a.m. UTC | #2
On 8/2/23 4:40 AM, Verma, Vishal L wrote:
> On Tue, 2023-08-01 at 10:11 +0530, Aneesh Kumar K.V wrote:
>> With memmap on memory, some architecture needs more details w.r.t altmap
>> such as base_pfn, end_pfn, etc to unmap vmemmap memory. Instead of
>> computing them again when we remove a memory block, embed vmem_altmap
>> details in struct memory_block if we are using memmap on memory block
>> feature.
>>
>> Acked-by: David Hildenbrand <david@redhat.com>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>>  drivers/base/memory.c  | 27 +++++++++++++--------
>>  include/linux/memory.h |  8 ++----
>>  mm/memory_hotplug.c    | 55 ++++++++++++++++++++++++++----------------
>>  3 files changed, 53 insertions(+), 37 deletions(-)
>>
> <snip>
> 
>> @@ -2136,10 +2148,10 @@ EXPORT_SYMBOL(try_offline_node);
>>  
>>  static int __ref try_remove_memory(u64 start, u64 size)
>>  {
>> -       struct vmem_altmap mhp_altmap = {};
>> -       struct vmem_altmap *altmap = NULL;
>> -       unsigned long nr_vmemmap_pages;
>> +       int ret;
> 
> Minor nit - there is already an 'int rc' below - just use that, or
> rename it to 'ret' if that's better for consistency.
> 


I reused the existing rc variable. 

>> +       struct memory_block *mem;
>>         int rc = 0, nid = NUMA_NO_NODE;
>> +       struct vmem_altmap *altmap = NULL;
>>  
>>         BUG_ON(check_hotplug_memory_range(start, size));
>>  
>> @@ -2161,25 +2173,20 @@ static int __ref try_remove_memory(u64 start, u64 size)
>>          * the same granularity it was added - a single memory block.
>>          */
>>         if (mhp_memmap_on_memory()) {
>> -               nr_vmemmap_pages = walk_memory_blocks(start, size, NULL,
>> -                                                     get_nr_vmemmap_pages_cb);
>> -               if (nr_vmemmap_pages) {
>> +               ret = walk_memory_blocks(start, size, &mem, test_has_altmap_cb);
>> +               if (ret) {
>>                         if (size != memory_block_size_bytes()) {
>>                                 pr_warn("Refuse to remove %#llx - %#llx,"
>>                                         "wrong granularity\n",
>>                                         start, start + size);
>>                                 return -EINVAL;
>>                         }
>> -
>> +                       altmap = mem->altmap;
>>                         /*
>> -                        * Let remove_pmd_table->free_hugepage_table do the
>> -                        * right thing if we used vmem_altmap when hot-adding
>> -                        * the range.
>> +                        * Mark altmap NULL so that we can add a debug
>> +                        * check on memblock free.
>>                          */
>> -                       mhp_altmap.base_pfn = PHYS_PFN(start);
>> -                       mhp_altmap.free = nr_vmemmap_pages;
>> -                       mhp_altmap.alloc = nr_vmemmap_pages;
>> -                       altmap = &mhp_altmap;
>> +                       mem->altmap = NULL;
>>                 }
>>         }
>>  


Thank you for taking the time to review the patch.

-aneesh
diff mbox series

Patch

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index b456ac213610..8191709c9ad2 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -105,7 +105,8 @@  EXPORT_SYMBOL(unregister_memory_notifier);
 static void memory_block_release(struct device *dev)
 {
 	struct memory_block *mem = to_memory_block(dev);
-
+	/* Verify that the altmap is freed */
+	WARN_ON(mem->altmap);
 	kfree(mem);
 }
 
@@ -183,7 +184,7 @@  static int memory_block_online(struct memory_block *mem)
 {
 	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
-	unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
+	unsigned long nr_vmemmap_pages = 0;
 	struct zone *zone;
 	int ret;
 
@@ -200,6 +201,9 @@  static int memory_block_online(struct memory_block *mem)
 	 * stage helps to keep accounting easier to follow - e.g vmemmaps
 	 * belong to the same zone as the memory they backed.
 	 */
+	if (mem->altmap)
+		nr_vmemmap_pages = mem->altmap->free;
+
 	if (nr_vmemmap_pages) {
 		ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
 		if (ret)
@@ -230,7 +234,7 @@  static int memory_block_offline(struct memory_block *mem)
 {
 	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
 	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
-	unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
+	unsigned long nr_vmemmap_pages = 0;
 	int ret;
 
 	if (!mem->zone)
@@ -240,6 +244,9 @@  static int memory_block_offline(struct memory_block *mem)
 	 * Unaccount before offlining, such that unpopulated zone and kthreads
 	 * can properly be torn down in offline_pages().
 	 */
+	if (mem->altmap)
+		nr_vmemmap_pages = mem->altmap->free;
+
 	if (nr_vmemmap_pages)
 		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
 					  -nr_vmemmap_pages);
@@ -726,7 +733,7 @@  void memory_block_add_nid(struct memory_block *mem, int nid,
 #endif
 
 static int add_memory_block(unsigned long block_id, unsigned long state,
-			    unsigned long nr_vmemmap_pages,
+			    struct vmem_altmap *altmap,
 			    struct memory_group *group)
 {
 	struct memory_block *mem;
@@ -744,7 +751,7 @@  static int add_memory_block(unsigned long block_id, unsigned long state,
 	mem->start_section_nr = block_id * sections_per_block;
 	mem->state = state;
 	mem->nid = NUMA_NO_NODE;
-	mem->nr_vmemmap_pages = nr_vmemmap_pages;
+	mem->altmap = altmap;
 	INIT_LIST_HEAD(&mem->group_next);
 
 #ifndef CONFIG_NUMA
@@ -783,14 +790,14 @@  static int __init add_boot_memory_block(unsigned long base_section_nr)
 	if (section_count == 0)
 		return 0;
 	return add_memory_block(memory_block_id(base_section_nr),
-				MEM_ONLINE, 0,  NULL);
+				MEM_ONLINE, NULL,  NULL);
 }
 
 static int add_hotplug_memory_block(unsigned long block_id,
-				    unsigned long nr_vmemmap_pages,
+				    struct vmem_altmap *altmap,
 				    struct memory_group *group)
 {
-	return add_memory_block(block_id, MEM_OFFLINE, nr_vmemmap_pages, group);
+	return add_memory_block(block_id, MEM_OFFLINE, altmap, group);
 }
 
 static void remove_memory_block(struct memory_block *memory)
@@ -818,7 +825,7 @@  static void remove_memory_block(struct memory_block *memory)
  * Called under device_hotplug_lock.
  */
 int create_memory_block_devices(unsigned long start, unsigned long size,
-				unsigned long vmemmap_pages,
+				struct vmem_altmap *altmap,
 				struct memory_group *group)
 {
 	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
@@ -832,7 +839,7 @@  int create_memory_block_devices(unsigned long start, unsigned long size,
 		return -EINVAL;
 
 	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
-		ret = add_hotplug_memory_block(block_id, vmemmap_pages, group);
+		ret = add_hotplug_memory_block(block_id, altmap, group);
 		if (ret)
 			break;
 	}
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 31343566c221..f53cfdaaaa41 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -77,11 +77,7 @@  struct memory_block {
 	 */
 	struct zone *zone;
 	struct device dev;
-	/*
-	 * Number of vmemmap pages. These pages
-	 * lay at the beginning of the memory block.
-	 */
-	unsigned long nr_vmemmap_pages;
+	struct vmem_altmap *altmap;
 	struct memory_group *group;	/* group (if any) for this block */
 	struct list_head group_next;	/* next block inside memory group */
 #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
@@ -147,7 +143,7 @@  static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
 extern int register_memory_notifier(struct notifier_block *nb);
 extern void unregister_memory_notifier(struct notifier_block *nb);
 int create_memory_block_devices(unsigned long start, unsigned long size,
-				unsigned long vmemmap_pages,
+				struct vmem_altmap *altmap,
 				struct memory_group *group);
 void remove_memory_block_devices(unsigned long start, unsigned long size);
 extern void memory_dev_init(void);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 76b813991bdc..1ce8ad04a980 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1439,7 +1439,11 @@  int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 	if (mhp_flags & MHP_MEMMAP_ON_MEMORY) {
 		if (mhp_supports_memmap_on_memory(size)) {
 			mhp_altmap.free = memory_block_memmap_on_memory_pages();
-			params.altmap = &mhp_altmap;
+			params.altmap = kmalloc(sizeof(struct vmem_altmap), GFP_KERNEL);
+			if (!params.altmap)
+				goto error;
+
+			memcpy(params.altmap, &mhp_altmap, sizeof(mhp_altmap));
 		}
 		/* fallback to not using altmap  */
 	}
@@ -1447,13 +1451,13 @@  int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 	/* call arch's memory hotadd */
 	ret = arch_add_memory(nid, start, size, &params);
 	if (ret < 0)
-		goto error;
+		goto error_free;
 
 	/* create memory block devices after memory was added */
-	ret = create_memory_block_devices(start, size, mhp_altmap.free, group);
+	ret = create_memory_block_devices(start, size, params.altmap, group);
 	if (ret) {
 		arch_remove_memory(start, size, NULL);
-		goto error;
+		goto error_free;
 	}
 
 	if (new_node) {
@@ -1490,6 +1494,8 @@  int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 		walk_memory_blocks(start, size, NULL, online_memory_block);
 
 	return ret;
+error_free:
+	kfree(params.altmap);
 error:
 	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
 		memblock_remove(start, size);
@@ -2056,12 +2062,18 @@  static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
 	return 0;
 }
 
-static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg)
+static int test_has_altmap_cb(struct memory_block *mem, void *arg)
 {
+	struct memory_block **mem_ptr = (struct memory_block **)arg;
 	/*
-	 * If not set, continue with the next block.
+	 * return the memblock if we have altmap
+	 * and break callback.
 	 */
-	return mem->nr_vmemmap_pages;
+	if (mem->altmap) {
+		*mem_ptr = mem;
+		return 1;
+	}
+	return 0;
 }
 
 static int check_cpu_on_node(int nid)
@@ -2136,10 +2148,10 @@  EXPORT_SYMBOL(try_offline_node);
 
 static int __ref try_remove_memory(u64 start, u64 size)
 {
-	struct vmem_altmap mhp_altmap = {};
-	struct vmem_altmap *altmap = NULL;
-	unsigned long nr_vmemmap_pages;
+	int ret;
+	struct memory_block *mem;
 	int rc = 0, nid = NUMA_NO_NODE;
+	struct vmem_altmap *altmap = NULL;
 
 	BUG_ON(check_hotplug_memory_range(start, size));
 
@@ -2161,25 +2173,20 @@  static int __ref try_remove_memory(u64 start, u64 size)
 	 * the same granularity it was added - a single memory block.
 	 */
 	if (mhp_memmap_on_memory()) {
-		nr_vmemmap_pages = walk_memory_blocks(start, size, NULL,
-						      get_nr_vmemmap_pages_cb);
-		if (nr_vmemmap_pages) {
+		ret = walk_memory_blocks(start, size, &mem, test_has_altmap_cb);
+		if (ret) {
 			if (size != memory_block_size_bytes()) {
 				pr_warn("Refuse to remove %#llx - %#llx,"
 					"wrong granularity\n",
 					start, start + size);
 				return -EINVAL;
 			}
-
+			altmap = mem->altmap;
 			/*
-			 * Let remove_pmd_table->free_hugepage_table do the
-			 * right thing if we used vmem_altmap when hot-adding
-			 * the range.
+			 * Mark altmap NULL so that we can add a debug
+			 * check on memblock free.
 			 */
-			mhp_altmap.base_pfn = PHYS_PFN(start);
-			mhp_altmap.free = nr_vmemmap_pages;
-			mhp_altmap.alloc = nr_vmemmap_pages;
-			altmap = &mhp_altmap;
+			mem->altmap = NULL;
 		}
 	}
 
@@ -2196,6 +2203,12 @@  static int __ref try_remove_memory(u64 start, u64 size)
 
 	arch_remove_memory(start, size, altmap);
 
+	/* Verify that all vmemmap pages have actually been freed. */
+	if (altmap) {
+		WARN(altmap->alloc, "Altmap not fully unmapped");
+		kfree(altmap);
+	}
+
 	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
 		memblock_phys_free(start, size);
 		memblock_remove(start, size);