powerpc/mm/hugetlb: Add support for reserving gigantic huge pages via kernel command line

Submitted by Aneesh Kumar K.V on May 16, 2017, 9:24 a.m.

Details

Message ID 1494926691-24664-1-git-send-email-aneesh.kumar@linux.vnet.ibm.com
State New
Headers show

Commit Message

Aneesh Kumar K.V May 16, 2017, 9:24 a.m.
We use the kernel command line to do reservation of hugetlb pages. The code
duplication here is mostly to make it simpler. With 64-bit book3s, we need to
support either a 16G or a 1G gigantic hugepage, whereas the FSL_BOOK3E
implementation needs to support multiple gigantic hugepage sizes. We avoid the
gpage_npages array and use a single gpage_npages count for ppc64. We also cannot
use the generic code to do the gigantic page allocation because that would
require conditionals to handle the pseries allocation, where the memory is
already reserved by the hypervisor.

In order to keep it simple, book3s 64 implements its own version that also
works with pseries.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/hugetlb.h |  8 +---
 arch/powerpc/mm/hugetlbpage.c      | 78 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 7 deletions(-)

Comments

Anshuman Khandual May 17, 2017, 5:01 a.m.
On 05/16/2017 02:54 PM, Aneesh Kumar K.V wrote:
> +void __init reserve_hugetlb_gpages(void)
> +{
> +	char buf[10];
> +	phys_addr_t base;
> +	unsigned long gpage_size = 1UL << 34;
> +	static __initdata char cmdline[COMMAND_LINE_SIZE];
> +
> +	if (radix_enabled())
> +		gpage_size = 1UL << 30;
> +
> +	strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
> +	parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0,
> +		   NULL, &do_gpage_early_setup);
> +
> +	if (!gpage_npages)
> +		return;
> +
> +	string_get_size(gpage_size, 1, STRING_UNITS_2, buf, sizeof(buf));
> +	pr_info("Trying to reserve %ld %s pages\n", gpage_npages, buf);
> +
> +	/* Allocate one page at a time */
> +	while(gpage_npages) {
> +		base = memblock_alloc_base(gpage_size, gpage_size,
> +					   MEMBLOCK_ALLOC_ANYWHERE);
> +		add_gpage(base, gpage_size, 1);

For 16GB pages (1UL << 34) on POWER8, we already do these functions
inside htab_dt_scan_hugepage_blocks(). IIUC this happens just by
scanning DT without even specifying any gpages in kernel command
line.

memblock_reserve()
add_gpage()

Then won't attempting to allocate from memblock and adding it again into
the gigantic pages list collide? Moreover, it's trying to allocate
across all of RAM, not specifically from the gpages mentioned in the device
tree by the platform. Are we trying to support 16GB pages from just
any memory, without platform notification through DT?
Aneesh Kumar K.V May 17, 2017, 6:59 a.m.
On Wednesday 17 May 2017 10:31 AM, Anshuman Khandual wrote:
> On 05/16/2017 02:54 PM, Aneesh Kumar K.V wrote:
>> +void __init reserve_hugetlb_gpages(void)
>> +{
>> +	char buf[10];
>> +	phys_addr_t base;
>> +	unsigned long gpage_size = 1UL << 34;
>> +	static __initdata char cmdline[COMMAND_LINE_SIZE];
>> +
>> +	if (radix_enabled())
>> +		gpage_size = 1UL << 30;
>> +
>> +	strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
>> +	parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0,
>> +		   NULL, &do_gpage_early_setup);
>> +
>> +	if (!gpage_npages)
>> +		return;
>> +
>> +	string_get_size(gpage_size, 1, STRING_UNITS_2, buf, sizeof(buf));
>> +	pr_info("Trying to reserve %ld %s pages\n", gpage_npages, buf);
>> +
>> +	/* Allocate one page at a time */
>> +	while(gpage_npages) {
>> +		base = memblock_alloc_base(gpage_size, gpage_size,
>> +					   MEMBLOCK_ALLOC_ANYWHERE);
>> +		add_gpage(base, gpage_size, 1);
>
> For 16GB pages (1UL << 34) on POWER8, we already do these functions
> inside htab_dt_scan_hugepage_blocks(). IIUC this happens just by
> scanning DT without even specifying any gpages in kernel command
> line.
>
> memblock_reserve()
> add_gpage()
>
> Then attempting to allocate from memblock and adding it again into
> gigantic pages list wont collide ?

That is for pseries, i.e. pSeries will get the hugepages reserved by phyp
and the details of those pages are passed via the device tree. Not sure what
the conflict is here. If we use the above kernel parameter, we will try
to allocate another 'x' number of hugepages.

> More over its trying to allocate
> across the RAM not specifically on the gpages mentioned in device
> tree by the platform. Are we trying to support 16GB pages just from
> any memory without platform notification through DT ?
>

There are two ways to specify gpages: one via the device tree, which is used
only in the case of pseries, and the other via hugepagesz=size hugepages=no-of-hugepages.

-aneesh
Anshuman Khandual May 18, 2017, 1:04 p.m.
On 05/17/2017 12:29 PM, Aneesh Kumar K.V wrote:
> 
> 
> On Wednesday 17 May 2017 10:31 AM, Anshuman Khandual wrote:
>> On 05/16/2017 02:54 PM, Aneesh Kumar K.V wrote:
>>> +void __init reserve_hugetlb_gpages(void)
>>> +{
>>> +    char buf[10];
>>> +    phys_addr_t base;
>>> +    unsigned long gpage_size = 1UL << 34;
>>> +    static __initdata char cmdline[COMMAND_LINE_SIZE];
>>> +
>>> +    if (radix_enabled())
>>> +        gpage_size = 1UL << 30;
>>> +
>>> +    strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
>>> +    parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0,
>>> +           NULL, &do_gpage_early_setup);
>>> +
>>> +    if (!gpage_npages)
>>> +        return;
>>> +
>>> +    string_get_size(gpage_size, 1, STRING_UNITS_2, buf, sizeof(buf));
>>> +    pr_info("Trying to reserve %ld %s pages\n", gpage_npages, buf);
>>> +
>>> +    /* Allocate one page at a time */
>>> +    while(gpage_npages) {
>>> +        base = memblock_alloc_base(gpage_size, gpage_size,
>>> +                       MEMBLOCK_ALLOC_ANYWHERE);
>>> +        add_gpage(base, gpage_size, 1);
>>
>> For 16GB pages (1UL << 34) on POWER8, we already do these functions
>> inside htab_dt_scan_hugepage_blocks(). IIUC this happens just by
>> scanning DT without even specifying any gpages in kernel command
>> line.
>>
>> memblock_reserve()
>> add_gpage()
>>
>> Then attempting to allocate from memblock and adding it again into
>> gigantic pages list wont collide ?
> 
> That is for pseries.ie, pSeries will get the hugpages reserved by phyp
> and the details of those pages are passed via device tree. Not sure what
> is the conflict here. If we use the above kernel parameter, we will try
> to allocate another 'x' number of hugepages.
> 
>> More over its trying to allocate
>> across the RAM not specifically on the gpages mentioned in device
>> tree by the platform. Are we trying to support 16GB pages just from
>> any memory without platform notification through DT ?
>>
> 
> There are two ways to specify gpages, one via device tree which is used
> only in case of pseries and other hugepagesz=size hugepags=no-of-hugepages.

New way (Added with this patch)
-------------------------------
setup_arch()
	reserve_hugetlb_gpages() (Now defined for PPC64 BOOK3S)

reserve_hugetlb_gpages() allocates 1GB (radix) / 16GB (hash) pages from
memblock during boot (with memblock_alloc_base()), based on the
kernel command line parameters for HugeTLB gigantic pages. It then
calls add_gpage(), which populates gpage_freearray[], which remains
local to the powerpc arch.

Existing DT (pseries on PHYP)
-----------------------------
early_setup()
	early_init_devtree()
		mmu_early_init_devtree()
			hash__early_init_devtree()
				htab_scan_page_sizes()
					htab_dt_scan_hugepage_blocks()

htab_dt_scan_hugepage_blocks() scans and adds individual PHYP-reserved
16GB huge pages into gpage_freearray[] through the add_gpage() call.

The same kernel command line parameters then create the hstate structure
for the gigantic pages in generic HugeTLB, which then calls
alloc_bootmem_huge_page(), transferring the local gpage details stored in
gpage_freearray[] to the generic huge_boot_pages. I hope my understanding
here is correct; please correct me otherwise.

DT-scanned gpages are first reserved with memblock_reserve(), hence
they won't be used by memblock_alloc_base() called from the other
method. Hence there is no race during add_gpage() on systems using both
methods simultaneously. I don't see anything preventing reserve_hugetlb_gpages()
from being called on pseries systems though, in which case it may allocate
more gigantic pages than required if there are some already available
through the DT path. Will look into this further.

Patch hide | download patch | download mbox

diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 7f4025a6c69e..03401a17d1da 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -218,13 +218,7 @@  static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
 }
 #endif /* CONFIG_HUGETLB_PAGE */
 
-/*
- * FSL Book3E platforms require special gpage handling - the gpages
- * are reserved early in the boot process by memblock instead of via
- * the .dts as on IBM platforms.
- */
-#if defined(CONFIG_HUGETLB_PAGE) && (defined(CONFIG_PPC_FSL_BOOK3E) || \
-    defined(CONFIG_PPC_8xx))
+#ifdef CONFIG_HUGETLB_PAGE
 extern void __init reserve_hugetlb_gpages(void);
 #else
 static inline void reserve_hugetlb_gpages(void)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1816b965a142..4ebaa18f2495 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -19,6 +19,7 @@ 
 #include <linux/moduleparam.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/string_helpers.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
@@ -373,6 +374,83 @@  int alloc_bootmem_huge_page(struct hstate *hstate)
 	m->hstate = hstate;
 	return 1;
 }
+
+static unsigned long gpage_npages;
+static int __init do_gpage_early_setup(char *param, char *val,
+				       const char *unused, void *arg)
+{
+	unsigned long npages;
+	static unsigned long size = 0;
+	unsigned long gpage_size = 1UL << 34;
+
+	if (radix_enabled())
+		gpage_size = 1UL << 30;
+
+	/*
+	 * The hugepagesz and hugepages cmdline options are interleaved.  We
+	 * use the size variable to keep track of whether or not this was done
+	 * properly and skip over instances where it is incorrect.  Other
+	 * command-line parsing code will issue warnings, so we don't need to.
+	 *
+	 */
+	if ((strcmp(param, "default_hugepagesz") == 0) ||
+	    (strcmp(param, "hugepagesz") == 0)) {
+		size = memparse(val, NULL);
+		/*
+		 * Only handle the gigantic page size here (16GB, or 1GB on radix).
+		 */
+		if (size != gpage_size)
+			size = 0;
+	} else if (strcmp(param, "hugepages") == 0) {
+		if (size != 0) {
+			if (sscanf(val, "%lu", &npages) <= 0)
+				npages = 0;
+			if (npages > MAX_NUMBER_GPAGES) {
+				pr_warn("MMU: %lu 16GB pages requested, "
+					"limiting to %d pages\n", npages,
+					MAX_NUMBER_GPAGES);
+				npages = MAX_NUMBER_GPAGES;
+			}
+			gpage_npages = npages;
+			size = 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * This will just do the necessary memblock reservations. Everything else
+ * is done by core, based on kernel command line parsing.
+ */
+void __init reserve_hugetlb_gpages(void)
+{
+	char buf[10];
+	phys_addr_t base;
+	unsigned long gpage_size = 1UL << 34;
+	static __initdata char cmdline[COMMAND_LINE_SIZE];
+
+	if (radix_enabled())
+		gpage_size = 1UL << 30;
+
+	strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
+	parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0,
+		   NULL, &do_gpage_early_setup);
+
+	if (!gpage_npages)
+		return;
+
+	string_get_size(gpage_size, 1, STRING_UNITS_2, buf, sizeof(buf));
+	pr_info("Trying to reserve %ld %s pages\n", gpage_npages, buf);
+
+	/* Allocate one page at a time */
+	while(gpage_npages) {
+		base = memblock_alloc_base(gpage_size, gpage_size,
+					   MEMBLOCK_ALLOC_ANYWHERE);
+		add_gpage(base, gpage_size, 1);
+		gpage_npages--;
+	}
+}
+
 #endif
 
 #if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)