diff mbox series

[2/2] powerpc/fadump: consider reserved ranges while reserving memory

Message ID 158387202999.17176.116917127748245682.stgit@hbathini.in.ibm.com (mailing list archive)
State Superseded
Headers show
Series [1/2] powerpc/fadump: use static allocation for reserved memory ranges | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (ab326587bb5fb91cc97df9b9f48e9e1469f04621)
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64e success Build succeeded
snowpatch_ozlabs/build-pmac32 success Build succeeded
snowpatch_ozlabs/checkpatch warning total: 0 errors, 0 warnings, 2 checks, 98 lines checked
snowpatch_ozlabs/needsstable success Patch is tagged for stable

Commit Message

Hari Bathini March 10, 2020, 8:27 p.m. UTC
Commit 0962e8004e97 ("powerpc/prom: Scan reserved-ranges node for
memory reservations") enabled support to parse reserved-ranges DT
node and reserve kernel memory falling in these ranges for F/W
purposes. Memory reserved for FADump should not overlap with these
ranges as it could corrupt memory meant for F/W or crashed kernel
memory to be exported as vmcore.

But since commit 579ca1a27675 ("powerpc/fadump: make use of memblock's
bottom up allocation mode"), memblock_find_in_range() is being used to
find the appropriate area to reserve memory for FADump, which can't
account for reserved-ranges as these ranges are reserved only after
FADump memory reservation.

With reserved-ranges now being populated during early boot, look out
for these memory ranges while reserving memory for FADump. Without
this change, MPIPL on PowerNV systems aborts with hostboot failure,
when memory reserved for FADump is less than 4096MB.

Fixes: 579ca1a27675 ("powerpc/fadump: make use of memblock's bottom up allocation mode")
Cc: stable@vger.kernel.org # v5.4+
Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
 arch/powerpc/kernel/fadump.c |   76 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 66 insertions(+), 10 deletions(-)

Comments

Mahesh J Salgaonkar April 20, 2020, 5:20 a.m. UTC | #1
On 2020-03-11 01:57:10 Wed, Hari Bathini wrote:
> Commit 0962e8004e97 ("powerpc/prom: Scan reserved-ranges node for
> memory reservations") enabled support to parse reserved-ranges DT
> node and reserve kernel memory falling in these ranges for F/W
> purposes. Memory reserved for FADump should not overlap with these
> ranges as it could corrupt memory meant for F/W or crashed kernel
> memory to be exported as vmcore.
> 
> But since commit 579ca1a27675 ("powerpc/fadump: make use of memblock's
> bottom up allocation mode"), memblock_find_in_range() is being used to
> find the appropriate area to reserve memory for FADump, which can't
> account for reserved-ranges as these ranges are reserved only after
> FADump memory reservation.
> 
> With reserved-ranges now being populated during early boot, look out
> for these memory ranges while reserving memory for FADump. Without
> this change, MPIPL on PowerNV systems aborts with hostboot failure,
> when memory reserved for FADump is less than 4096MB.
> 
> Fixes: 579ca1a27675 ("powerpc/fadump: make use of memblock's bottom up allocation mode")
> Cc: stable@vger.kernel.org # v5.4+
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>  arch/powerpc/kernel/fadump.c |   76 ++++++++++++++++++++++++++++++++++++------
>  1 file changed, 66 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
> index 7fcf4a8f..ab83be9 100644
> --- a/arch/powerpc/kernel/fadump.c
> +++ b/arch/powerpc/kernel/fadump.c
> @@ -443,10 +443,70 @@ static int __init fadump_get_boot_mem_regions(void)
>  	return ret;
>  }
>  
> +/*
> + * Returns true, if the given range overlaps with reserved memory ranges
> + * starting at idx. Also, updates idx to index of overlapping memory range
> + * with the given memory range.
> + * False, otherwise.
> + */
> +static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx)
> +{
> +	bool ret = false;
> +	int i;
> +
> +	for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) {
> +		u64 rbase = reserved_mrange_info.mem_ranges[i].base;
> +		u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size;
> +
> +		if (end <= rbase)
> +			break;
> +
> +		if ((end > rbase) &&  (base < rend)) {
> +			*idx = i;
> +			ret = true;
> +			break;
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * Locate a suitable memory area to reserve memory for FADump. While at it,
> + * lookup reserved-ranges & avoid overlap with them, as they are used by F/W.
> + */
> +static u64 __init fadump_locate_reserve_mem(u64 base, u64 size)
> +{
> +	struct fadump_memory_range *mrngs;
> +	phys_addr_t mstart, mend;
> +	int idx = 0;
> +	u64 i;
> +
> +	mrngs = reserved_mrange_info.mem_ranges;
> +	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
> +				&mstart, &mend, NULL) {
> +		pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n",
> +			 i, mstart, mend, base);
> +
> +		if (mstart > base)
> +			base = PAGE_ALIGN(mstart);
> +
> +		while ((mend > base) && ((mend - base) >= size)) {
> +			if (!overlaps_reserved_ranges(base, base + size, &idx))
> +				goto out;
> +
> +			base = mrngs[idx].base + mrngs[idx].size;
> +			base = PAGE_ALIGN(base);

What happens when all the memory ranges are found to overlap with
reserved ranges? Shouldn't this function return NULL? Looks like in
that case this function returns the last set base address, which is
either still overlapping or not big enough in size.

Rest looks good to me.

Thanks,
-Mahesh.

> +		}
> +	}
> +
> +out:
> +	return base;
> +}
> +
Hari Bathini April 20, 2020, 7:30 a.m. UTC | #2
On 20/04/20 10:50 AM, Mahesh J Salgaonkar wrote:
> On 2020-03-11 01:57:10 Wed, Hari Bathini wrote:
>> Commit 0962e8004e97 ("powerpc/prom: Scan reserved-ranges node for
>> memory reservations") enabled support to parse reserved-ranges DT
>> node and reserve kernel memory falling in these ranges for F/W
>> purposes. Memory reserved for FADump should not overlap with these
>> ranges as it could corrupt memory meant for F/W or crashed kernel
>> memory to be exported as vmcore.
>>
>> But since commit 579ca1a27675 ("powerpc/fadump: make use of memblock's
>> bottom up allocation mode"), memblock_find_in_range() is being used to
>> find the appropriate area to reserve memory for FADump, which can't
>> account for reserved-ranges as these ranges are reserved only after
>> FADump memory reservation.
>>
>> With reserved-ranges now being populated during early boot, look out
>> for these memory ranges while reserving memory for FADump. Without
>> this change, MPIPL on PowerNV systems aborts with hostboot failure,
>> when memory reserved for FADump is less than 4096MB.
>>
>> Fixes: 579ca1a27675 ("powerpc/fadump: make use of memblock's bottom up allocation mode")
>> Cc: stable@vger.kernel.org # v5.4+
>> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
>> ---
>>  arch/powerpc/kernel/fadump.c |   76 ++++++++++++++++++++++++++++++++++++------
>>  1 file changed, 66 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
>> index 7fcf4a8f..ab83be9 100644
>> --- a/arch/powerpc/kernel/fadump.c
>> +++ b/arch/powerpc/kernel/fadump.c
>> @@ -443,10 +443,70 @@ static int __init fadump_get_boot_mem_regions(void)
>>  	return ret;
>>  }
>>  
>> +/*
>> + * Returns true, if the given range overlaps with reserved memory ranges
>> + * starting at idx. Also, updates idx to index of overlapping memory range
>> + * with the given memory range.
>> + * False, otherwise.
>> + */
>> +static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx)
>> +{
>> +	bool ret = false;
>> +	int i;
>> +
>> +	for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) {
>> +		u64 rbase = reserved_mrange_info.mem_ranges[i].base;
>> +		u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size;
>> +
>> +		if (end <= rbase)
>> +			break;
>> +
>> +		if ((end > rbase) &&  (base < rend)) {
>> +			*idx = i;
>> +			ret = true;
>> +			break;
>> +		}
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>> +/*
>> + * Locate a suitable memory area to reserve memory for FADump. While at it,
>> + * lookup reserved-ranges & avoid overlap with them, as they are used by F/W.
>> + */
>> +static u64 __init fadump_locate_reserve_mem(u64 base, u64 size)
>> +{
>> +	struct fadump_memory_range *mrngs;
>> +	phys_addr_t mstart, mend;
>> +	int idx = 0;
>> +	u64 i;
>> +
>> +	mrngs = reserved_mrange_info.mem_ranges;
>> +	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
>> +				&mstart, &mend, NULL) {
>> +		pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n",
>> +			 i, mstart, mend, base);
>> +
>> +		if (mstart > base)
>> +			base = PAGE_ALIGN(mstart);
>> +
>> +		while ((mend > base) && ((mend - base) >= size)) {
>> +			if (!overlaps_reserved_ranges(base, base + size, &idx))
>> +				goto out;
>> +
>> +			base = mrngs[idx].base + mrngs[idx].size;
>> +			base = PAGE_ALIGN(base);
> 
> What happens when all the memory ranges are found to overlap with
> reserved ranges? Shouldn't this function return NULL? Looks like in
> that case this function returns the last set base address, which is
> either still overlapping or not big enough in size.

Thanks for the review, Mahesh. I overlooked that corner case.
Just posted v2 fixing it.

- Hari
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 7fcf4a8f..ab83be9 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -443,10 +443,70 @@  static int __init fadump_get_boot_mem_regions(void)
 	return ret;
 }
 
+/*
+ * Returns true, if the given range overlaps with reserved memory ranges
+ * starting at idx. Also, updates idx to index of overlapping memory range
+ * with the given memory range.
+ * False, otherwise.
+ */
+static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx)
+{
+	bool ret = false;
+	int i;
+
+	for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) {
+		u64 rbase = reserved_mrange_info.mem_ranges[i].base;
+		u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size;
+
+		if (end <= rbase)
+			break;
+
+		if ((end > rbase) &&  (base < rend)) {
+			*idx = i;
+			ret = true;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Locate a suitable memory area to reserve memory for FADump. While at it,
+ * lookup reserved-ranges & avoid overlap with them, as they are used by F/W.
+ */
+static u64 __init fadump_locate_reserve_mem(u64 base, u64 size)
+{
+	struct fadump_memory_range *mrngs;
+	phys_addr_t mstart, mend;
+	int idx = 0;
+	u64 i;
+
+	mrngs = reserved_mrange_info.mem_ranges;
+	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
+				&mstart, &mend, NULL) {
+		pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n",
+			 i, mstart, mend, base);
+
+		if (mstart > base)
+			base = PAGE_ALIGN(mstart);
+
+		while ((mend > base) && ((mend - base) >= size)) {
+			if (!overlaps_reserved_ranges(base, base + size, &idx))
+				goto out;
+
+			base = mrngs[idx].base + mrngs[idx].size;
+			base = PAGE_ALIGN(base);
+		}
+	}
+
+out:
+	return base;
+}
+
 int __init fadump_reserve_mem(void)
 {
-	u64 base, size, mem_boundary, bootmem_min, align = PAGE_SIZE;
-	bool is_memblock_bottom_up = memblock_bottom_up();
+	u64 base, size, mem_boundary, bootmem_min;
 	int ret = 1;
 
 	if (!fw_dump.fadump_enabled)
@@ -467,9 +527,9 @@  int __init fadump_reserve_mem(void)
 			PAGE_ALIGN(fadump_calculate_reserve_size());
 #ifdef CONFIG_CMA
 		if (!fw_dump.nocma) {
-			align = FADUMP_CMA_ALIGNMENT;
 			fw_dump.boot_memory_size =
-				ALIGN(fw_dump.boot_memory_size, align);
+				ALIGN(fw_dump.boot_memory_size,
+				      FADUMP_CMA_ALIGNMENT);
 		}
 #endif
 
@@ -537,13 +597,9 @@  int __init fadump_reserve_mem(void)
 		 * Reserve memory at an offset closer to bottom of the RAM to
 		 * minimize the impact of memory hot-remove operation.
 		 */
-		memblock_set_bottom_up(true);
-		base = memblock_find_in_range(base, mem_boundary, size, align);
-
-		/* Restore the previous allocation mode */
-		memblock_set_bottom_up(is_memblock_bottom_up);
+		base = fadump_locate_reserve_mem(base, size);
 
-		if (!base) {
+		if (base > (mem_boundary - size)) {
 			pr_err("Failed to find memory chunk for reservation!\n");
 			goto error_out;
 		}