diff mbox series

[04/11] ppc64/kexec_file: avoid stomping memory used by special regions

Message ID 159319831192.16351.17443438699302756548.stgit@hbathini.in.ibm.com (mailing list archive)
State Changes Requested
Headers show
Series ppc64: enable kdump support for kexec_file_load syscall | expand

Commit Message

Hari Bathini June 26, 2020, 7:05 p.m. UTC
crashkernel region could have an overlap with special memory regions
like  opal, rtas, tce-table & such. These regions are referred to as
exclude memory ranges. Setup this ranges during image probe in order
to avoid them while finding the buffer for different kdump segments.
Implement kexec_locate_mem_hole_ppc64() that locates a memory hole
accounting for these ranges. Also, override arch_kexec_add_buffer()
to locate a memory hole & later call __kexec_add_buffer() function
with kbuf->mem set to skip the generic locate memory hole lookup.

Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
---
 arch/powerpc/include/asm/crashdump-ppc64.h |   10 +
 arch/powerpc/include/asm/kexec.h           |    7 -
 arch/powerpc/kexec/elf_64.c                |    7 +
 arch/powerpc/kexec/file_load_64.c          |  292 ++++++++++++++++++++++++++++
 4 files changed, 312 insertions(+), 4 deletions(-)
 create mode 100644 arch/powerpc/include/asm/crashdump-ppc64.h

Comments

Pingfan Liu June 28, 2020, 2:14 a.m. UTC | #1
Hi Hari,

After a quick through for this series, I have a few question/comment on
this patch for the time being. Pls see comment inline.

On 06/27/2020 03:05 AM, Hari Bathini wrote:
> crashkernel region could have an overlap with special memory regions
> like  opal, rtas, tce-table & such. These regions are referred to as
> exclude memory ranges. Setup this ranges during image probe in order
> to avoid them while finding the buffer for different kdump segments.
> Implement kexec_locate_mem_hole_ppc64() that locates a memory hole
> accounting for these ranges. Also, override arch_kexec_add_buffer()
> to locate a memory hole & later call __kexec_add_buffer() function
> with kbuf->mem set to skip the generic locate memory hole lookup.
> 
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/crashdump-ppc64.h |   10 +
>  arch/powerpc/include/asm/kexec.h           |    7 -
>  arch/powerpc/kexec/elf_64.c                |    7 +
>  arch/powerpc/kexec/file_load_64.c          |  292 ++++++++++++++++++++++++++++
>  4 files changed, 312 insertions(+), 4 deletions(-)
>  create mode 100644 arch/powerpc/include/asm/crashdump-ppc64.h
> 
> diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h b/arch/powerpc/include/asm/crashdump-ppc64.h
> new file mode 100644
> index 0000000..3596c25
> --- /dev/null
> +++ b/arch/powerpc/include/asm/crashdump-ppc64.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +#ifndef _ARCH_POWERPC_KEXEC_CRASHDUMP_PPC64_H
> +#define _ARCH_POWERPC_KEXEC_CRASHDUMP_PPC64_H
> +
> +/* min & max addresses for kdump load segments */
> +#define KDUMP_BUF_MIN		(crashk_res.start)
> +#define KDUMP_BUF_MAX		((crashk_res.end < ppc64_rma_size) ? \
> +				 crashk_res.end : (ppc64_rma_size - 1))
> +
> +#endif /* __ARCH_POWERPC_KEXEC_CRASHDUMP_PPC64_H */
> diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
> index 7008ea1..bf47a01 100644
> --- a/arch/powerpc/include/asm/kexec.h
> +++ b/arch/powerpc/include/asm/kexec.h
> @@ -100,14 +100,16 @@ void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_co
>  #ifdef CONFIG_KEXEC_FILE
>  extern const struct kexec_file_ops kexec_elf64_ops;
>  
> -#ifdef CONFIG_IMA_KEXEC
>  #define ARCH_HAS_KIMAGE_ARCH
>  
>  struct kimage_arch {
> +	struct crash_mem *exclude_ranges;
> +
> +#ifdef CONFIG_IMA_KEXEC
>  	phys_addr_t ima_buffer_addr;
>  	size_t ima_buffer_size;
> -};
>  #endif
> +};
>  
>  int setup_purgatory(struct kimage *image, const void *slave_code,
>  		    const void *fdt, unsigned long kernel_load_addr,
> @@ -125,6 +127,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
>  			unsigned long initrd_load_addr,
>  			unsigned long initrd_len, const char *cmdline);
>  #endif /* CONFIG_PPC64 */
> +
>  #endif /* CONFIG_KEXEC_FILE */
>  
>  #else /* !CONFIG_KEXEC_CORE */
> diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
> index 23ad04c..c695f94 100644
> --- a/arch/powerpc/kexec/elf_64.c
> +++ b/arch/powerpc/kexec/elf_64.c
> @@ -22,6 +22,7 @@
>  #include <linux/of_fdt.h>
>  #include <linux/slab.h>
>  #include <linux/types.h>
> +#include <asm/crashdump-ppc64.h>
>  
>  static void *elf64_load(struct kimage *image, char *kernel_buf,
>  			unsigned long kernel_len, char *initrd,
> @@ -46,6 +47,12 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
>  	if (ret)
>  		goto out;
>  
> +	if (image->type == KEXEC_TYPE_CRASH) {
> +		/* min & max buffer values for kdump case */
> +		kbuf.buf_min = pbuf.buf_min = KDUMP_BUF_MIN;
> +		kbuf.buf_max = pbuf.buf_max = KDUMP_BUF_MAX;
> +	}
> +
>  	ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
>  	if (ret)
>  		goto out;
> diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
> index e6bff960..f1d7160 100644
> --- a/arch/powerpc/kexec/file_load_64.c
> +++ b/arch/powerpc/kexec/file_load_64.c
> @@ -17,6 +17,8 @@
>  #include <linux/kexec.h>
>  #include <linux/of_fdt.h>
>  #include <linux/libfdt.h>
> +#include <asm/kexec_ranges.h>
> +#include <asm/crashdump-ppc64.h>
>  
>  const struct kexec_file_ops * const kexec_file_loaders[] = {
>  	&kexec_elf64_ops,
> @@ -24,6 +26,247 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
>  };
>  
>  /**
> + * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
> + *                             regions like opal/rtas, tce-table, initrd,
> + *                             kernel, htab which should be avoided while
> + *                             setting up kexec load segments.
> + * @mem_ranges:                Range list to add the memory ranges to.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
Is it needed? See the comment below.
> +{
> +	int ret;
> +
> +	ret = add_tce_mem_ranges(mem_ranges);
> +	if (ret)
> +		goto out;
> +
> +	ret = add_initrd_mem_range(mem_ranges);
> +	if (ret)
> +		goto out;
> +
> +	ret = add_htab_mem_range(mem_ranges);
> +	if (ret)
> +		goto out;
> +
> +	ret = add_kernel_mem_range(mem_ranges);
> +	if (ret)
> +		goto out;
> +
> +	ret = add_rtas_mem_range(mem_ranges, false);
> +	if (ret)
> +		goto out;
> +
> +	ret = add_opal_mem_range(mem_ranges, false);
> +	if (ret)
> +		goto out;
> +
> +	ret = add_reserved_ranges(mem_ranges);
> +	if (ret)
> +		goto out;
> +
> +	/* exclude memory ranges should be sorted for easy lookup */
> +	sort_memory_ranges(*mem_ranges);
> +out:
> +	if (ret)
> +		pr_err("Failed to setup exclude memory ranges\n");
> +	return ret;
> +}
> +
> +/**
> + * __locate_mem_hole_ppc64 - Tests if the memory hole between buf_min & buf_max
> + *                           is large enough for the buffer. If true, sets
> + *                           kbuf->mem to the buffer.
> + * @kbuf:                    Buffer contents and memory parameters.
> + * @buf_min:                 Minimum address for the buffer.
> + * @buf_max:                 Maximum address for the buffer.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int __locate_mem_hole_ppc64(struct kexec_buf *kbuf,
> +				   u64 buf_min, u64 buf_max)
> +{
> +	int ret = -EADDRNOTAVAIL;
> +
> +	buf_min = ALIGN(buf_min, kbuf->buf_align);
> +
> +	if (buf_min < buf_max &&
> +	    (buf_max - buf_min + 1) >= kbuf->memsz) {
> +		/*
> +		 * Suitable memory range found. Set kbuf->mem here to skip
> +		 * locate memory hole routine in __kexec_add_buffer() call.
> +		 */
> +		ret = 0;
> +		if (kbuf->top_down)
> +			kbuf->mem = ALIGN_DOWN(buf_max - kbuf->memsz + 1,
> +					       kbuf->buf_align);
> +		else
> +			kbuf->mem = buf_min;
> +	}
> +
> +	return ret;
> +}
> +
> +/**
> + * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a
> + *                                  suitable buffer with top down approach.
> + * @kbuf:                           Buffer contents and memory parameters.
> + * @buf_min:                        Minimum address for the buffer.
> + * @buf_max:                        Maximum address for the buffer.
> + * @emem:                           Exclude memory ranges.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf,
> +					  u64 buf_min, u64 buf_max,
> +					  const struct crash_mem *emem)
> +{
> +	int i, ret = 0, err = -EADDRNOTAVAIL;
> +	u64 start, end, tmin, tmax;
> +
> +	tmax = buf_max;
> +	for (i = (emem->nr_ranges - 1); i >= 0; i--) {
> +		start = emem->ranges[i].start;
> +		end = emem->ranges[i].end;
> +
> +		if (start > tmax)
> +			continue;
> +
> +		if (end < tmax) {
> +			tmin = (end < buf_min ? buf_min : end + 1);
> +			ret = __locate_mem_hole_ppc64(kbuf, tmin, tmax);
> +			if (!ret)
> +				return 0;
> +		}
> +
> +		tmax = start - 1;
> +
> +		if (tmax < buf_min) {
> +			ret = err;
> +			break;
> +		}
> +		ret = 0;
> +	}
> +
> +	if (!ret) {
> +		tmin = buf_min;
> +		ret = __locate_mem_hole_ppc64(kbuf, tmin, tmax);
> +	}
> +	return ret;
> +}
> +
> +/**
> + * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a
> + *                                   suitable buffer with bottom up approach.
> + * @kbuf:                            Buffer contents and memory parameters.
> + * @buf_min:                         Minimum address for the buffer.
> + * @buf_max:                         Maximum address for the buffer.
> + * @emem:                            Exclude memory ranges.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf,
> +					   u64 buf_min, u64 buf_max,
> +					   const struct crash_mem *emem)
> +{
> +	int i, ret = 0, err = -EADDRNOTAVAIL;
> +	u64 start, end, tmin, tmax;
> +
> +	tmin = buf_min;
> +	for (i = 0; i < emem->nr_ranges; i++) {
> +		start = emem->ranges[i].start;
> +		end = emem->ranges[i].end;
> +
> +		if (end < tmin)
> +			continue;
> +
> +		if (start > tmin) {
> +			tmax = (start > buf_max ? buf_max : start - 1);
> +			ret = __locate_mem_hole_ppc64(kbuf, tmin, tmax);
> +			if (!ret)
> +				return 0;
> +		}
> +
> +		tmin = end + 1;
> +
> +		if (tmin > buf_max) {
> +			ret = err;
> +			break;
> +		}
> +		ret = 0;
> +	}
> +
> +	if (!ret) {
> +		tmax = buf_max;
> +		ret = __locate_mem_hole_ppc64(kbuf, tmin, tmax);
> +	}
> +	return ret;
> +}
> +
> +/**
> + * kexec_locate_mem_hole_ppc64 - Skip special memory regions like rtas,
> + *                               tce-table, opal, reserved-ranges & such
> + *                               (exclude memory ranges) as they can't be
> + *                               used for kexec segment buffer. Use buf_min
> + *                               & buf_max fields in kexec_buf structure to
> + *                               skip regions. Sets kbuf->mem when a
> + *                               suitable memory hole is found.
> + * @kbuf:                        Buffer contents and memory parameters.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int kexec_locate_mem_hole_ppc64(struct kexec_buf *kbuf)
> +{
> +	struct crash_mem **emem;
> +	u64 buf_min, buf_max;
> +	int ret;
> +
> +	/*
> +	 * Use the locate_mem_hole logic in kexec_add_buffer() for regular
> +	 * kexec_file_load syscall
> +	 */
> +	if (kbuf->image->type != KEXEC_TYPE_CRASH)
> +		return 0;
Can the ranges overlap [crashk_res.start, crashk_res.end]?  Otherwise
there is no requirement for @exclude_ranges.


I guess you have a design for future. If not true, then it is better to
fold the condition "if (kbuf->image->type != KEXEC_TYPE_CRASH)" into the
caller and rename this function to better distinguish use cases between
kexec and kdump

Thanks,
Pingfan
> +
> +	/* Look up the exclude ranges list while locating the memory hole */
> +	emem = &(kbuf->image->arch.exclude_ranges);
> +	if (!(*emem) || ((*emem)->nr_ranges == 0)) {
> +		pr_warn("No exclude range list. Using the default locate mem hole method\n");
> +		return 0;
> +	}
> +
> +	/* Ensure minimum alignment needed for segments. */
> +	kbuf->memsz = ALIGN(kbuf->memsz, PAGE_SIZE);
> +	kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
> +
> +	/* Segments for kdump kernel should be within crashkernel region */
> +	buf_min = (kbuf->buf_min < crashk_res.start ?
> +		   crashk_res.start : kbuf->buf_min);
> +	buf_max = (kbuf->buf_max > crashk_res.end ?
> +		   crashk_res.end : kbuf->buf_max);
> +
> +	if (buf_min > buf_max) {
> +		pr_err("Invalid buffer min and/or max values\n");
> +		return -EINVAL;
> +	}
> +
> +	if (kbuf->top_down)
> +		ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max,
> +						     *emem);
> +	else
> +		ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max,
> +						      *emem);
> +
> +	/* Add the buffer allocated to the exclude list for the next lookup */
> +	if (!ret) {
> +		add_mem_range(emem, kbuf->mem, kbuf->memsz);
> +		sort_memory_ranges(*emem);
> +	}
> +	return ret;
> +}
> +
> +/**
>   * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global
>   *                         variables and call setup_purgatory() to initialize
>   *                         common global variable.
> @@ -89,6 +332,29 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
>  }
>  
>  /**
> + * arch_kexec_add_buffer - Locate memory hole before calling kexec_add_buffer().
> + *                         All kexec_add_buffer() callers should use this
> + *                         function instead.
> + * @kbuf:                  Buffer contents and memory parameters.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +int arch_kexec_add_buffer(struct kexec_buf *kbuf)
> +{
> +	int ret;
> +
> +	ret = kexec_locate_mem_hole_ppc64(kbuf);
> +	if (ret)
> +		goto out;
> +
> +	ret = __kexec_add_buffer(kbuf);
> +out:
> +	if (ret)
> +		pr_err("Failed to add buffer of size %lu\n", kbuf->memsz);
> +	return ret;
> +}
> +
> +/**
>   * arch_kexec_kernel_image_probe - Does additional handling needed to setup
>   *                                 kexec segments.
>   * @image:                         kexec image being loaded.
> @@ -100,9 +366,31 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
>  int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
>  				  unsigned long buf_len)
>  {
> -	/* We don't support crash kernels yet. */
> -	if (image->type == KEXEC_TYPE_CRASH)
> +	if (image->type == KEXEC_TYPE_CRASH) {
> +		int ret;
> +
> +		/* Get exclude memory ranges needed for setting up kdump segments */
> +		ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
> +		if (ret)
> +			pr_err("Failed to setup exclude memory ranges for buffer lookup\n");
> +		/* Return this until all changes for panic kernel are in */
>  		return -EOPNOTSUPP;
> +	}
>  
>  	return kexec_image_probe_default(image, buf, buf_len);
>  }
> +
> +/**
> + * arch_kimage_file_post_load_cleanup - Frees up all the allocations done
> + *                                      while loading the image.
> + * @image:                              kexec image being loaded.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +int arch_kimage_file_post_load_cleanup(struct kimage *image)
> +{
> +	kfree(image->arch.exclude_ranges);
> +	image->arch.exclude_ranges = NULL;
> +
> +	return kexec_image_post_load_cleanup_default(image);
> +}
> 
> 
> _______________________________________________
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec
>
Hari Bathini June 29, 2020, 5:55 a.m. UTC | #2
On 28/06/20 7:44 am, piliu wrote:
> Hi Hari,

Hi Pingfan,

> 
> After a quick through for this series, I have a few question/comment on
> this patch for the time being. Pls see comment inline.
> 
> On 06/27/2020 03:05 AM, Hari Bathini wrote:
>> crashkernel region could have an overlap with special memory regions
>> like  opal, rtas, tce-table & such. These regions are referred to as
>> exclude memory ranges. Setup this ranges during image probe in order
>> to avoid them while finding the buffer for different kdump segments.

[...]

>> +	/*
>> +	 * Use the locate_mem_hole logic in kexec_add_buffer() for regular
>> +	 * kexec_file_load syscall
>> +	 */
>> +	if (kbuf->image->type != KEXEC_TYPE_CRASH)
>> +		return 0;
> Can the ranges overlap [crashk_res.start, crashk_res.end]?  Otherwise
> there is no requirement for @exclude_ranges.

The ranges like rtas, opal are loaded by f/w. They almost always overlap with
crashkernel region. So, @exclude_ranges is required to support kdump.

> I guess you have a design for future. If not true, then it is better to
> fold the condition "if (kbuf->image->type != KEXEC_TYPE_CRASH)" into the
> caller and rename this function to better distinguish use cases between
> kexec and kdump

Yeah, this condition will be folded. I have a follow-up patch for that explaining
why kexec case should also be folded. Will try to add that to this series for v2.

Thanks
Hari
Pingfan Liu June 30, 2020, 3:30 a.m. UTC | #3
On 06/29/2020 01:55 PM, Hari Bathini wrote:
> 
> 
> On 28/06/20 7:44 am, piliu wrote:
>> Hi Hari,
> 
> Hi Pingfan,
> 
>>
>> After a quick through for this series, I have a few question/comment on
>> this patch for the time being. Pls see comment inline.
>>
>> On 06/27/2020 03:05 AM, Hari Bathini wrote:
>>> crashkernel region could have an overlap with special memory regions
>>> like  opal, rtas, tce-table & such. These regions are referred to as
>>> exclude memory ranges. Setup this ranges during image probe in order
>>> to avoid them while finding the buffer for different kdump segments.
> 
> [...]
> 
>>> +	/*
>>> +	 * Use the locate_mem_hole logic in kexec_add_buffer() for regular
>>> +	 * kexec_file_load syscall
>>> +	 */
>>> +	if (kbuf->image->type != KEXEC_TYPE_CRASH)
>>> +		return 0;
>> Can the ranges overlap [crashk_res.start, crashk_res.end]?  Otherwise
>> there is no requirement for @exclude_ranges.
> 
> The ranges like rtas, opal are loaded by f/w. They almost always overlap with
> crashkernel region. So, @exclude_ranges is required to support kdump.
f/w passes rtas/opal as service, then must f/w mark these ranges as
fdt_reserved_mem in order to make kernel aware not to use these ranges?
Otherwise kernel memory allocation besides kdump can also overwrite
these ranges.

Hmm, revisiting reserve_crashkernel(). It seems not to take any reserved
memory into consider except kernel text. Could it work based on memblock
allocator?

Thanks,
Pingfan
> 
>> I guess you have a design for future. If not true, then it is better to
>> fold the condition "if (kbuf->image->type != KEXEC_TYPE_CRASH)" into the
>> caller and rename this function to better distinguish use cases between
>> kexec and kdump
> 
> Yeah, this condition will be folded. I have a follow-up patch for that explaining
> why kexec case should also be folded. Will try to add that to this series for v2.
> 
> Thanks
> Hari
>
Hari Bathini June 30, 2020, 6:10 a.m. UTC | #4
On 30/06/20 9:00 am, piliu wrote:
> 
> 
> On 06/29/2020 01:55 PM, Hari Bathini wrote:
>>
>>
>> On 28/06/20 7:44 am, piliu wrote:
>>> Hi Hari,
>>
>> Hi Pingfan,
>>
>>>
>>> After a quick through for this series, I have a few question/comment on
>>> this patch for the time being. Pls see comment inline.
>>>
>>> On 06/27/2020 03:05 AM, Hari Bathini wrote:
>>>> crashkernel region could have an overlap with special memory regions
>>>> like  opal, rtas, tce-table & such. These regions are referred to as
>>>> exclude memory ranges. Setup this ranges during image probe in order
>>>> to avoid them while finding the buffer for different kdump segments.
>>
>> [...]
>>
>>>> +	/*
>>>> +	 * Use the locate_mem_hole logic in kexec_add_buffer() for regular
>>>> +	 * kexec_file_load syscall
>>>> +	 */
>>>> +	if (kbuf->image->type != KEXEC_TYPE_CRASH)
>>>> +		return 0;
>>> Can the ranges overlap [crashk_res.start, crashk_res.end]?  Otherwise
>>> there is no requirement for @exclude_ranges.
>>
>> The ranges like rtas, opal are loaded by f/w. They almost always overlap with
>> crashkernel region. So, @exclude_ranges is required to support kdump.
> f/w passes rtas/opal as service, then must f/w mark these ranges as
> fdt_reserved_mem in order to make kernel aware not to use these ranges?

It does. Actually, reserve_map + reserved-ranges are reserved as soon as
memblock allocator is ready but not before crashkernel reservation.
Check early_reserve_mem() call in kernel/prom.c

> Otherwise kernel memory allocation besides kdump can also overwrite
> these ranges.> 
> Hmm, revisiting reserve_crashkernel(). It seems not to take any reserved
> memory into consider except kernel text. Could it work based on memblock
> allocator?

So, kdump could possibly overwrite these regions which is why an exclude
range list is needed. Same thing was done in kexec-tools as well.

Thanks
Hari
Pingfan Liu June 30, 2020, 8:13 a.m. UTC | #5
On 06/30/2020 02:10 PM, Hari Bathini wrote:
> 
> 
> On 30/06/20 9:00 am, piliu wrote:
>>
>>
>> On 06/29/2020 01:55 PM, Hari Bathini wrote:
>>>
>>>
>>> On 28/06/20 7:44 am, piliu wrote:
>>>> Hi Hari,
>>>
>>> Hi Pingfan,
>>>
>>>>
>>>> After a quick through for this series, I have a few question/comment on
>>>> this patch for the time being. Pls see comment inline.
>>>>
>>>> On 06/27/2020 03:05 AM, Hari Bathini wrote:
>>>>> crashkernel region could have an overlap with special memory regions
>>>>> like  opal, rtas, tce-table & such. These regions are referred to as
>>>>> exclude memory ranges. Setup this ranges during image probe in order
>>>>> to avoid them while finding the buffer for different kdump segments.
>>>
>>> [...]
>>>
>>>>> +	/*
>>>>> +	 * Use the locate_mem_hole logic in kexec_add_buffer() for regular
>>>>> +	 * kexec_file_load syscall
>>>>> +	 */
>>>>> +	if (kbuf->image->type != KEXEC_TYPE_CRASH)
>>>>> +		return 0;
>>>> Can the ranges overlap [crashk_res.start, crashk_res.end]?  Otherwise
>>>> there is no requirement for @exclude_ranges.
>>>
>>> The ranges like rtas, opal are loaded by f/w. They almost always overlap with
>>> crashkernel region. So, @exclude_ranges is required to support kdump.
>> f/w passes rtas/opal as service, then must f/w mark these ranges as
>> fdt_reserved_mem in order to make kernel aware not to use these ranges?
> 
> It does. Actually, reserve_map + reserved-ranges are reserved as soon as
> memblock allocator is ready but not before crashkernel reservation.
> Check early_reserve_mem() call in kernel/prom.c
> 
>> Otherwise kernel memory allocation besides kdump can also overwrite
>> these ranges.> 
>> Hmm, revisiting reserve_crashkernel(). It seems not to take any reserved
>> memory into consider except kernel text. Could it work based on memblock
>> allocator?
> 
> So, kdump could possibly overwrite these regions which is why an exclude
> range list is needed. Same thing was done in kexec-tools as well.
OK, got it.

Thanks,
Pingfan
> 
> Thanks
> Hari
> 
> _______________________________________________
> kexec mailing list
> kexec@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec
>
Dave Young July 1, 2020, 7:40 a.m. UTC | #6
Hi Hari,
On 06/27/20 at 12:35am, Hari Bathini wrote:
> crashkernel region could have an overlap with special memory regions
> like  opal, rtas, tce-table & such. These regions are referred to as
> exclude memory ranges. Setup this ranges during image probe in order
> to avoid them while finding the buffer for different kdump segments.
> Implement kexec_locate_mem_hole_ppc64() that locates a memory hole
> accounting for these ranges. Also, override arch_kexec_add_buffer()
> to locate a memory hole & later call __kexec_add_buffer() function
> with kbuf->mem set to skip the generic locate memory hole lookup.
> 
> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/crashdump-ppc64.h |   10 +
>  arch/powerpc/include/asm/kexec.h           |    7 -
>  arch/powerpc/kexec/elf_64.c                |    7 +
>  arch/powerpc/kexec/file_load_64.c          |  292 ++++++++++++++++++++++++++++
>  4 files changed, 312 insertions(+), 4 deletions(-)
>  create mode 100644 arch/powerpc/include/asm/crashdump-ppc64.h
> 
[snip]
>  /**
> + * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
> + *                             regions like opal/rtas, tce-table, initrd,
> + *                             kernel, htab which should be avoided while
> + *                             setting up kexec load segments.
> + * @mem_ranges:                Range list to add the memory ranges to.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
> +{
> +	int ret;
> +
> +	ret = add_tce_mem_ranges(mem_ranges);
> +	if (ret)
> +		goto out;
> +
> +	ret = add_initrd_mem_range(mem_ranges);
> +	if (ret)
> +		goto out;
> +
> +	ret = add_htab_mem_range(mem_ranges);
> +	if (ret)
> +		goto out;
> +
> +	ret = add_kernel_mem_range(mem_ranges);
> +	if (ret)
> +		goto out;
> +
> +	ret = add_rtas_mem_range(mem_ranges, false);
> +	if (ret)
> +		goto out;
> +
> +	ret = add_opal_mem_range(mem_ranges, false);
> +	if (ret)
> +		goto out;
> +
> +	ret = add_reserved_ranges(mem_ranges);
> +	if (ret)
> +		goto out;
> +
> +	/* exclude memory ranges should be sorted for easy lookup */
> +	sort_memory_ranges(*mem_ranges);
> +out:
> +	if (ret)
> +		pr_err("Failed to setup exclude memory ranges\n");
> +	return ret;
> +}

I'm confused about the "overlap with crashkernel memory", does that mean
those normal kernel used memory could be put in crashkernel reserved
memory range?  If so why can't just skip those areas while crashkernel
doing the reservation?

Thanks
Dave
Pingfan Liu July 1, 2020, 12:53 p.m. UTC | #7
On 07/01/2020 03:40 PM, Dave Young wrote:
> Hi Hari,
> On 06/27/20 at 12:35am, Hari Bathini wrote:
>> crashkernel region could have an overlap with special memory regions
>> like  opal, rtas, tce-table & such. These regions are referred to as
>> exclude memory ranges. Setup this ranges during image probe in order
>> to avoid them while finding the buffer for different kdump segments.
>> Implement kexec_locate_mem_hole_ppc64() that locates a memory hole
>> accounting for these ranges. Also, override arch_kexec_add_buffer()
>> to locate a memory hole & later call __kexec_add_buffer() function
>> with kbuf->mem set to skip the generic locate memory hole lookup.
>>
>> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
>> ---
>>  arch/powerpc/include/asm/crashdump-ppc64.h |   10 +
>>  arch/powerpc/include/asm/kexec.h           |    7 -
>>  arch/powerpc/kexec/elf_64.c                |    7 +
>>  arch/powerpc/kexec/file_load_64.c          |  292 ++++++++++++++++++++++++++++
>>  4 files changed, 312 insertions(+), 4 deletions(-)
>>  create mode 100644 arch/powerpc/include/asm/crashdump-ppc64.h
>>
> [snip]
>>  /**
>> + * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
>> + *                             regions like opal/rtas, tce-table, initrd,
>> + *                             kernel, htab which should be avoided while
>> + *                             setting up kexec load segments.
>> + * @mem_ranges:                Range list to add the memory ranges to.
>> + *
>> + * Returns 0 on success, negative errno on error.
>> + */
>> +static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
>> +{
>> +	int ret;
>> +
>> +	ret = add_tce_mem_ranges(mem_ranges);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = add_initrd_mem_range(mem_ranges);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = add_htab_mem_range(mem_ranges);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = add_kernel_mem_range(mem_ranges);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = add_rtas_mem_range(mem_ranges, false);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = add_opal_mem_range(mem_ranges, false);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = add_reserved_ranges(mem_ranges);
>> +	if (ret)
>> +		goto out;
>> +
>> +	/* exclude memory ranges should be sorted for easy lookup */
>> +	sort_memory_ranges(*mem_ranges);
>> +out:
>> +	if (ret)
>> +		pr_err("Failed to setup exclude memory ranges\n");
>> +	return ret;
>> +}
> 
> I'm confused about the "overlap with crashkernel memory", does that mean
> those normal kernel used memory could be put in crashkernel reserved
> memory range?  If so why can't just skip those areas while crashkernel
> doing the reservation?
I raised the same question in another mail. As Hari's answer, "kexec -p"
skips these ranges in user space. And the same logic should be done in
"kexec -s -p"

Regards,
Pingfan
Hari Bathini July 1, 2020, 6:18 p.m. UTC | #8
On 01/07/20 1:10 pm, Dave Young wrote:
> Hi Hari,
> On 06/27/20 at 12:35am, Hari Bathini wrote:
>> crashkernel region could have an overlap with special memory regions
>> like  opal, rtas, tce-table & such. These regions are referred to as
>> exclude memory ranges. Setup this ranges during image probe in order
>> to avoid them while finding the buffer for different kdump segments.
>> Implement kexec_locate_mem_hole_ppc64() that locates a memory hole
>> accounting for these ranges. Also, override arch_kexec_add_buffer()
>> to locate a memory hole & later call __kexec_add_buffer() function
>> with kbuf->mem set to skip the generic locate memory hole lookup.
>>
>> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
>> ---
>>  arch/powerpc/include/asm/crashdump-ppc64.h |   10 +
>>  arch/powerpc/include/asm/kexec.h           |    7 -
>>  arch/powerpc/kexec/elf_64.c                |    7 +
>>  arch/powerpc/kexec/file_load_64.c          |  292 ++++++++++++++++++++++++++++
>>  4 files changed, 312 insertions(+), 4 deletions(-)
>>  create mode 100644 arch/powerpc/include/asm/crashdump-ppc64.h
>>
> [snip]
>>  /**
>> + * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
>> + *                             regions like opal/rtas, tce-table, initrd,
>> + *                             kernel, htab which should be avoided while
>> + *                             setting up kexec load segments.
>> + * @mem_ranges:                Range list to add the memory ranges to.
>> + *
>> + * Returns 0 on success, negative errno on error.
>> + */
>> +static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
>> +{
>> +	int ret;
>> +
>> +	ret = add_tce_mem_ranges(mem_ranges);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = add_initrd_mem_range(mem_ranges);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = add_htab_mem_range(mem_ranges);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = add_kernel_mem_range(mem_ranges);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = add_rtas_mem_range(mem_ranges, false);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = add_opal_mem_range(mem_ranges, false);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = add_reserved_ranges(mem_ranges);
>> +	if (ret)
>> +		goto out;
>> +
>> +	/* exclude memory ranges should be sorted for easy lookup */
>> +	sort_memory_ranges(*mem_ranges);
>> +out:
>> +	if (ret)
>> +		pr_err("Failed to setup exclude memory ranges\n");
>> +	return ret;
>> +}
> 
> I'm confused about the "overlap with crashkernel memory", does that mean
> those normal kernel used memory could be put in crashkernel reserved

There are regions that could overlap with crashkernel region but they are
not normal kernel used memory though. These are regions that kernel and/or
f/w chose to place at a particular address for real mode accessibility
and/or memory layout between kernel & f/w kind of thing.

> memory range?  If so why can't just skip those areas while crashkernel
> doing the reservation?

crashkernel region has a dependency to be in the first memory block for it
to be accessible in real mode. Accommodating this requirement while addressing
other requirements would mean something like what we have now. A list of
possible special memory regions in crashkernel region to take care of.

I have plans to split crashkernel region into low & high to have exclusive
regions for crashkernel, even if that means to have two of them. But that
is for another day with its own set of complexities to deal with...

Thanks
Hari
Dave Young July 2, 2020, 11:54 a.m. UTC | #9
On 07/01/20 at 11:48pm, Hari Bathini wrote:
> 
> 
> On 01/07/20 1:10 pm, Dave Young wrote:
> > Hi Hari,
> > On 06/27/20 at 12:35am, Hari Bathini wrote:
> >> crashkernel region could have an overlap with special memory regions
> >> like  opal, rtas, tce-table & such. These regions are referred to as
> >> exclude memory ranges. Setup this ranges during image probe in order
> >> to avoid them while finding the buffer for different kdump segments.
> >> Implement kexec_locate_mem_hole_ppc64() that locates a memory hole
> >> accounting for these ranges. Also, override arch_kexec_add_buffer()
> >> to locate a memory hole & later call __kexec_add_buffer() function
> >> with kbuf->mem set to skip the generic locate memory hole lookup.
> >>
> >> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
> >> ---
> >>  arch/powerpc/include/asm/crashdump-ppc64.h |   10 +
> >>  arch/powerpc/include/asm/kexec.h           |    7 -
> >>  arch/powerpc/kexec/elf_64.c                |    7 +
> >>  arch/powerpc/kexec/file_load_64.c          |  292 ++++++++++++++++++++++++++++
> >>  4 files changed, 312 insertions(+), 4 deletions(-)
> >>  create mode 100644 arch/powerpc/include/asm/crashdump-ppc64.h
> >>
> > [snip]
> >>  /**
> >> + * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
> >> + *                             regions like opal/rtas, tce-table, initrd,
> >> + *                             kernel, htab which should be avoided while
> >> + *                             setting up kexec load segments.
> >> + * @mem_ranges:                Range list to add the memory ranges to.
> >> + *
> >> + * Returns 0 on success, negative errno on error.
> >> + */
> >> +static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
> >> +{
> >> +	int ret;
> >> +
> >> +	ret = add_tce_mem_ranges(mem_ranges);
> >> +	if (ret)
> >> +		goto out;
> >> +
> >> +	ret = add_initrd_mem_range(mem_ranges);
> >> +	if (ret)
> >> +		goto out;
> >> +
> >> +	ret = add_htab_mem_range(mem_ranges);
> >> +	if (ret)
> >> +		goto out;
> >> +
> >> +	ret = add_kernel_mem_range(mem_ranges);
> >> +	if (ret)
> >> +		goto out;
> >> +
> >> +	ret = add_rtas_mem_range(mem_ranges, false);
> >> +	if (ret)
> >> +		goto out;
> >> +
> >> +	ret = add_opal_mem_range(mem_ranges, false);
> >> +	if (ret)
> >> +		goto out;
> >> +
> >> +	ret = add_reserved_ranges(mem_ranges);
> >> +	if (ret)
> >> +		goto out;
> >> +
> >> +	/* exclude memory ranges should be sorted for easy lookup */
> >> +	sort_memory_ranges(*mem_ranges);
> >> +out:
> >> +	if (ret)
> >> +		pr_err("Failed to setup exclude memory ranges\n");
> >> +	return ret;
> >> +}
> > 
> > I'm confused about the "overlap with crashkernel memory", does that mean
> > those normal kernel used memory could be put in crashkernel reserved
> 
> There are regions that could overlap with crashkernel region but they are
> not normal kernel used memory though. These are regions that kernel and/or
> f/w chose to place at a particular address for real mode accessibility
> and/or memory layout between kernel & f/w kind of thing.
> 
> > memory range?  If so why can't just skip those areas while crashkernel
> > doing the reservation?
> 
> crashkernel region has a dependency to be in the first memory block for it
> to be accessible in real mode. Accommodating this requirement while addressing
> other requirements would mean something like what we have now. A list of
> possible special memory regions in crashkernel region to take care of.
> 
> I have plans to split crashkernel region into low & high to have exclusive
> regions for crashkernel, even if that means to have two of them. But that
> is for another day with its own set of complexities to deal with...

Ok, I was not aware the powerpc crashkernel reservation is not
dynamically reserved.  But seems powerpc need those tricks at least for
the time being like you said.

Thanks
Dave
Dave Young July 2, 2020, 11:59 a.m. UTC | #10
> > I'm confused about the "overlap with crashkernel memory", does that mean
> > those normal kernel used memory could be put in crashkernel reserved
> > memory range?  If so why can't just skip those areas while crashkernel
> > doing the reservation?
> I raised the same question in another mail. As Hari's answer, "kexec -p"
> skips these ranges in user space. And the same logic should be done in
> "kexec -s -p"

See it, thanks!  The confusion also applied to the userspace
implementation though.  Seems they have to be special cases because of 
the powerpc crashkernel reservation implemtation in kernel limitation

Thanks
Dave
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h b/arch/powerpc/include/asm/crashdump-ppc64.h
new file mode 100644
index 0000000..3596c25
--- /dev/null
+++ b/arch/powerpc/include/asm/crashdump-ppc64.h
@@ -0,0 +1,10 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ARCH_POWERPC_KEXEC_CRASHDUMP_PPC64_H
+#define _ARCH_POWERPC_KEXEC_CRASHDUMP_PPC64_H
+
+/* min & max addresses for kdump load segments */
+#define KDUMP_BUF_MIN		(crashk_res.start)
+#define KDUMP_BUF_MAX		((crashk_res.end < ppc64_rma_size) ? \
+				 crashk_res.end : (ppc64_rma_size - 1))
+
+#endif /* __ARCH_POWERPC_KEXEC_CRASHDUMP_PPC64_H */
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 7008ea1..bf47a01 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -100,14 +100,16 @@  void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_co
 #ifdef CONFIG_KEXEC_FILE
 extern const struct kexec_file_ops kexec_elf64_ops;
 
-#ifdef CONFIG_IMA_KEXEC
 #define ARCH_HAS_KIMAGE_ARCH
 
 struct kimage_arch {
+	struct crash_mem *exclude_ranges;
+
+#ifdef CONFIG_IMA_KEXEC
 	phys_addr_t ima_buffer_addr;
 	size_t ima_buffer_size;
-};
 #endif
+};
 
 int setup_purgatory(struct kimage *image, const void *slave_code,
 		    const void *fdt, unsigned long kernel_load_addr,
@@ -125,6 +127,7 @@  int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
 			unsigned long initrd_load_addr,
 			unsigned long initrd_len, const char *cmdline);
 #endif /* CONFIG_PPC64 */
+
 #endif /* CONFIG_KEXEC_FILE */
 
 #else /* !CONFIG_KEXEC_CORE */
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 23ad04c..c695f94 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -22,6 +22,7 @@ 
 #include <linux/of_fdt.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <asm/crashdump-ppc64.h>
 
 static void *elf64_load(struct kimage *image, char *kernel_buf,
 			unsigned long kernel_len, char *initrd,
@@ -46,6 +47,12 @@  static void *elf64_load(struct kimage *image, char *kernel_buf,
 	if (ret)
 		goto out;
 
+	if (image->type == KEXEC_TYPE_CRASH) {
+		/* min & max buffer values for kdump case */
+		kbuf.buf_min = pbuf.buf_min = KDUMP_BUF_MIN;
+		kbuf.buf_max = pbuf.buf_max = KDUMP_BUF_MAX;
+	}
+
 	ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
 	if (ret)
 		goto out;
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index e6bff960..f1d7160 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -17,6 +17,8 @@ 
 #include <linux/kexec.h>
 #include <linux/of_fdt.h>
 #include <linux/libfdt.h>
+#include <asm/kexec_ranges.h>
+#include <asm/crashdump-ppc64.h>
 
 const struct kexec_file_ops * const kexec_file_loaders[] = {
 	&kexec_elf64_ops,
@@ -24,6 +26,247 @@  const struct kexec_file_ops * const kexec_file_loaders[] = {
 };
 
 /**
+ * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
+ *                             regions like opal/rtas, tce-table, initrd,
+ *                             kernel, htab which should be avoided while
+ *                             setting up kexec load segments.
+ * @mem_ranges:                Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
+{
+	int ret;
+
+	ret = add_tce_mem_ranges(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_initrd_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_htab_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_kernel_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_rtas_mem_range(mem_ranges, false);
+	if (ret)
+		goto out;
+
+	ret = add_opal_mem_range(mem_ranges, false);
+	if (ret)
+		goto out;
+
+	ret = add_reserved_ranges(mem_ranges);
+	if (ret)
+		goto out;
+
+	/* exclude memory ranges should be sorted for easy lookup */
+	sort_memory_ranges(*mem_ranges);
+out:
+	if (ret)
+		pr_err("Failed to setup exclude memory ranges\n");
+	return ret;
+}
+
+/**
+ * __locate_mem_hole_ppc64 - Tests if the memory hole between buf_min & buf_max
+ *                           is large enough for the buffer. If true, sets
+ *                           kbuf->mem to the buffer.
+ * @kbuf:                    Buffer contents and memory parameters.
+ * @buf_min:                 Minimum address for the buffer.
+ * @buf_max:                 Maximum address for the buffer.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int __locate_mem_hole_ppc64(struct kexec_buf *kbuf,
+				   u64 buf_min, u64 buf_max)
+{
+	int ret = -EADDRNOTAVAIL;
+
+	buf_min = ALIGN(buf_min, kbuf->buf_align);
+
+	if (buf_min < buf_max &&
+	    (buf_max - buf_min + 1) >= kbuf->memsz) {
+		/*
+		 * Suitable memory range found. Set kbuf->mem here to skip
+		 * locate memory hole routine in __kexec_add_buffer() call.
+		 */
+		ret = 0;
+		if (kbuf->top_down)
+			kbuf->mem = ALIGN_DOWN(buf_max - kbuf->memsz + 1,
+					       kbuf->buf_align);
+		else
+			kbuf->mem = buf_min;
+	}
+
+	return ret;
+}
+
+/**
+ * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a
+ *                                  suitable buffer with top down approach.
+ * @kbuf:                           Buffer contents and memory parameters.
+ * @buf_min:                        Minimum address for the buffer.
+ * @buf_max:                        Maximum address for the buffer.
+ * @emem:                           Exclude memory ranges.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf,
+					  u64 buf_min, u64 buf_max,
+					  const struct crash_mem *emem)
+{
+	int i, ret = 0, err = -EADDRNOTAVAIL;
+	u64 start, end, tmin, tmax;
+
+	tmax = buf_max;
+	for (i = (emem->nr_ranges - 1); i >= 0; i--) {
+		start = emem->ranges[i].start;
+		end = emem->ranges[i].end;
+
+		if (start > tmax)
+			continue;
+
+		if (end < tmax) {
+			tmin = (end < buf_min ? buf_min : end + 1);
+			ret = __locate_mem_hole_ppc64(kbuf, tmin, tmax);
+			if (!ret)
+				return 0;
+		}
+
+		tmax = start - 1;
+
+		if (tmax < buf_min) {
+			ret = err;
+			break;
+		}
+		ret = 0;
+	}
+
+	if (!ret) {
+		tmin = buf_min;
+		ret = __locate_mem_hole_ppc64(kbuf, tmin, tmax);
+	}
+	return ret;
+}
+
+/**
+ * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a
+ *                                   suitable buffer with bottom up approach.
+ * @kbuf:                            Buffer contents and memory parameters.
+ * @buf_min:                         Minimum address for the buffer.
+ * @buf_max:                         Maximum address for the buffer.
+ * @emem:                            Exclude memory ranges.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf,
+					   u64 buf_min, u64 buf_max,
+					   const struct crash_mem *emem)
+{
+	int i, ret = 0, err = -EADDRNOTAVAIL;
+	u64 start, end, tmin, tmax;
+
+	tmin = buf_min;
+	for (i = 0; i < emem->nr_ranges; i++) {
+		start = emem->ranges[i].start;
+		end = emem->ranges[i].end;
+
+		if (end < tmin)
+			continue;
+
+		if (start > tmin) {
+			tmax = (start > buf_max ? buf_max : start - 1);
+			ret = __locate_mem_hole_ppc64(kbuf, tmin, tmax);
+			if (!ret)
+				return 0;
+		}
+
+		tmin = end + 1;
+
+		if (tmin > buf_max) {
+			ret = err;
+			break;
+		}
+		ret = 0;
+	}
+
+	if (!ret) {
+		tmax = buf_max;
+		ret = __locate_mem_hole_ppc64(kbuf, tmin, tmax);
+	}
+	return ret;
+}
+
+/**
+ * kexec_locate_mem_hole_ppc64 - Skip special memory regions like rtas,
+ *                               tce-table, opal, reserved-ranges & such
+ *                               (exclude memory ranges) as they can't be
+ *                               used for kexec segment buffer. Use buf_min
+ *                               & buf_max fields in kexec_buf structure to
+ *                               skip regions. Sets kbuf->mem when a
+ *                               suitable memory hole is found.
+ * @kbuf:                        Buffer contents and memory parameters.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int kexec_locate_mem_hole_ppc64(struct kexec_buf *kbuf)
+{
+	struct crash_mem **emem;
+	u64 buf_min, buf_max;
+	int ret;
+
+	/*
+	 * Use the locate_mem_hole logic in kexec_add_buffer() for regular
+	 * kexec_file_load syscall
+	 */
+	if (kbuf->image->type != KEXEC_TYPE_CRASH)
+		return 0;
+
+	/* Look up the exclude ranges list while locating the memory hole */
+	emem = &(kbuf->image->arch.exclude_ranges);
+	if (!(*emem) || ((*emem)->nr_ranges == 0)) {
+		pr_warn("No exclude range list. Using the default locate mem hole method\n");
+		return 0;
+	}
+
+	/* Ensure minimum alignment needed for segments. */
+	kbuf->memsz = ALIGN(kbuf->memsz, PAGE_SIZE);
+	kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
+
+	/* Segments for kdump kernel should be within crashkernel region */
+	buf_min = (kbuf->buf_min < crashk_res.start ?
+		   crashk_res.start : kbuf->buf_min);
+	buf_max = (kbuf->buf_max > crashk_res.end ?
+		   crashk_res.end : kbuf->buf_max);
+
+	if (buf_min > buf_max) {
+		pr_err("Invalid buffer min and/or max values\n");
+		return -EINVAL;
+	}
+
+	if (kbuf->top_down)
+		ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max,
+						     *emem);
+	else
+		ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max,
+						      *emem);
+
+	/* Add the buffer allocated to the exclude list for the next lookup */
+	if (!ret) {
+		add_mem_range(emem, kbuf->mem, kbuf->memsz);
+		sort_memory_ranges(*emem);
+	}
+	return ret;
+}
+
+/**
  * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global
  *                         variables and call setup_purgatory() to initialize
  *                         common global variable.
@@ -89,6 +332,29 @@  int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
 }
 
 /**
+ * arch_kexec_add_buffer - Locate memory hole before calling kexec_add_buffer().
+ *                         All kexec_add_buffer() callers should use this
+ *                         function instead.
+ * @kbuf:                  Buffer contents and memory parameters.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int arch_kexec_add_buffer(struct kexec_buf *kbuf)
+{
+	int ret;
+
+	ret = kexec_locate_mem_hole_ppc64(kbuf);
+	if (ret)
+		goto out;
+
+	ret = __kexec_add_buffer(kbuf);
+out:
+	if (ret)
+		pr_err("Failed to add buffer of size %lu\n", kbuf->memsz);
+	return ret;
+}
+
+/**
  * arch_kexec_kernel_image_probe - Does additional handling needed to setup
  *                                 kexec segments.
  * @image:                         kexec image being loaded.
@@ -100,9 +366,31 @@  int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
 int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
 				  unsigned long buf_len)
 {
-	/* We don't support crash kernels yet. */
-	if (image->type == KEXEC_TYPE_CRASH)
+	if (image->type == KEXEC_TYPE_CRASH) {
+		int ret;
+
+		/* Get exclude memory ranges needed for setting up kdump segments */
+		ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
+		if (ret)
+			pr_err("Failed to setup exclude memory ranges for buffer lookup\n");
+		/* Return this until all changes for panic kernel are in */
 		return -EOPNOTSUPP;
+	}
 
 	return kexec_image_probe_default(image, buf, buf_len);
 }
+
+/**
+ * arch_kimage_file_post_load_cleanup - Frees up all the allocations done
+ *                                      while loading the image.
+ * @image:                              kexec image being loaded.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+	kfree(image->arch.exclude_ranges);
+	image->arch.exclude_ranges = NULL;
+
+	return kexec_image_post_load_cleanup_default(image);
+}