diff mbox series

[6/6] powerpc/rtas: constrain user region allocation to RMA

Message ID 20210114220004.1138993-7-nathanl@linux.ibm.com (mailing list archive)
State Superseded
Headers show
Series powerpc/rtas: miscellaneous cleanups, user region allocation | expand

Commit Message

Nathan Lynch Jan. 14, 2021, 10 p.m. UTC
Memory locations passed as arguments from the OS to RTAS usually need
to be addressable in 32-bit mode and must reside in the Real Mode
Area. On PAPR guests, the RMA starts at logical address 0 and is the
first logical memory block reported in the LPAR’s device tree.

On powerpc targets with RTAS, Linux makes available to user space a
region of memory suitable for arguments to be passed to RTAS via
sys_rtas(). This region (rtas_rmo_buf) is allocated via the memblock
API during boot in order to ensure that it satisfies the requirements
described above.

With radix MMU, the upper limit supplied to the memblock allocation
can exceed the bounds of the first logical memory block, since
ppc64_rma_size is ULONG_MAX and RTAS_INSTANTIATE_MAX is 1GB. (512MB is
a common size of the first memory block according to a small sample of
LPARs I have checked.) This leads to failures when user space invokes
an RTAS function that uses a work area, such as
ibm,configure-connector.

Alter the determination of the upper limit for rtas_rmo_buf's
allocation to consult the device tree directly, ensuring placement
within the RMA regardless of the MMU in use.

Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
---
 arch/powerpc/kernel/rtas.c | 80 +++++++++++++++++++++++++++++++-------
 1 file changed, 65 insertions(+), 15 deletions(-)

Comments

Alexey Kardashevskiy Jan. 15, 2021, 4:38 a.m. UTC | #1
On 15/01/2021 09:00, Nathan Lynch wrote:
> Memory locations passed as arguments from the OS to RTAS usually need
> to be addressable in 32-bit mode and must reside in the Real Mode
> Area. On PAPR guests, the RMA starts at logical address 0 and is the
> first logical memory block reported in the LPAR’s device tree.
> 
> On powerpc targets with RTAS, Linux makes available to user space a
> region of memory suitable for arguments to be passed to RTAS via
> sys_rtas(). This region (rtas_rmo_buf) is allocated via the memblock
> API during boot in order to ensure that it satisfies the requirements
> described above.
> 
> With radix MMU, the upper limit supplied to the memblock allocation
> can exceed the bounds of the first logical memory block, since
> ppc64_rma_size is ULONG_MAX and RTAS_INSTANTIATE_MAX is 1GB. (512MB is
> a common size of the first memory block according to a small sample of
> LPARs I have checked.) This leads to failures when user space invokes
> an RTAS function that uses a work area, such as
> ibm,configure-connector.
> 
> Alter the determination of the upper limit for rtas_rmo_buf's
> allocation to consult the device tree directly, ensuring placement
> within the RMA regardless of the MMU in use.

Can we tie this with RTAS (which also needs to be in RMA) and simply add 
extra 64K in prom_instantiate_rtas() and advertise this address 
(ALIGH_UP(rtas-base + rtas-size, PAGE_SIZE)) to the user space? We do 
not need this RMO area before that point.

And probably do the same with per-cpu RTAS argument structures mentioned 
in the cover letter?



> 
> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
> ---
>   arch/powerpc/kernel/rtas.c | 80 +++++++++++++++++++++++++++++++-------
>   1 file changed, 65 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
> index da65faadbbb2..98dfb112f4df 100644
> --- a/arch/powerpc/kernel/rtas.c
> +++ b/arch/powerpc/kernel/rtas.c
> @@ -1166,6 +1166,70 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
>   	return 0;
>   }
>   
> +/*
> + * Memory locations passed to RTAS must be in the RMA as described by
> + * the range in /memory@0.
> + */
> +static phys_addr_t rtas_arg_addr_limit(void)
> +{
> +	unsigned int addr_cells;
> +	unsigned int size_cells;
> +	struct device_node *np;
> +	const __be32 *prop;
> +	u64 limit;
> +	u64 base;
> +
> +	/* RTAS is instantiated in 32-bit mode. */
> +	limit = 1ULL << 32;
> +
> +	/* Account for mem=. */
> +	if (memory_limit != 0)
> +		limit = min(limit, memory_limit);
> +
> +	np = of_find_node_by_path("/memory@0");
> +	if (!np)
> +		goto out;
> +
> +	prop = of_get_property(np, "reg", NULL);
> +	if (!prop)
> +		goto put;
> +
> +	addr_cells = of_n_addr_cells(np);
> +	base = of_read_number(prop, addr_cells);
> +	prop += addr_cells;
> +	size_cells = of_n_size_cells(np);
> +	limit = min(limit, of_read_number(prop, size_cells));
> +put:
> +	of_node_put(np);
> +out:
> +	pr_debug("%s: base = %#llx limit = %#llx", __func__, base, limit);
> +
> +	return limit;
> +}
> +
> +static void __init rtas_user_region_setup(void)
> +{
> +	phys_addr_t limit, align, size;
> +
> +	limit = rtas_arg_addr_limit();
> +	size = RTAS_USER_REGION_SIZE;
> +
> +	/*
> +	 * Although work areas need only 4KB alignment, user space
> +	 * accesses this region via mmap so it must be placed on a
> +	 * page boundary.
> +	 */
> +	align = PAGE_SIZE;
> +
> +	rtas_rmo_buf = memblock_phys_alloc_range(size, align, 0, limit);
> +	if (rtas_rmo_buf == 0) {
> +		panic("Failed to allocate %llu bytes for user region below %pa\n",
> +		      size, &limit);
> +	}
> +
> +	pr_debug("RTAS user region allocated at %pa\n", &rtas_rmo_buf);
> +}
> +
>   /*
>    * Call early during boot, before mem init, to retrieve the RTAS
>    * information from the device-tree and allocate the RMO buffer for userland
> @@ -1173,7 +1237,6 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
>    */
>   void __init rtas_initialize(void)
>   {
> -	unsigned long rtas_region = RTAS_INSTANTIATE_MAX;
>   	u32 base, size, entry;
>   	int no_base, no_size, no_entry;
>   
> @@ -1197,23 +1260,10 @@ void __init rtas_initialize(void)
>   	no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry);
>   	rtas.entry = no_entry ? rtas.base : entry;
>   
> -	/* If RTAS was found, allocate the RMO buffer for it and look for
> -	 * the stop-self token if any
> -	 */
> -#ifdef CONFIG_PPC64
> -	if (firmware_has_feature(FW_FEATURE_LPAR))
> -		rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX);
> -#endif
> -	rtas_rmo_buf = memblock_phys_alloc_range(RTAS_USER_REGION_SIZE, PAGE_SIZE,
> -						 0, rtas_region);
> -	if (!rtas_rmo_buf)
> -		panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n",
> -		      PAGE_SIZE, &rtas_region);
> -
>   #ifdef CONFIG_RTAS_ERROR_LOGGING
>   	rtas_last_error_token = rtas_token("rtas-last-error");
>   #endif
> -
> +	rtas_user_region_setup();
>   	rtas_syscall_filter_init();
>   }
>   
>
Nathan Lynch Jan. 15, 2021, 3:38 p.m. UTC | #2
Alexey Kardashevskiy <aik@ozlabs.ru> writes:
> On 15/01/2021 09:00, Nathan Lynch wrote:
>> Memory locations passed as arguments from the OS to RTAS usually need
>> to be addressable in 32-bit mode and must reside in the Real Mode
>> Area. On PAPR guests, the RMA starts at logical address 0 and is the
>> first logical memory block reported in the LPAR’s device tree.
>> 
>> On powerpc targets with RTAS, Linux makes available to user space a
>> region of memory suitable for arguments to be passed to RTAS via
>> sys_rtas(). This region (rtas_rmo_buf) is allocated via the memblock
>> API during boot in order to ensure that it satisfies the requirements
>> described above.
>> 
>> With radix MMU, the upper limit supplied to the memblock allocation
>> can exceed the bounds of the first logical memory block, since
>> ppc64_rma_size is ULONG_MAX and RTAS_INSTANTIATE_MAX is 1GB. (512MB is
>> a common size of the first memory block according to a small sample of
>> LPARs I have checked.) This leads to failures when user space invokes
>> an RTAS function that uses a work area, such as
>> ibm,configure-connector.
>> 
>> Alter the determination of the upper limit for rtas_rmo_buf's
>> allocation to consult the device tree directly, ensuring placement
>> within the RMA regardless of the MMU in use.
>
> Can we tie this with RTAS (which also needs to be in RMA) and simply add 
> extra 64K in prom_instantiate_rtas() and advertise this address 
> (ALIGH_UP(rtas-base + rtas-size, PAGE_SIZE)) to the user space? We do 
> not need this RMO area before that point.

Can you explain more about what advantage that would bring? I'm not
seeing it. It's a more significant change than what I've written
here. Would it interact well with kexec?

> And probably do the same with per-cpu RTAS argument structures mentioned 
> in the cover letter?

I don't think so, since those need to be allocated with the pacas and
limited to the maximum possible CPUs, which is discovered by the kernel
much later.

But maybe I misunderstand what you're suggesting.
Alexey Kardashevskiy Jan. 18, 2021, 4:12 a.m. UTC | #3
On 16/01/2021 02:38, Nathan Lynch wrote:
> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>> On 15/01/2021 09:00, Nathan Lynch wrote:
>>> Memory locations passed as arguments from the OS to RTAS usually need
>>> to be addressable in 32-bit mode and must reside in the Real Mode
>>> Area. On PAPR guests, the RMA starts at logical address 0 and is the
>>> first logical memory block reported in the LPAR’s device tree.
>>>
>>> On powerpc targets with RTAS, Linux makes available to user space a
>>> region of memory suitable for arguments to be passed to RTAS via
>>> sys_rtas(). This region (rtas_rmo_buf) is allocated via the memblock
>>> API during boot in order to ensure that it satisfies the requirements
>>> described above.
>>>
>>> With radix MMU, the upper limit supplied to the memblock allocation
>>> can exceed the bounds of the first logical memory block, since
>>> ppc64_rma_size is ULONG_MAX and RTAS_INSTANTIATE_MAX is 1GB. (512MB is
>>> a common size of the first memory block according to a small sample of
>>> LPARs I have checked.) This leads to failures when user space invokes
>>> an RTAS function that uses a work area, such as
>>> ibm,configure-connector.
>>>
>>> Alter the determination of the upper limit for rtas_rmo_buf's
>>> allocation to consult the device tree directly, ensuring placement
>>> within the RMA regardless of the MMU in use.
>>
>> Can we tie this with RTAS (which also needs to be in RMA) and simply add
>> extra 64K in prom_instantiate_rtas() and advertise this address
>> (ALIGH_UP(rtas-base + rtas-size, PAGE_SIZE)) to the user space? We do
>> not need this RMO area before that point.
> 
> Can you explain more about what advantage that would bring? I'm not
> seeing it. It's a more significant change than what I've written
> here.


We already allocate space for RTAS and (like RMO) it needs to be in RMA, 
and RMO is useless without RTAS. We can reuse RTAS allocation code for 
RMO like this:

===
diff --git a/arch/powerpc/kernel/prom_init.c 
b/arch/powerpc/kernel/prom_init.c
index e9d4eb6144e1..d9527d3e01d2 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1821,7 +1821,8 @@ static void __init prom_instantiate_rtas(void)
         if (size == 0)
                 return;

-       base = alloc_down(size, PAGE_SIZE, 0);
+       /* One page for RTAS, one for RMO */
+       base = alloc_down(size, PAGE_SIZE + PAGE_SIZE, 0);
         if (base == 0)
                 prom_panic("Could not allocate memory for RTAS\n");

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index d126d71ea5bd..885d95cf4ed3 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1186,6 +1186,7 @@ void __init rtas_initialize(void)
         rtas.size = size;
         no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", 
&entry);
         rtas.entry = no_entry ? rtas.base : entry;
+       rtas_rmo_buf = rtas.base + PAGE_SIZE;

         /* If RTAS was found, allocate the RMO buffer for it and look for
          * the stop-self token if any
@@ -1196,11 +1197,6 @@ void __init rtas_initialize(void)
                 ibm_suspend_me_token = rtas_token("ibm,suspend-me");
         }
  #endif
-       rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE,
-                                                0, rtas_region);
-       if (!rtas_rmo_buf)
-               panic("ERROR: RTAS: Failed to allocate %lx bytes below 
%pa\n",
-                     PAGE_SIZE, &rtas_region);
===


May be store in the FDT as "linux,rmo-base" next to "linux,rtas-base", 
for clarity, as sharing symbols between prom and main kernel is a bit 
tricky.

The benefit is that we do not do the same thing   (== find 64K in RMA) 
in 2 different ways and if the RMO allocated my way is broken - we'll 
know it much sooner as RTAS itself will break too.


> Would it interact well with kexec?


Good point. For this, the easiest will be setting rtas-size in the FDT 
to the allocated RTAS space (PAGE_SIZE*2 with the hunk above applied). 
Probably.


>> And probably do the same with per-cpu RTAS argument structures mentioned
>> in the cover letter?
> 
> I don't think so, since those need to be allocated with the pacas and
> limited to the maximum possible CPUs, which is discovered by the kernel
> much later.


The first cell of /proc/device-tree/cpus/ibm,drc-indexes is the number 
of cores, it is there when RTAS is instantiated, we know SMT after 
"ibm,client-architecture-support" (if I remember correctly).


> But maybe I misunderstand what you're suggesting.


Usually it is me missing the bigger picture :)
Michael Ellerman Jan. 19, 2021, 9 a.m. UTC | #4
Nathan Lynch <nathanl@linux.ibm.com> writes:
> Memory locations passed as arguments from the OS to RTAS usually need
> to be addressable in 32-bit mode and must reside in the Real Mode
> Area. On PAPR guests, the RMA starts at logical address 0 and is the
> first logical memory block reported in the LPAR’s device tree.
>
> On powerpc targets with RTAS, Linux makes available to user space a
> region of memory suitable for arguments to be passed to RTAS via
> sys_rtas(). This region (rtas_rmo_buf) is allocated via the memblock
> API during boot in order to ensure that it satisfies the requirements
> described above.
>
> With radix MMU, the upper limit supplied to the memblock allocation
> can exceed the bounds of the first logical memory block, since
> ppc64_rma_size is ULONG_MAX and RTAS_INSTANTIATE_MAX is 1GB.

Why does the size of the first memory block matter for radix?

The 1GB limit is sufficient to make it accessible by 32-bit code.

> (512MB is a common size of the first memory block according to a small sample of
> LPARs I have checked.)

That's the minimum we request, see prom_init.c:

	/* option vector 2: Open Firmware options supported */
	.vec2 = {
		.byte1 = OV2_REAL_MODE,
		.reserved = 0,
		.real_base = cpu_to_be32(0xffffffff),
		.real_size = cpu_to_be32(0xffffffff),
		.virt_base = cpu_to_be32(0xffffffff),
		.virt_size = cpu_to_be32(0xffffffff),
		.load_base = cpu_to_be32(0xffffffff),
		.min_rma = cpu_to_be32(512),		/* 512MB min RMA */

Since v4.12 in 2017.

cheers
Nathan Lynch Jan. 19, 2021, 9 p.m. UTC | #5
Michael Ellerman <mpe@ellerman.id.au> writes:
> Nathan Lynch <nathanl@linux.ibm.com> writes:
>> Memory locations passed as arguments from the OS to RTAS usually need
>> to be addressable in 32-bit mode and must reside in the Real Mode
>> Area. On PAPR guests, the RMA starts at logical address 0 and is the
>> first logical memory block reported in the LPAR’s device tree.
>>
>> On powerpc targets with RTAS, Linux makes available to user space a
>> region of memory suitable for arguments to be passed to RTAS via
>> sys_rtas(). This region (rtas_rmo_buf) is allocated via the memblock
>> API during boot in order to ensure that it satisfies the requirements
>> described above.
>>
>> With radix MMU, the upper limit supplied to the memblock allocation
>> can exceed the bounds of the first logical memory block, since
>> ppc64_rma_size is ULONG_MAX and RTAS_INSTANTIATE_MAX is 1GB.
>
> Why does the size of the first memory block matter for radix?

Here is my understanding: in the platform architecture, the size of the
first memory block equals the RMA, regardless of the MMU mode. It just
so happens that when using radix, Linux can pass ibm,configure-connector
a work area address outside of the RMA because the allocation
constraints for the work area are computed differently. It would be
wrong of the OS to pass RTAS arguments outside of this region with hash
MMU as well.

> The 1GB limit is sufficient to make it accessible by 32-bit code.

But the requirement is that memory arguments passed to RTAS reside in
the RMA, which may be less than 1GB.


>> (512MB is a common size of the first memory block according to a small sample of
>> LPARs I have checked.)
>
> That's the minimum we request, see prom_init.c:
>
> 	/* option vector 2: Open Firmware options supported */
> 	.vec2 = {
> 		.byte1 = OV2_REAL_MODE,
> 		.reserved = 0,
> 		.real_base = cpu_to_be32(0xffffffff),
> 		.real_size = cpu_to_be32(0xffffffff),
> 		.virt_base = cpu_to_be32(0xffffffff),
> 		.virt_size = cpu_to_be32(0xffffffff),
> 		.load_base = cpu_to_be32(0xffffffff),
> 		.min_rma = cpu_to_be32(512),		/* 512MB min RMA */
>
> Since v4.12 in 2017.

Thanks for the pointer, I had been trying to find this without success.
Nathan Lynch Jan. 20, 2021, 12:39 a.m. UTC | #6
Alexey Kardashevskiy <aik@ozlabs.ru> writes:
> On 16/01/2021 02:38, Nathan Lynch wrote:
>> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>>> On 15/01/2021 09:00, Nathan Lynch wrote:
>>>> Memory locations passed as arguments from the OS to RTAS usually need
>>>> to be addressable in 32-bit mode and must reside in the Real Mode
>>>> Area. On PAPR guests, the RMA starts at logical address 0 and is the
>>>> first logical memory block reported in the LPAR’s device tree.
>>>>
>>>> On powerpc targets with RTAS, Linux makes available to user space a
>>>> region of memory suitable for arguments to be passed to RTAS via
>>>> sys_rtas(). This region (rtas_rmo_buf) is allocated via the memblock
>>>> API during boot in order to ensure that it satisfies the requirements
>>>> described above.
>>>>
>>>> With radix MMU, the upper limit supplied to the memblock allocation
>>>> can exceed the bounds of the first logical memory block, since
>>>> ppc64_rma_size is ULONG_MAX and RTAS_INSTANTIATE_MAX is 1GB. (512MB is
>>>> a common size of the first memory block according to a small sample of
>>>> LPARs I have checked.) This leads to failures when user space invokes
>>>> an RTAS function that uses a work area, such as
>>>> ibm,configure-connector.
>>>>
>>>> Alter the determination of the upper limit for rtas_rmo_buf's
>>>> allocation to consult the device tree directly, ensuring placement
>>>> within the RMA regardless of the MMU in use.
>>>
>>> Can we tie this with RTAS (which also needs to be in RMA) and simply add
>>> extra 64K in prom_instantiate_rtas() and advertise this address
>>> (ALIGH_UP(rtas-base + rtas-size, PAGE_SIZE)) to the user space? We do
>>> not need this RMO area before that point.
>> 
>> Can you explain more about what advantage that would bring? I'm not
>> seeing it. It's a more significant change than what I've written
>> here.
>
>
> We already allocate space for RTAS and (like RMO) it needs to be in RMA, 
> and RMO is useless without RTAS. We can reuse RTAS allocation code for 
> RMO like this:

When you say RMO I assume you are referring to rtas_rmo_buf? (I don't
think it is well-named.)


> ===
> diff --git a/arch/powerpc/kernel/prom_init.c 
> b/arch/powerpc/kernel/prom_init.c
> index e9d4eb6144e1..d9527d3e01d2 100644
> --- a/arch/powerpc/kernel/prom_init.c
> +++ b/arch/powerpc/kernel/prom_init.c
> @@ -1821,7 +1821,8 @@ static void __init prom_instantiate_rtas(void)
>          if (size == 0)
>                  return;
>
> -       base = alloc_down(size, PAGE_SIZE, 0);
> +       /* One page for RTAS, one for RMO */

One page for RTAS? RTAS is ~20MB on LPARs I've checked:

# lsprop /proc/device-tree/rtas/{rtas-size,linux,rtas-base}
/proc/device-tree/rtas/rtas-size
		 01370000 (20381696)

> +       base = alloc_down(size, PAGE_SIZE + PAGE_SIZE, 0);

This changes the alignment but not the size of the allocation.


>          if (base == 0)
>                  prom_panic("Could not allocate memory for RTAS\n");
>
> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
> index d126d71ea5bd..885d95cf4ed3 100644
> --- a/arch/powerpc/kernel/rtas.c
> +++ b/arch/powerpc/kernel/rtas.c
> @@ -1186,6 +1186,7 @@ void __init rtas_initialize(void)
>          rtas.size = size;
>          no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", 
> &entry);
>          rtas.entry = no_entry ? rtas.base : entry;
> +       rtas_rmo_buf = rtas.base + PAGE_SIZE;

I think this would overlay the user region on top of the RTAS private
data area, allowing user space to corrupt it.


>
>          /* If RTAS was found, allocate the RMO buffer for it and look for
>           * the stop-self token if any
> @@ -1196,11 +1197,6 @@ void __init rtas_initialize(void)
>                  ibm_suspend_me_token = rtas_token("ibm,suspend-me");
>          }
>   #endif
> -       rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE,
> -                                                0, rtas_region);
> -       if (!rtas_rmo_buf)
> -               panic("ERROR: RTAS: Failed to allocate %lx bytes below 
> %pa\n",
> -                     PAGE_SIZE, &rtas_region);
> ===
>
> May be store in the FDT as "linux,rmo-base" next to "linux,rtas-base", 
> for clarity, as sharing symbols between prom and main kernel is a bit 
> tricky.
>
> The benefit is that we do not do the same thing   (== find 64K in RMA) 
> in 2 different ways and if the RMO allocated my way is broken - we'll 
> know it much sooner as RTAS itself will break too.

Implementation details aside... I'll grant that combining the
allocations into one in prom_init reduces some duplication in the sense
that both are subject to the same constraints (mostly - the RTAS data
area must not cross a 256MB boundary, while the user region may). But
they really are distinct concerns. The RTAS private data area is
specified in the platform architecture, the OS is obligated to allocate
it and pass it to instantiate-rtas, etc etc. However the user region
(rtas_rmo_buf) is purely a Linux construct which is there to support
sys_rtas.

Now, there are multiple sites in the kernel proper that must allocate
memory suitable for passing to RTAS. Obviously there is value in
consolidating the logic for that purpose in one place, so I'll work on
adding that in v2. OK?
Alexey Kardashevskiy Jan. 20, 2021, 4:49 a.m. UTC | #7
On 20/01/2021 11:39, Nathan Lynch wrote:
> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>> On 16/01/2021 02:38, Nathan Lynch wrote:
>>> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>>>> On 15/01/2021 09:00, Nathan Lynch wrote:
>>>>> Memory locations passed as arguments from the OS to RTAS usually need
>>>>> to be addressable in 32-bit mode and must reside in the Real Mode
>>>>> Area. On PAPR guests, the RMA starts at logical address 0 and is the
>>>>> first logical memory block reported in the LPAR’s device tree.
>>>>>
>>>>> On powerpc targets with RTAS, Linux makes available to user space a
>>>>> region of memory suitable for arguments to be passed to RTAS via
>>>>> sys_rtas(). This region (rtas_rmo_buf) is allocated via the memblock
>>>>> API during boot in order to ensure that it satisfies the requirements
>>>>> described above.
>>>>>
>>>>> With radix MMU, the upper limit supplied to the memblock allocation
>>>>> can exceed the bounds of the first logical memory block, since
>>>>> ppc64_rma_size is ULONG_MAX and RTAS_INSTANTIATE_MAX is 1GB. (512MB is
>>>>> a common size of the first memory block according to a small sample of
>>>>> LPARs I have checked.) This leads to failures when user space invokes
>>>>> an RTAS function that uses a work area, such as
>>>>> ibm,configure-connector.
>>>>>
>>>>> Alter the determination of the upper limit for rtas_rmo_buf's
>>>>> allocation to consult the device tree directly, ensuring placement
>>>>> within the RMA regardless of the MMU in use.
>>>>
>>>> Can we tie this with RTAS (which also needs to be in RMA) and simply add
>>>> extra 64K in prom_instantiate_rtas() and advertise this address
>>>> (ALIGH_UP(rtas-base + rtas-size, PAGE_SIZE)) to the user space? We do
>>>> not need this RMO area before that point.
>>>
>>> Can you explain more about what advantage that would bring? I'm not
>>> seeing it. It's a more significant change than what I've written
>>> here.
>>
>>
>> We already allocate space for RTAS and (like RMO) it needs to be in RMA,
>> and RMO is useless without RTAS. We can reuse RTAS allocation code for
>> RMO like this:
> 
> When you say RMO I assume you are referring to rtas_rmo_buf? (I don't
> think it is well-named.)
> 
> 
>> ===
>> diff --git a/arch/powerpc/kernel/prom_init.c
>> b/arch/powerpc/kernel/prom_init.c
>> index e9d4eb6144e1..d9527d3e01d2 100644
>> --- a/arch/powerpc/kernel/prom_init.c
>> +++ b/arch/powerpc/kernel/prom_init.c
>> @@ -1821,7 +1821,8 @@ static void __init prom_instantiate_rtas(void)
>>           if (size == 0)
>>                   return;
>>
>> -       base = alloc_down(size, PAGE_SIZE, 0);
>> +       /* One page for RTAS, one for RMO */
> 
> One page for RTAS? RTAS is ~20MB on LPARs I've checked:
> 
> # lsprop /proc/device-tree/rtas/{rtas-size,linux,rtas-base}
> /proc/device-tree/rtas/rtas-size
> 		 01370000 (20381696)

You are right, I did not sleep well when replied, sorry about that :) I 
tried it with KVM where RTAS is just a few KBs (20 constant bytes + MCE 
log, depends on cpu number) so it worked for me.


> 
>> +       base = alloc_down(size, PAGE_SIZE + PAGE_SIZE, 0);
> 
> This changes the alignment but not the size of the allocation.


Should be:

base = alloc_down(ALIGN_UP(size, PAGE_SIZE) + PAGE_SIZE, PAGE_SIZE, 0);

> 
> 
>>           if (base == 0)
>>                   prom_panic("Could not allocate memory for RTAS\n");
>>
>> diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
>> index d126d71ea5bd..885d95cf4ed3 100644
>> --- a/arch/powerpc/kernel/rtas.c
>> +++ b/arch/powerpc/kernel/rtas.c
>> @@ -1186,6 +1186,7 @@ void __init rtas_initialize(void)
>>           rtas.size = size;
>>           no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry",
>> &entry);
>>           rtas.entry = no_entry ? rtas.base : entry;
>> +       rtas_rmo_buf = rtas.base + PAGE_SIZE;
> 
> I think this would overlay the user region on top of the RTAS private
> data area, allowing user space to corrupt it.


Right, my bad. Should be:

rtas_rmo_buf = ALIGN_UP(rtas.base + rtas.size, PAGE_SIZE);


> 
>>
>>           /* If RTAS was found, allocate the RMO buffer for it and look for
>>            * the stop-self token if any
>> @@ -1196,11 +1197,6 @@ void __init rtas_initialize(void)
>>                   ibm_suspend_me_token = rtas_token("ibm,suspend-me");
>>           }
>>    #endif
>> -       rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE,
>> -                                                0, rtas_region);
>> -       if (!rtas_rmo_buf)
>> -               panic("ERROR: RTAS: Failed to allocate %lx bytes below
>> %pa\n",
>> -                     PAGE_SIZE, &rtas_region);
>> ===
>>
>> May be store in the FDT as "linux,rmo-base" next to "linux,rtas-base",
>> for clarity, as sharing symbols between prom and main kernel is a bit
>> tricky.
>>
>> The benefit is that we do not do the same thing   (== find 64K in RMA)
>> in 2 different ways and if the RMO allocated my way is broken - we'll
>> know it much sooner as RTAS itself will break too.
> 
> Implementation details aside... I'll grant that combining the
> allocations into one in prom_init reduces some duplication in the sense
> that both are subject to the same constraints (mostly - the RTAS data
> area must not cross a 256MB boundary, while the user region may). But
> they really are distinct concerns. The RTAS private data area is
> specified in the platform architecture, the OS is obligated to allocate
> it and pass it to instantiate-rtas, etc etc. However the user region
> (rtas_rmo_buf) is purely a Linux construct which is there to support
> sys_rtas.

Not purely - it should be an address which RTAS accepts. Cannot argue 
with the rest though, it all sounds correct.

> Now, there are multiple sites in the kernel proper that must allocate
> memory suitable for passing to RTAS. Obviously there is value in
> consolidating the logic for that purpose in one place, so I'll work on
> adding that in v2. OK?

Sure.
Michael Ellerman Jan. 20, 2021, 12:06 p.m. UTC | #8
Nathan Lynch <nathanl@linux.ibm.com> writes:
> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>> On 16/01/2021 02:38, Nathan Lynch wrote:
>>> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>>>> On 15/01/2021 09:00, Nathan Lynch wrote:
>>>>> Memory locations passed as arguments from the OS to RTAS usually need
>>>>> to be addressable in 32-bit mode and must reside in the Real Mode
>>>>> Area. On PAPR guests, the RMA starts at logical address 0 and is the
>>>>> first logical memory block reported in the LPAR’s device tree.
>>>>>
>>>>> On powerpc targets with RTAS, Linux makes available to user space a
>>>>> region of memory suitable for arguments to be passed to RTAS via
>>>>> sys_rtas(). This region (rtas_rmo_buf) is allocated via the memblock
>>>>> API during boot in order to ensure that it satisfies the requirements
>>>>> described above.
>>>>>
>>>>> With radix MMU, the upper limit supplied to the memblock allocation
>>>>> can exceed the bounds of the first logical memory block, since
>>>>> ppc64_rma_size is ULONG_MAX and RTAS_INSTANTIATE_MAX is 1GB. (512MB is
>>>>> a common size of the first memory block according to a small sample of
>>>>> LPARs I have checked.) This leads to failures when user space invokes
>>>>> an RTAS function that uses a work area, such as
>>>>> ibm,configure-connector.
>>>>>
>>>>> Alter the determination of the upper limit for rtas_rmo_buf's
>>>>> allocation to consult the device tree directly, ensuring placement
>>>>> within the RMA regardless of the MMU in use.
>>>>
>>>> Can we tie this with RTAS (which also needs to be in RMA) and simply add
>>>> extra 64K in prom_instantiate_rtas() and advertise this address
>>>> (ALIGH_UP(rtas-base + rtas-size, PAGE_SIZE)) to the user space? We do
>>>> not need this RMO area before that point.
>>> 
>>> Can you explain more about what advantage that would bring? I'm not
>>> seeing it. It's a more significant change than what I've written
>>> here.
>>
>>
>> We already allocate space for RTAS and (like RMO) it needs to be in RMA, 
>> and RMO is useless without RTAS. We can reuse RTAS allocation code for 
>> RMO like this:
>
> When you say RMO I assume you are referring to rtas_rmo_buf? (I don't
> think it is well-named.)
...

RMO (Real mode offset) is the old term we used to use to refer to what
is now called the RMA (Real mode area). There are still many references
to RMO in Linux, but they almost certainly all refer to what we now call
the RMA.

>> May be store in the FDT as "linux,rmo-base" next to "linux,rtas-base", 
>> for clarity, as sharing symbols between prom and main kernel is a bit 
>> tricky.
>>
>> The benefit is that we do not do the same thing   (== find 64K in RMA) 
>> in 2 different ways and if the RMO allocated my way is broken - we'll 
>> know it much sooner as RTAS itself will break too.
>
> Implementation details aside... I'll grant that combining the
> allocations into one in prom_init reduces some duplication in the sense
> that both are subject to the same constraints (mostly - the RTAS data
> area must not cross a 256MB boundary, while the user region may). But
> they really are distinct concerns. The RTAS private data area is
> specified in the platform architecture, the OS is obligated to allocate
> it and pass it to instantiate-rtas, etc etc. However the user region
> (rtas_rmo_buf) is purely a Linux construct which is there to support
> sys_rtas.
>
> Now, there are multiple sites in the kernel proper that must allocate
> memory suitable for passing to RTAS. Obviously there is value in
> consolidating the logic for that purpose in one place, so I'll work on
> adding that in v2. OK?

I don't think we want to move any allocations into prom_init.c unless we
have to.

It's best thought of as a trampoline, that runs before the kernel
proper, to transition from live OF to a flat DT environment. One thing
that must be done as part of that is instantiating RTAS, because it's
basically a runtime copy of the live OF. But any other allocs are for
Linux to handle later, IMHO.

cheers
Michael Ellerman Jan. 20, 2021, 12:13 p.m. UTC | #9
Nathan Lynch <nathanl@linux.ibm.com> writes:
> Michael Ellerman <mpe@ellerman.id.au> writes:
>> Nathan Lynch <nathanl@linux.ibm.com> writes:
>>> Memory locations passed as arguments from the OS to RTAS usually need
>>> to be addressable in 32-bit mode and must reside in the Real Mode
>>> Area. On PAPR guests, the RMA starts at logical address 0 and is the
>>> first logical memory block reported in the LPAR’s device tree.
>>>
>>> On powerpc targets with RTAS, Linux makes available to user space a
>>> region of memory suitable for arguments to be passed to RTAS via
>>> sys_rtas(). This region (rtas_rmo_buf) is allocated via the memblock
>>> API during boot in order to ensure that it satisfies the requirements
>>> described above.
>>>
>>> With radix MMU, the upper limit supplied to the memblock allocation
>>> can exceed the bounds of the first logical memory block, since
>>> ppc64_rma_size is ULONG_MAX and RTAS_INSTANTIATE_MAX is 1GB.
>>
>> Why does the size of the first memory block matter for radix?
>
> Here is my understanding: in the platform architecture, the size of the
> first memory block equals the RMA, regardless of the MMU mode. It just
> so happens that when using radix, Linux can pass ibm,configure-connector
> a work area address outside of the RMA because the allocation
> constraints for the work area are computed differently. It would be
> wrong of the OS to pass RTAS arguments outside of this region with hash
> MMU as well.

If that's the requirement then shouldn't we be adjusting ppc64_rma_size?
Otherwise aren't other uses of ppc64_rma_size going to run into similar
problems.

Or does the RMA only apply for RTAS calls when using radix?

cheers
Nathan Lynch Jan. 21, 2021, 12:26 a.m. UTC | #10
Michael Ellerman <mpe@ellerman.id.au> writes:
> Nathan Lynch <nathanl@linux.ibm.com> writes:
>> Michael Ellerman <mpe@ellerman.id.au> writes:
>>> Nathan Lynch <nathanl@linux.ibm.com> writes:
>>>> Memory locations passed as arguments from the OS to RTAS usually need
>>>> to be addressable in 32-bit mode and must reside in the Real Mode
>>>> Area. On PAPR guests, the RMA starts at logical address 0 and is the
>>>> first logical memory block reported in the LPAR’s device tree.
>>>>
>>>> On powerpc targets with RTAS, Linux makes available to user space a
>>>> region of memory suitable for arguments to be passed to RTAS via
>>>> sys_rtas(). This region (rtas_rmo_buf) is allocated via the memblock
>>>> API during boot in order to ensure that it satisfies the requirements
>>>> described above.
>>>>
>>>> With radix MMU, the upper limit supplied to the memblock allocation
>>>> can exceed the bounds of the first logical memory block, since
>>>> ppc64_rma_size is ULONG_MAX and RTAS_INSTANTIATE_MAX is 1GB.
>>>
>>> Why does the size of the first memory block matter for radix?
>>
>> Here is my understanding: in the platform architecture, the size of the
>> first memory block equals the RMA, regardless of the MMU mode. It just
>> so happens that when using radix, Linux can pass ibm,configure-connector
>> a work area address outside of the RMA because the allocation
>> constraints for the work area are computed differently. It would be
>> wrong of the OS to pass RTAS arguments outside of this region with hash
>> MMU as well.
>
> If that's the requirement then shouldn't we be adjusting ppc64_rma_size?
> Otherwise aren't other uses of ppc64_rma_size going to run into similar
> problems.

Not all allocations limited by ppc64_rma_size set up memory that is
passed to RTAS though, do they? e.g. emergency_stack_init and
init_fallback_flush? Those shouldn't be confined to the first LMB
unnecessarily.

That's why I'm thinking what I've written here should be generalized a
bit and placed in an early allocator function that can be used to set up
the user region and the per-cpu reentrant RTAS argument buffers
(see allocate_paca_ptrs/new_rtas_args). So far those two sites are the
only ones I'm convinced need attention.
Nathan Lynch Jan. 21, 2021, 3:27 p.m. UTC | #11
Michael Ellerman <mpe@ellerman.id.au> writes:
> Nathan Lynch <nathanl@linux.ibm.com> writes:
>> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>>> On 16/01/2021 02:38, Nathan Lynch wrote:
>>>> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>>>>> On 15/01/2021 09:00, Nathan Lynch wrote:
>>>>>> Memory locations passed as arguments from the OS to RTAS usually need
>>>>>> to be addressable in 32-bit mode and must reside in the Real Mode
>>>>>> Area. On PAPR guests, the RMA starts at logical address 0 and is the
>>>>>> first logical memory block reported in the LPAR’s device tree.
>>>>>>
>>>>>> On powerpc targets with RTAS, Linux makes available to user space a
>>>>>> region of memory suitable for arguments to be passed to RTAS via
>>>>>> sys_rtas(). This region (rtas_rmo_buf) is allocated via the memblock
>>>>>> API during boot in order to ensure that it satisfies the requirements
>>>>>> described above.
>>>>>>
>>>>>> With radix MMU, the upper limit supplied to the memblock allocation
>>>>>> can exceed the bounds of the first logical memory block, since
>>>>>> ppc64_rma_size is ULONG_MAX and RTAS_INSTANTIATE_MAX is 1GB. (512MB is
>>>>>> a common size of the first memory block according to a small sample of
>>>>>> LPARs I have checked.) This leads to failures when user space invokes
>>>>>> an RTAS function that uses a work area, such as
>>>>>> ibm,configure-connector.
>>>>>>
>>>>>> Alter the determination of the upper limit for rtas_rmo_buf's
>>>>>> allocation to consult the device tree directly, ensuring placement
>>>>>> within the RMA regardless of the MMU in use.
>>>>>
>>>>> Can we tie this with RTAS (which also needs to be in RMA) and simply add
>>>>> extra 64K in prom_instantiate_rtas() and advertise this address
>>>>> (ALIGH_UP(rtas-base + rtas-size, PAGE_SIZE)) to the user space? We do
>>>>> not need this RMO area before that point.
>>>> 
>>>> Can you explain more about what advantage that would bring? I'm not
>>>> seeing it. It's a more significant change than what I've written
>>>> here.
>>>
>>>
>>> We already allocate space for RTAS and (like RMO) it needs to be in RMA, 
>>> and RMO is useless without RTAS. We can reuse RTAS allocation code for 
>>> RMO like this:
>>
>> When you say RMO I assume you are referring to rtas_rmo_buf? (I don't
>> think it is well-named.)
> ...
>
> RMO (Real mode offset) is the old term we used to use to refer to what
> is now called the RMA (Real mode area). There are still many references
> to RMO in Linux, but they almost certainly all refer to what we now call
> the RMA.

Yes... but I think in this discussion Alexey was using RMO to stand in
for rtas_rmo_buf, which was what I was trying to clarify.


>>> May be store in the FDT as "linux,rmo-base" next to "linux,rtas-base", 
>>> for clarity, as sharing symbols between prom and main kernel is a bit 
>>> tricky.
>>>
>>> The benefit is that we do not do the same thing   (== find 64K in RMA) 
>>> in 2 different ways and if the RMO allocated my way is broken - we'll 
>>> know it much sooner as RTAS itself will break too.
>>
>> Implementation details aside... I'll grant that combining the
>> allocations into one in prom_init reduces some duplication in the sense
>> that both are subject to the same constraints (mostly - the RTAS data
>> area must not cross a 256MB boundary, while the user region may). But
>> they really are distinct concerns. The RTAS private data area is
>> specified in the platform architecture, the OS is obligated to allocate
>> it and pass it to instantiate-rtas, etc etc. However the user region
>> (rtas_rmo_buf) is purely a Linux construct which is there to support
>> sys_rtas.
>>
>> Now, there are multiple sites in the kernel proper that must allocate
>> memory suitable for passing to RTAS. Obviously there is value in
>> consolidating the logic for that purpose in one place, so I'll work on
>> adding that in v2. OK?
>
> I don't think we want to move any allocations into prom_init.c unless we
> have to.
>
> It's best thought of as a trampoline, that runs before the kernel
> proper, to transition from live OF to a flat DT environment. One thing
> that must be done as part of that is instantiating RTAS, because it's
> basically a runtime copy of the live OF. But any other allocs are for
> Linux to handle later, IMHO.

Agreed.
Alexey Kardashevskiy Jan. 23, 2021, 1:54 a.m. UTC | #12
On 22/01/2021 02:27, Nathan Lynch wrote:
> Michael Ellerman <mpe@ellerman.id.au> writes:
>> Nathan Lynch <nathanl@linux.ibm.com> writes:
>>> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>>>> On 16/01/2021 02:38, Nathan Lynch wrote:
>>>>> Alexey Kardashevskiy <aik@ozlabs.ru> writes:
>>>>>> On 15/01/2021 09:00, Nathan Lynch wrote:
>>>>>>> Memory locations passed as arguments from the OS to RTAS usually need
>>>>>>> to be addressable in 32-bit mode and must reside in the Real Mode
>>>>>>> Area. On PAPR guests, the RMA starts at logical address 0 and is the
>>>>>>> first logical memory block reported in the LPAR’s device tree.
>>>>>>>
>>>>>>> On powerpc targets with RTAS, Linux makes available to user space a
>>>>>>> region of memory suitable for arguments to be passed to RTAS via
>>>>>>> sys_rtas(). This region (rtas_rmo_buf) is allocated via the memblock
>>>>>>> API during boot in order to ensure that it satisfies the requirements
>>>>>>> described above.
>>>>>>>
>>>>>>> With radix MMU, the upper limit supplied to the memblock allocation
>>>>>>> can exceed the bounds of the first logical memory block, since
>>>>>>> ppc64_rma_size is ULONG_MAX and RTAS_INSTANTIATE_MAX is 1GB. (512MB is
>>>>>>> a common size of the first memory block according to a small sample of
>>>>>>> LPARs I have checked.) This leads to failures when user space invokes
>>>>>>> an RTAS function that uses a work area, such as
>>>>>>> ibm,configure-connector.
>>>>>>>
>>>>>>> Alter the determination of the upper limit for rtas_rmo_buf's
>>>>>>> allocation to consult the device tree directly, ensuring placement
>>>>>>> within the RMA regardless of the MMU in use.
>>>>>>
>>>>>> Can we tie this with RTAS (which also needs to be in RMA) and simply add
>>>>>> extra 64K in prom_instantiate_rtas() and advertise this address
>>>>>> (ALIGH_UP(rtas-base + rtas-size, PAGE_SIZE)) to the user space? We do
>>>>>> not need this RMO area before that point.
>>>>>
>>>>> Can you explain more about what advantage that would bring? I'm not
>>>>> seeing it. It's a more significant change than what I've written
>>>>> here.
>>>>
>>>>
>>>> We already allocate space for RTAS and (like RMO) it needs to be in RMA,
>>>> and RMO is useless without RTAS. We can reuse RTAS allocation code for
>>>> RMO like this:
>>>
>>> When you say RMO I assume you are referring to rtas_rmo_buf? (I don't
>>> think it is well-named.)
>> ...
>>
>> RMO (Real mode offset) is the old term we used to use to refer to what
>> is now called the RMA (Real mode area). There are still many references
>> to RMO in Linux, but they almost certainly all refer to what we now call
>> the RMA.
> 
> Yes... but I think in this discussion Alexey was using RMO to stand in
> for rtas_rmo_buf, which was what I was trying to clarify.


Correct. Thanks for the clarification, appreciated.

> 
>>>> May be store in the FDT as "linux,rmo-base" next to "linux,rtas-base",
>>>> for clarity, as sharing symbols between prom and main kernel is a bit
>>>> tricky.
>>>>
>>>> The benefit is that we do not do the same thing   (== find 64K in RMA)
>>>> in 2 different ways and if the RMO allocated my way is broken - we'll
>>>> know it much sooner as RTAS itself will break too.
>>>
>>> Implementation details aside... I'll grant that combining the
>>> allocations into one in prom_init reduces some duplication in the sense
>>> that both are subject to the same constraints (mostly - the RTAS data
>>> area must not cross a 256MB boundary, while the user region may). But
>>> they really are distinct concerns. The RTAS private data area is
>>> specified in the platform architecture, the OS is obligated to allocate
>>> it and pass it to instantiate-rtas, etc etc. However the user region
>>> (rtas_rmo_buf) is purely a Linux construct which is there to support
>>> sys_rtas.
>>>
>>> Now, there are multiple sites in the kernel proper that must allocate
>>> memory suitable for passing to RTAS. Obviously there is value in
>>> consolidating the logic for that purpose in one place, so I'll work on
>>> adding that in v2. OK?
>>
>> I don't think we want to move any allocations into prom_init.c unless we
>> have to.
>>
>> It's best thought of as a trampoline, that runs before the kernel
>> proper, to transition from live OF to a flat DT environment. One thing
>> that must be done as part of that is instantiating RTAS, because it's
>> basically a runtime copy of the live OF. But any other allocs are for
>> Linux to handle later, IMHO.
> 
> Agreed.

Then the only comment I have left is may be use of_address_to_resource() 
+ resource_size() instead of of_n_addr_cells()/of_n_size_cells() (like 
pseries_memory_block_size()). And now I shut up :) Thanks,
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index da65faadbbb2..98dfb112f4df 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1166,6 +1166,70 @@  SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
 	return 0;
 }
 
+/*
+ * Memory locations passed to RTAS must be in the RMA as described by
+ * the range in /memory@0.
+ */
+static phys_addr_t rtas_arg_addr_limit(void)
+{
+	unsigned int addr_cells;
+	unsigned int size_cells;
+	struct device_node *np;
+	const __be32 *prop;
+	u64 limit;
+	u64 base;
+
+	/* RTAS is instantiated in 32-bit mode. */
+	limit = 1ULL << 32;
+
+	/* Account for mem=. */
+	if (memory_limit != 0)
+		limit = min(limit, memory_limit);
+
+	np = of_find_node_by_path("/memory@0");
+	if (!np)
+		goto out;
+
+	prop = of_get_property(np, "reg", NULL);
+	if (!prop)
+		goto put;
+
+	addr_cells = of_n_addr_cells(np);
+	base = of_read_number(prop, addr_cells);
+	prop += addr_cells;
+	size_cells = of_n_size_cells(np);
+	limit = min(limit, of_read_number(prop, size_cells));
+put:
+	of_node_put(np);
+out:
+	pr_debug("%s: base = %#llx limit = %#llx", __func__, base, limit);
+
+	return limit;
+}
+
+static void __init rtas_user_region_setup(void)
+{
+	phys_addr_t limit, align, size;
+
+	limit = rtas_arg_addr_limit();
+	size = RTAS_USER_REGION_SIZE;
+
+	/*
+	 * Although work areas need only 4KB alignment, user space
+	 * accesses this region via mmap so it must be placed on a
+	 * page boundary.
+	 */
+	align = PAGE_SIZE;
+
+	rtas_rmo_buf = memblock_phys_alloc_range(size, align, 0, limit);
+	if (rtas_rmo_buf == 0) {
+		panic("Failed to allocate %llu bytes for user region below %pa\n",
+		      size, &limit);
+	}
+
+	pr_debug("RTAS user region allocated at %pa\n", &rtas_rmo_buf);
+}
+
 /*
  * Call early during boot, before mem init, to retrieve the RTAS
  * information from the device-tree and allocate the RMO buffer for userland
@@ -1173,7 +1237,6 @@  SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
  */
 void __init rtas_initialize(void)
 {
-	unsigned long rtas_region = RTAS_INSTANTIATE_MAX;
 	u32 base, size, entry;
 	int no_base, no_size, no_entry;
 
@@ -1197,23 +1260,10 @@  void __init rtas_initialize(void)
 	no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry);
 	rtas.entry = no_entry ? rtas.base : entry;
 
-	/* If RTAS was found, allocate the RMO buffer for it and look for
-	 * the stop-self token if any
-	 */
-#ifdef CONFIG_PPC64
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX);
-#endif
-	rtas_rmo_buf = memblock_phys_alloc_range(RTAS_USER_REGION_SIZE, PAGE_SIZE,
-						 0, rtas_region);
-	if (!rtas_rmo_buf)
-		panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n",
-		      PAGE_SIZE, &rtas_region);
-
 #ifdef CONFIG_RTAS_ERROR_LOGGING
 	rtas_last_error_token = rtas_token("rtas-last-error");
 #endif
-
+	rtas_user_region_setup();
 	rtas_syscall_filter_init();
 }