Patchwork [02/15,PS3] Get lv1 high memory region from devtree

login
register
mail settings
Submitter Andre Heider
Date Aug. 1, 2011, 8:02 p.m.
Message ID <1312228986-32307-3-git-send-email-a.heider@gmail.com>
Download mbox | patch
Permalink /patch/107799/
State Superseded
Headers show

Comments

Andre Heider - Aug. 1, 2011, 8:02 p.m.
From: Hector Martin <hector@marcansoft.com>

This lets the bootloader preallocate the high lv1 region and pass its
location to the kernel through the devtree. Thus, it can be used to hold
the initrd. If the property doesn't exist, the kernel retains the old
behavior and attempts to allocate the region itself.

Signed-off-by: Hector Martin <hector@marcansoft.com>
[a.heider: Various cleanups to make checkpatch.pl happy]
Signed-off-by: Andre Heider <a.heider@gmail.com>
---
 arch/powerpc/platforms/ps3/mm.c |   61 +++++++++++++++++++++++++++++++++++++-
 1 files changed, 59 insertions(+), 2 deletions(-)
Geoff Levand - Aug. 3, 2011, 10:30 p.m.
On 08/01/2011 01:02 PM, Andre Heider wrote:
> 
> This lets the bootloader preallocate the high lv1 region and pass its
> location to the kernel through the devtree. Thus, it can be used to hold
> the initrd. If the property doesn't exist, the kernel retains the old
> behavior and attempts to allocate the region itself.

With this mechanism how is the address of the initrd passed to the
new kernel, in the DT?

How would a kexec based bootloader work?  If it's kernel were to allocate
high mem and the bootloader program uses the high mem, how could it tell
that kernel not to destroy the region on shutdown?

If arch/powerpc/boot/ps3.c allocated the mem and added a DT entry
then other OSes that don't know about the Linux device tree won't
be able to use that allocated memory.  Other OSes could do a
test to see if the allocation was already done.  Another option
that might work is to write info into the LV1 repository then
have boot code look there for allocated hig mem.

> Signed-off-by: Hector Martin <hector@marcansoft.com>
> [a.heider: Various cleanups to make checkpatch.pl happy]
> Signed-off-by: Andre Heider <a.heider@gmail.com>
> ---
>  arch/powerpc/platforms/ps3/mm.c |   61 +++++++++++++++++++++++++++++++++++++-
>  1 files changed, 59 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
> index c204588..30bb096 100644
> --- a/arch/powerpc/platforms/ps3/mm.c
> +++ b/arch/powerpc/platforms/ps3/mm.c
> @@ -110,6 +110,7 @@ struct map {
>  	u64 htab_size;
>  	struct mem_region rm;
>  	struct mem_region r1;
> +	int destroy_r1;

In the general case we could have multiple high mem
regions, and each could need to be destroyed, so I
think struct mem_region should have a destroy flag.

>  };
>  
>  #define debug_dump_map(x) _debug_dump_map(x, __func__, __LINE__)
> @@ -287,6 +288,49 @@ static void ps3_mm_region_destroy(struct mem_region *r)
>  	}
>  }
>  
> +static int ps3_mm_scan_memory(unsigned long node, const char *uname,
> +			      int depth, void *data)
> +{

Something like 'ps3_mm_dt_scan_highmem() is more descriptive.

> +	struct mem_region *r = data;
> +	void *p;
> +	u64 prop[2];
> +	unsigned long l;
> +	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
> +
> +	if (type == NULL)
> +		return 0;
> +	if (strcmp(type, "memory") != 0)

Should this be 'if (strcmp(type, "memory"))'?

> +		return 0;
> +
> +	p = of_get_flat_dt_prop(node, "sony,lv1-highmem", &l);
> +	if (p == NULL)
> +		return 0;
> +
> +	BUG_ON(l != sizeof(prop));
> +	memcpy(prop, p, sizeof(prop));
> +
> +	r->base = prop[0];
> +	r->size = prop[1];
> +	r->offset = r->base - map.rm.size;
> +
> +	return -1;
> +}
> +
> +static int ps3_mm_get_devtree_highmem(struct mem_region *r)
> +{
> +	r->size = r->base = r->offset = 0;
> +	of_scan_flat_dt(ps3_mm_scan_memory, r);
> +
> +	if (r->base && r->size) {
> +		DBG("%s:%d got high region from devtree: %llxh %llxh\n",
> +		__func__, __LINE__, r->base, r->size);
> +		return 0;
> +	} else {
> +		DBG("%s:%d no high region in devtree...\n", __func__, __LINE__);
> +		return -1;
> +	}
> +}
> +
>  /**
>   * ps3_mm_add_memory - hot add memory
>   */
> @@ -303,6 +347,12 @@ static int __init ps3_mm_add_memory(void)
>  
>  	BUG_ON(!mem_init_done);
>  
> +	if (!map.r1.size) {
> +		DBG("%s:%d: no region 1, not adding memory\n",
> +		    __func__, __LINE__);
> +		return 0;
> +	}

Did you find this to be hit?  Also, in the general case,
there could be more than one high mem region, but I don't
know of any current systems that do.

> +
>  	start_addr = map.rm.size;
>  	start_pfn = start_addr >> PAGE_SHIFT;
>  	nr_pages = (map.r1.size + PAGE_SIZE - 1) >> PAGE_SHIFT;
> @@ -1219,7 +1269,13 @@ void __init ps3_mm_init(void)
>  
>  
>  	/* arrange to do this in ps3_mm_add_memory */
> -	ps3_mm_region_create(&map.r1, map.total - map.rm.size);
> +
> +	if (ps3_mm_get_devtree_highmem(&map.r1) == 0) {
> +		map.destroy_r1 = 0;
> +	} else {

This should be

	if (!ps3_mm_get_devtree_highmem(&map.r1))
		map.destroy_r1 = 0;
	else {

> +		ps3_mm_region_create(&map.r1, map.total - map.rm.size);
> +		map.destroy_r1 = 1;
> +	}
>  
>  	/* correct map.total for the real total amount of memory we use */
>  	map.total = map.rm.size + map.r1.size;
> @@ -1233,5 +1289,6 @@ void __init ps3_mm_init(void)
>  
>  void ps3_mm_shutdown(void)
>  {
> -	ps3_mm_region_destroy(&map.r1);
> +	if (map.destroy_r1)
> +		ps3_mm_region_destroy(&map.r1);
>  }

-Geoff
Hector Martin - Aug. 4, 2011, 1:19 a.m.
On 08/04/2011 12:30 AM, Geoff Levand wrote:
> With this mechanism how is the address of the initrd passed to the
> new kernel, in the DT?

Using the /chosen linux,initrd-{start,end} properties. The bootloader
knows about the Linux trick of sticking together bootmem and highmem and
precalculates the linux "physical" address. Yeah, that's a hack, it
should probably be done in the kernel so the bootloader doesn't have to
know or care about how Linux decides to lay out its physical address
space. Do you have any suggestion as to how we would do this sanely?
Right now early_init_dt_setup_initrd_arch in arch/powerpc/kernel/prom.c
is generic and doesn't know anything about platform specifics.

> How would a kexec based bootloader work?  If it's kernel were to allocate
> high mem and the bootloader program uses the high mem, how could it tell
> that kernel not to destroy the region on shutdown?

The current code contemplates the case where a non-kexec based
bootloader is the first stage and allocates highmem (and knows how to
tell the kernel about it), possibly followed by kexec stages that just
keep that allocation. To support a kexec bootloader as the first
bootloader using this mechanism would indeed require extra support to
tell that kernel to retain its allocation, preferably something that can
be decided from userland. Of course the current kexec bootloader
behavior where highmem isn't handed over to the child kernel will still
work.

> If arch/powerpc/boot/ps3.c allocated the mem and added a DT entry
> then other OSes that don't know about the Linux device tree won't
> be able to use that allocated memory.  Other OSes could do a
> test to see if the allocation was already done.  Another option
> that might work is to write info into the LV1 repository then
> have boot code look there for allocated hig mem.

If you're booting another OS that isn't Linux then it also has no use
for a Linux-specific ramdisk (linux,initrd-start) and thus no use for
preallocated highmem and should be booted as such (maybe make the
userland tools tell the kernel to release highmem if there's no initrd
defined).

Using the lv1 repo is an option, but does it make sense? It's even less
standard than a FDT and we'd have to put both the region1 location and
the initrd location in there (there's no point to maintaining highmem if
you aren't going to use it).

FWIW, the lv1 repo writing hypercalls are unused and undocumented.

>> +	if (!map.r1.size) {
>> +		DBG("%s:%d: no region 1, not adding memory\n",
>> +		    __func__, __LINE__);
>> +		return 0;
>> +	}
> 
> Did you find this to be hit?  Also, in the general case,
> there could be more than one high mem region, but I don't
> know of any current systems that do.

Probably only during debugging, but it doesn't sound like a bad idea
anyway (e.g. bootloader allocated highmem but didn't tell the kernel so
the kernel couldn't allocate it).

As for multiple regions, well, currently it only supports one and that
is hardcoded in the phys->lpar translation, so I see no point in
worrying about that now.

ACK on the other code comments.
Geoff Levand - Aug. 4, 2011, 7:24 p.m.
On 08/03/2011 06:19 PM, Hector Martin wrote:
> On 08/04/2011 12:30 AM, Geoff Levand wrote:
>> How would a kexec based bootloader work?  If it's kernel were to allocate
>> high mem and the bootloader program uses the high mem, how could it tell
>> that kernel not to destroy the region on shutdown?
> 
> The current code contemplates the case where a non-kexec based
> bootloader is the first stage and allocates highmem (and knows how to
> tell the kernel about it), possibly followed by kexec stages that just
> keep that allocation. To support a kexec bootloader as the first
> bootloader using this mechanism would indeed require extra support to
> tell that kernel to retain its allocation, preferably something that can
> be decided from userland. Of course the current kexec bootloader
> behavior where highmem isn't handed over to the child kernel will still
> work.
> 
>> If arch/powerpc/boot/ps3.c allocated the mem and added a DT entry
>> then other OSes that don't know about the Linux device tree won't
>> be able to use that allocated memory.  Other OSes could do a
>> test to see if the allocation was already done.  Another option
>> that might work is to write info into the LV1 repository then
>> have boot code look there for allocated hig mem.
> 
> If you're booting another OS that isn't Linux then it also has no use
> for a Linux-specific ramdisk (linux,initrd-start) and thus no use for
> preallocated highmem and should be booted as such (maybe make the
> userland tools tell the kernel to release highmem if there's no initrd
> defined).

This sounds complicated, user programs managing memory regions.

Also, it needs to be considered that a lot of kernels are out
there will be confused if started with high mem already allocated.

> Using the lv1 repo is an option, but does it make sense? It's even less
> standard than a FDT and we'd have to put both the region1 location and
> the initrd location in there (there's no point to maintaining highmem if
> you aren't going to use it).
> 
> FWIW, the lv1 repo writing hypercalls are unused and undocumented.

The hcalls to create, write, and delete nodes are known, but I don't
recall if I verified they work:

  http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=blob;f=arch/powerpc/include/asm/lv1call.h;hb=HEAD#l265

#92 should be named lv1_write_repository_node.

You can only modify the repo for your lpar, so:

  lv1_{create,write}_repository_node(n1, n2, n3, n4, v1, v2);
  lv1_delete_repository_node(n1, n2, n3, n4);

-Geoff
Andre Heider - Aug. 6, 2011, 11:50 a.m.
On Thu, Aug 4, 2011 at 9:24 PM, Geoff Levand <geoff@infradead.org> wrote:
> Also, it needs to be considered that a lot of kernels are out
> there will be confused if started with high mem already allocated.

True, but is there anything we can do about that?
Isn't is okay to tell users of first stage boot loaders utilizing this
mechanism that whatever steps their boot chain contains has to support
this highmem pass over? As far as I can tell all current loaders won't
be affected by this patch.
When a user wants to chain a loader with this mechanism with
petitboot, he needs a petitboot coming with a kernel containing this
feature, and can then kexec whatever contains it too.

>> Using the lv1 repo is an option, but does it make sense? It's even less
>> standard than a FDT and we'd have to put both the region1 location and
>> the initrd location in there (there's no point to maintaining highmem if
>> you aren't going to use it).
>>
>> FWIW, the lv1 repo writing hypercalls are unused and undocumented.
>
> The hcalls to create, write, and delete nodes are known, but I don't
> recall if I verified they work:
>
>  http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=blob;f=arch/powerpc/include/asm/lv1call.h;hb=HEAD#l265
>
> #92 should be named lv1_write_repository_node.
>
> You can only modify the repo for your lpar, so:
>
>  lv1_{create,write}_repository_node(n1, n2, n3, n4, v1, v2);
>  lv1_delete_repository_node(n1, n2, n3, n4);

I tried this and it indeed works - I can pass over the highmem info
just fine using repository nodes.
If there is a chance that another OS might require this highmem pass
over then I agree that using the repository makes more sense.

I can prepare a patch for that, replacing this one. Any suggestions on
which nodes to use?
For a test run I used:
  FIELD_FIRST("bi", 0),
  FIELD("highmem", 0),
  FIELD("address", 0),
  0
and
  FIELD_FIRST("bi", 0),
  FIELD("highmem", 0),
  FIELD("size", 0),
  0

Regards,
Andre

Patch

diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index c204588..30bb096 100644
--- a/arch/powerpc/platforms/ps3/mm.c
+++ b/arch/powerpc/platforms/ps3/mm.c
@@ -110,6 +110,7 @@  struct map {
 	u64 htab_size;
 	struct mem_region rm;
 	struct mem_region r1;
+	int destroy_r1;
 };
 
 #define debug_dump_map(x) _debug_dump_map(x, __func__, __LINE__)
@@ -287,6 +288,49 @@  static void ps3_mm_region_destroy(struct mem_region *r)
 	}
 }
 
+static int ps3_mm_scan_memory(unsigned long node, const char *uname,
+			      int depth, void *data)
+{
+	struct mem_region *r = data;
+	void *p;
+	u64 prop[2];
+	unsigned long l;
+	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+
+	if (type == NULL)
+		return 0;
+	if (strcmp(type, "memory") != 0)
+		return 0;
+
+	p = of_get_flat_dt_prop(node, "sony,lv1-highmem", &l);
+	if (p == NULL)
+		return 0;
+
+	BUG_ON(l != sizeof(prop));
+	memcpy(prop, p, sizeof(prop));
+
+	r->base = prop[0];
+	r->size = prop[1];
+	r->offset = r->base - map.rm.size;
+
+	return -1;
+}
+
+static int ps3_mm_get_devtree_highmem(struct mem_region *r)
+{
+	r->size = r->base = r->offset = 0;
+	of_scan_flat_dt(ps3_mm_scan_memory, r);
+
+	if (r->base && r->size) {
+		DBG("%s:%d got high region from devtree: %llxh %llxh\n",
+		__func__, __LINE__, r->base, r->size);
+		return 0;
+	} else {
+		DBG("%s:%d no high region in devtree...\n", __func__, __LINE__);
+		return -1;
+	}
+}
+
 /**
  * ps3_mm_add_memory - hot add memory
  */
@@ -303,6 +347,12 @@  static int __init ps3_mm_add_memory(void)
 
 	BUG_ON(!mem_init_done);
 
+	if (!map.r1.size) {
+		DBG("%s:%d: no region 1, not adding memory\n",
+		    __func__, __LINE__);
+		return 0;
+	}
+
 	start_addr = map.rm.size;
 	start_pfn = start_addr >> PAGE_SHIFT;
 	nr_pages = (map.r1.size + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -1219,7 +1269,13 @@  void __init ps3_mm_init(void)
 
 
 	/* arrange to do this in ps3_mm_add_memory */
-	ps3_mm_region_create(&map.r1, map.total - map.rm.size);
+
+	if (ps3_mm_get_devtree_highmem(&map.r1) == 0) {
+		map.destroy_r1 = 0;
+	} else {
+		ps3_mm_region_create(&map.r1, map.total - map.rm.size);
+		map.destroy_r1 = 1;
+	}
 
 	/* correct map.total for the real total amount of memory we use */
 	map.total = map.rm.size + map.r1.size;
@@ -1233,5 +1289,6 @@  void __init ps3_mm_init(void)
 
 void ps3_mm_shutdown(void)
 {
-	ps3_mm_region_destroy(&map.r1);
+	if (map.destroy_r1)
+		ps3_mm_region_destroy(&map.r1);
 }