Patchwork [v2,2/5,ppc] Define virtual-physical translations for PIE relocations

login
register
mail settings
Submitter Suzuki Poulose
Date Oct. 25, 2011, 11:54 a.m.
Message ID <20111025115406.8183.57851.stgit@suzukikp.in.ibm.com>
Download mbox | patch
Permalink /patch/121619/
State Superseded
Headers show

Comments

Suzuki Poulose - Oct. 25, 2011, 11:54 a.m.
We find the runtime address of _stext and relocate ourselves based
on the following calculation.

	virtual_base = ALIGN(KERNELBASE,KERNEL_TLB_PIN_SIZE) +
			MODULO(_stext.run,KERNEL_TLB_PIN_SIZE)

relocate() is called with the Effective Virtual Base Address (as
shown below)

            | Phys. Addr| Virt. Addr |
Page        |------------------------|
Boundary    |           |            |
            |           |            |
            |           |            |
Kernel Load |___________|_ __ _ _ _ _|<- Effective
Addr(_stext)|           |      ^     |Virt. Base Addr
            |           |      |     |
            |           |      |     |
            |           |reloc_offset|
            |           |      |     |
            |           |      |     |
            |           |______v_____|<-(KERNELBASE)%TLB_SIZE
            |           |            |
            |           |            |
            |           |            |
Page        |-----------|------------|
Boundary    |           |            |


On BookE, we need __va() & __pa() early in the boot process to access
the device tree.

Currently this has been defined as :

#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) -
						PHYSICAL_START + KERNELBASE)
where:
 PHYSICAL_START is kernstart_addr - a variable updated at runtime.
 KERNELBASE	is the compile time Virtual base address of kernel.

This won't work for us, as kernstart_addr is dynamic and will yield different
results for __va()/__pa() for same mapping.

e.g.,

Let the kernel be loaded at 64MB and KERNELBASE be 0xc0000000 (same as
PAGE_OFFSET).

In this case, we would be mapping 0 to 0xc0000000, and kernstart_addr = 64M

Now __va(1MB) = (0x100000) - (0x4000000) + 0xc0000000
		= 0xbc100000 , which is wrong.

it should be : 0xc0000000 + 0x100000 = 0xc0100000

On platforms which support AMP, like PPC_47x (based on 44x), the kernel
could be loaded at highmem. Hence we cannot always depend on the compile
time constants for mapping.

Here are the possible solutions:

1) Update kernstart_addr(PHSYICAL_START) to match the Physical address of
compile time KERNELBASE value, instead of the actual Physical_Address(_stext).

The disadvantage is that we may break other users of PHYSICAL_START. They
could be replaced with __pa(_stext).

2) Redefine __va() & __pa() with relocation offset


#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_44x)
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) - PHYSICAL_START + (KERNELBASE + RELOC_OFFSET)))
#define __pa(x) ((unsigned long)(x) + PHYSICAL_START - (KERNELBASE + RELOC_OFFSET))
#endif

where, RELOC_OFFSET could be

  a) A variable, say relocation_offset (like kernstart_addr), updated
     at boot time. This impacts performance, as we have to load an additional
     variable from memory.

		OR

  b) #define RELOC_OFFSET ((PHYSICAL_START & PPC_PIN_SIZE_OFFSET_MASK) - \
                      (KERNELBASE & PPC_PIN_SIZE_OFFSET_MASK))

   This introduces more calculations for doing the translation.

3) Redefine __va() & __pa() with a new variable

i.e,

#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))

where VIRT_PHYS_OFFSET :

#ifdef CONFIG_RELOCATABLE_PPC32_PIE
#define VIRT_PHYS_OFFSET virt_phys_offset
#else
#define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START)
#endif /* CONFIG_RELOCATABLE_PPC32_PIE */

where virt_phy_offset is updated at runtime to :

	Effective KERNELBASE - kernstart_addr.

Taking our example, above:

virt_phys_offset = effective_kernelstart_vaddr - kernstart_addr
		 = 0xc0400000 - 0x400000
		 = 0xc0000000
	and

	__va(0x100000) = 0xc0000000 + 0x100000 = 0xc0100000
	 which is what we want.

I have implemented (3) in the following patch which has same cost of
operation as the existing one.

I have tested the patches on 440x platforms only. However this should
work fine for PPC_47x also, as we only depend on the runtime address
and the current TLB XLAT entry for the startup code, which is available
in r25. I don't have access to a 47x board yet. So, it would be great if
somebody could test this on 47x.

Signed-off-by: Suzuki K. Poulose <suzuki@in.ibm.com>
Cc:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc:	Kumar Gala <galak@kernel.crashing.org>
Cc:	linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
---

 arch/powerpc/Makefile           |    1 
 arch/powerpc/include/asm/page.h |   85 ++++++++++++++++++++++++++++++++++++++-
 arch/powerpc/mm/init_32.c       |    7 +++
 3 files changed, 90 insertions(+), 3 deletions(-)

Patch

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 57af16e..77f928f 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -65,6 +65,7 @@  endif
 
 LDFLAGS_vmlinux-yy := -Bstatic
 LDFLAGS_vmlinux-$(CONFIG_PPC64)$(CONFIG_RELOCATABLE) := -pie
+LDFLAGS_vmlinux-y$(CONFIG_RELOCATABLE_PPC32_PIE) := -pie
 LDFLAGS_vmlinux	:= $(LDFLAGS_vmlinux-yy)
 
 CFLAGS-$(CONFIG_PPC64)	:= -mminimal-toc -mtraceback=no -mcall-aixdesc
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index dd9c4fd..dc0d9fd 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -97,12 +97,26 @@  extern unsigned int HPAGE_SHIFT;
 
 extern phys_addr_t memstart_addr;
 extern phys_addr_t kernstart_addr;
+
+#ifdef CONFIG_RELOCATABLE_PPC32_PIE
+extern long long virt_phys_offset;
 #endif
+
+#endif /* __ASSEMBLY__ */
 #define PHYSICAL_START	kernstart_addr
-#else
+
+#else	/* !CONFIG_RELOCATABLE */
 #define PHYSICAL_START	ASM_CONST(CONFIG_PHYSICAL_START)
 #endif
 
+/* See Description below for VIRT_PHYS_OFFSET */
+#ifdef CONFIG_RELOCATABLE_PPC32_PIE
+#define VIRT_PHYS_OFFSET virt_phys_offset
+#else
+#define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START)
+#endif
+
+
 #ifdef CONFIG_PPC64
 #define MEMORY_START	0UL
 #elif defined(CONFIG_RELOCATABLE)
@@ -125,12 +139,77 @@  extern phys_addr_t kernstart_addr;
  * determine MEMORY_START until then.  However we can determine PHYSICAL_START
  * from information at hand (program counter, TLB lookup).
  *
+ *  Relocation on BookE with PIE (RELOCATABLE_PPC32_PIE)
+ *
+ *   With RELOCATABLE_PPC32_PIE,  we support loading the kernel at any physical 
+ *   address without any restriction on the page alignment.
+ *
+ *   We find the runtime address of _stext and relocate ourselves based on 
+ *   the following calculation:
+ *
+ *  	  virtual_base = ALIGN_DOWN(KERNELBASE,256M) +
+ *  				MODULO(_stext.run,256M)
+ *   and create the following mapping:
+ *
+ * 	  ALIGN_DOWN(_stext.run,256M) => ALIGN_DOWN(KERNELBASE,256M)
+ *
+ *   When we process relocations, we cannot depend on the
+ *   existing equation for the __va()/__pa() translations:
+ *
+ * 	   __va(x) = (x)  - PHYSICAL_START + KERNELBASE
+ *
+ *   Where:
+ *   	 PHYSICAL_START = kernstart_addr = Physical address of _stext
+ *  	 KERNELBASE = Compiled virtual address of _stext.
+ *
+ *   This formula holds true iff, kernel load address is TLB page aligned.
+ *
+ *   In our case, we need to also account for the shift in the kernel Virtual 
+ *   address.
+ *
+ *   E.g.,
+ *
+ *   Let the kernel be loaded at 64MB and KERNELBASE be 0xc0000000 (same as PAGE_OFFSET).
+ *   In this case, we would be mapping 0 to 0xc0000000, and kernstart_addr = 64M
+ *
+ *   Now __va(1MB) = (0x100000) - (0x4000000) + 0xc0000000
+ *                 = 0xbc100000 , which is wrong.
+ *
+ *   Rather, it should be : 0xc0000000 + 0x100000 = 0xc0100000
+ *      	according to our mapping.
+ *
+ *   Hence we use the following formula to get the translations right:
+ *
+ * 	  __va(x) = (x) - [ PHYSICAL_START - Effective KERNELBASE ]
+ *
+ * 	  Where :
+ * 		PHYSICAL_START = dynamic load address.(kernstart_addr variable)
+ * 		Effective KERNELBASE = virtual_base =
+ * 				     = ALIGN_DOWN(KERNELBASE,256M) +
+ * 						MODULO(PHYSICAL_START,256M)
+ *
+ * 	To make the cost of __va() / __pa() more light weight, we introduce
+ * 	a new variable virt_phys_offset, which will hold :
+ *
+ * 	virt_phys_offset = Effective KERNELBASE - PHYSICAL_START
+ * 			 = ALIGN_DOWN(KERNELBASE,256M) - 
+ * 			 	ALIGN_DOWN(PHYSICALSTART,256M)
+ *
+ * 	Hence :
+ *
+ * 	__va(x) = x - PHYSICAL_START + Effective KERNELBASE
+ * 		= x + virt_phys_offset
+ *
+ * 		and
+ * 	__pa(x) = x + PHYSICAL_START - Effective KERNELBASE
+ * 		= x - virt_phys_offset
+ * 		
  * On non-Book-E PPC64 PAGE_OFFSET and MEMORY_START are constants so use
  * the other definitions for __va & __pa.
  */
 #ifdef CONFIG_BOOKE
-#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) - PHYSICAL_START + KERNELBASE))
-#define __pa(x) ((unsigned long)(x) + PHYSICAL_START - KERNELBASE)
+#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))
+#define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET)
 #else
 #define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + PAGE_OFFSET - MEMORY_START))
 #define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + MEMORY_START)
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 161cefd..c4e7815 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -65,6 +65,13 @@  phys_addr_t memstart_addr = (phys_addr_t)~0ull;
 EXPORT_SYMBOL(memstart_addr);
 phys_addr_t kernstart_addr;
 EXPORT_SYMBOL(kernstart_addr);
+
+#ifdef CONFIG_RELOCATABLE_PPC32_PIE
+/* Used in __va()/__pa() */
+long long virt_phys_offset;
+EXPORT_SYMBOL(virt_phys_offset);
+#endif
+
 phys_addr_t lowmem_end_addr;
 
 int boot_mapsize;