powerpc/lib: Use patch_site to patch copy_32 functions once cache is enabled

Message ID da0c182b76c27988b88ebd35ee284f5be5882dd1.1533802042.git.christophe.leroy@c-s.fr
State Accepted
Commit fa54a981ea7a852c145b05c95abba11e81fc1157
Series
  • powerpc/lib: Use patch_site to patch copy_32 functions once cache is enabled

Checks

Context Check Description
snowpatch_ozlabs/build-ppc32 success Test build-ppc32 on branch next
snowpatch_ozlabs/build-ppc64e success Test build-ppc64e on branch next
snowpatch_ozlabs/build-ppc64be success Test build-ppc64be on branch next
snowpatch_ozlabs/build-ppc64le success Test build-ppc64le on branch next
snowpatch_ozlabs/checkpatch success Test checkpatch on branch next
snowpatch_ozlabs/apply_patch success next/apply_patch Successfully applied

Commit Message

Christophe Leroy Aug. 9, 2018, 8:14 a.m.
The symbol memset_nocache_branch, defined to allow patching of the
memset() function once the cache is enabled, leads to confusing
reports by the perf tool.

Using the new patch_site functionality solves this issue.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/include/asm/asm-prototypes.h | 1 +
 arch/powerpc/kernel/setup_32.c            | 7 +++----
 arch/powerpc/lib/copy_32.S                | 9 ++++++---
 3 files changed, 10 insertions(+), 7 deletions(-)

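The patch_site mechanism works by recording, for each site, the offset
from a global s32 variable to the instruction to be patched. A minimal
sketch of the assembler macro used below (modelled on
asm/code-patching-asm.h, which this patch includes; the kernel's exact
definition may differ):

	/*
	 * For each site, emit a global s32 in .rodata holding the
	 * distance from that word to the labelled instruction. C code
	 * can then recover the instruction's address at runtime,
	 * wherever the kernel ended up relocated.
	 */
	.macro patch_site label name
		.pushsection ".rodata"
		.balign 4
		.global \name
	\name:
		.4byte	\label - .
		.popsection
	.endm

Because the site stores a relative offset in .rodata rather than a
global label in the text, no symbol lands in the middle of memset() or
memcpy(), which is what confused perf.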
Comments

Michael Ellerman Aug. 13, 2018, 11:23 a.m. | #1
On Thu, 2018-08-09 at 08:14:41 UTC, Christophe Leroy wrote:
> The symbol memset_nocache_branch, defined to allow patching of the
> memset() function once the cache is enabled, leads to confusing
> reports by the perf tool.
> 
> Using the new patch_site functionality solves this issue.
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/fa54a981ea7a852c145b05c95abba11e81fc1157

cheers

Patch

diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 70fdc5b9b9fb..1f4691ce4126 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -146,6 +146,7 @@  void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
 /* Patch sites */
 extern s32 patch__call_flush_count_cache;
 extern s32 patch__flush_count_cache_return;
+extern s32 patch__memset_nocache, patch__memcpy_nocache;
 
 extern long flush_count_cache;
 
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 0e3743343280..ba969278bf4d 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -97,11 +97,10 @@  notrace unsigned long __init early_init(unsigned long dt_ptr)
  * We do the initial parsing of the flat device-tree and prepares
  * for the MMU to be fully initialized.
  */
-extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
-
 notrace void __init machine_init(u64 dt_ptr)
 {
-	unsigned int *addr = &memset_nocache_branch;
+	unsigned int *addr = (unsigned int *)((unsigned long)&patch__memset_nocache +
+					       patch__memset_nocache);
 	unsigned long insn;
 
 	/* Configure static keys first, now that we're relocated. */
@@ -110,7 +109,7 @@  notrace void __init machine_init(u64 dt_ptr)
 	/* Enable early debugging if any specified (see udbg.h) */
 	udbg_early_init();
 
-	patch_instruction((unsigned int *)&memcpy, PPC_INST_NOP);
+	patch_instruction_site(&patch__memcpy_nocache, PPC_INST_NOP);
 
 	insn = create_cond_branch(addr, branch_target(addr), 0x820000);
 	patch_instruction(addr, insn);	/* replace b by bne cr0 */
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index da425bb6b369..ba66846fe973 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -13,6 +13,7 @@ 
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
 #include <asm/export.h>
+#include <asm/code-patching-asm.h>
 
 #define COPY_16_BYTES		\
 	lwz	r7,4(r4);	\
@@ -107,8 +108,8 @@  _GLOBAL(memset)
 	 * Skip optimised bloc until cache is enabled. Will be replaced
 	 * by 'bne' during boot to use normal procedure if r4 is not zero
 	 */
-_GLOBAL(memset_nocache_branch)
-	b	2f
+5:	b	2f
+	patch_site	5b, patch__memset_nocache
 
 	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
 	add	r8,r7,r5
@@ -168,7 +169,9 @@  _GLOBAL(memmove)
 	/* fall through */
 
 _GLOBAL(memcpy)
-	b	generic_memcpy
+1:	b	generic_memcpy
+	patch_site	1b, patch__memcpy_nocache
+
 	add	r7,r3,r5		/* test if the src & dst overlap */
 	add	r8,r4,r5
 	cmplw	0,r4,r7
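
On the C side, a site is resolved back to an instruction address by
adding the stored offset to the site's own address; machine_init()
above open-codes that arithmetic for the memset site because it also
needs the raw address for create_cond_branch(). A minimal sketch of
the helpers' logic: patch_instruction_site() appears in the diff,
while the patch_site_addr() helper shown here is assumed from the
same infrastructure in asm/code-patching.h and may differ in detail:

	/* A site stores "instruction address - site address" as an s32,
	 * so adding the site's own address to its value recovers the
	 * address of the instruction to patch. */
	static inline unsigned long patch_site_addr(s32 *site)
	{
		return (unsigned long)site + *site;
	}

	static inline int patch_instruction_site(s32 *site, unsigned int instr)
	{
		return patch_instruction((unsigned int *)patch_site_addr(site),
					 instr);
	}

The 0x820000 passed to create_cond_branch() sets BO=4/BI=2 in the
branch instruction, i.e. "branch if CR0[EQ] is clear", which turns the
unconditional "b 2f" into the "bne cr0" mentioned in the memset
comment.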