@@ -48,6 +48,10 @@ static inline void logmpp(u64 x)
#endif /* __powerpc64__ && ! __ASSEMBLY__ */
+#ifdef CONFIG_PPC32
+#define CACHE_NOW_ON 1 /* fixup feature bit: cache is active */
+#endif /* CONFIG_PPC32 */
+
#if defined(__ASSEMBLY__)
/*
* For a snooping icache, we still need a dummy icbi to purge all the
@@ -64,6 +68,10 @@ static inline void logmpp(u64 x)
#else
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+#ifdef CONFIG_PPC32
+extern unsigned int __start___cache_fixup, __stop___cache_fixup;
+#endif /* CONFIG_PPC32 */
+
#ifdef CONFIG_6xx
extern long _get_L2CR(void);
extern long _get_L3CR(void);
@@ -184,4 +184,34 @@ label##3: \
FTR_ENTRY_OFFSET label##1b-label##3b; \
.popsection;
+/* Cache related sections: fixup entries are emitted into __cache_fixup */
+#define BEGIN_CACHE_SECTION_NESTED(label) START_FTR_SECTION(label)
+#define BEGIN_CACHE_SECTION START_FTR_SECTION(97)
+
+#define END_CACHE_SECTION_NESTED(msk, val, label) \
+ FTR_SECTION_ELSE_NESTED(label) \
+ MAKE_FTR_SECTION_ENTRY(msk, val, label, __cache_fixup)
+
+#define END_CACHE_SECTION(msk, val) \
+ END_CACHE_SECTION_NESTED(msk, val, 97)
+
+#define END_CACHE_SECTION_IFSET(msk) END_CACHE_SECTION((msk), (msk))
+#define END_CACHE_SECTION_IFCLR(msk) END_CACHE_SECTION((msk), 0)
+
+/* CACHE feature sections with alternatives, use BEGIN_CACHE_SECTION to start */
+#define CACHE_SECTION_ELSE_NESTED(label) FTR_SECTION_ELSE_NESTED(label)
+#define CACHE_SECTION_ELSE CACHE_SECTION_ELSE_NESTED(97)
+#define ALT_CACHE_SECTION_END_NESTED(msk, val, label) \
+ MAKE_FTR_SECTION_ENTRY(msk, val, label, __cache_fixup)
+#define ALT_CACHE_SECTION_END_NESTED_IFSET(msk, label) \
+ ALT_CACHE_SECTION_END_NESTED(msk, msk, label)
+#define ALT_CACHE_SECTION_END_NESTED_IFCLR(msk, label) \
+ ALT_CACHE_SECTION_END_NESTED(msk, 0, label)
+#define ALT_CACHE_SECTION_END(msk, val) \
+ ALT_CACHE_SECTION_END_NESTED(msk, val, 97)
+#define ALT_CACHE_SECTION_END_IFSET(msk) \
+ ALT_CACHE_SECTION_END_NESTED_IFSET(msk, 97)
+#define ALT_CACHE_SECTION_END_IFCLR(msk) \
+ ALT_CACHE_SECTION_END_NESTED_IFCLR(msk, 97)
+
#endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */
@@ -122,6 +122,9 @@ notrace void __init machine_init(u64 dt_ptr)
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
+ do_feature_fixups(CACHE_NOW_ON, &__start___cache_fixup,
+ &__stop___cache_fixup);
+
/* Do some early initialization based on the flat device tree */
early_init_devtree(__va(dt_ptr));
@@ -148,6 +148,14 @@ SECTIONS
__stop___fw_ftr_fixup = .;
}
#endif
+#ifdef CONFIG_PPC32
+ . = ALIGN(8);
+ __cache_fixup : AT(ADDR(__cache_fixup) - LOAD_OFFSET) {
+ __start___cache_fixup = .; /* start of the cache fixup table */
+ *(__cache_fixup)
+ __stop___cache_fixup = .; /* end of the cache fixup table */
+ }
+#endif /* CONFIG_PPC32 */
.init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
INIT_RAM_FS
}
@@ -87,7 +87,11 @@ _GLOBAL(memset)
add r5,r0,r5
subf r6,r0,r6
cmplwi 0,r4,0
+BEGIN_CACHE_SECTION
+ b 2f /* Use normal procedure until cache is active */
+CACHE_SECTION_ELSE
bne 2f /* Use normal procedure if r4 is not zero */
+ALT_CACHE_SECTION_END_IFCLR(CACHE_NOW_ON)
clrlwi r7,r6,32-LG_CACHELINE_BYTES
add r8,r7,r5
@@ -172,7 +176,19 @@ _GLOBAL(memcpy)
mtctr r0
beq 63f
53:
+ /*
+ * During early init, cache might not be active yet, so dcbz cannot be
+ * used. We put dcbtst instead of dcbz. If cache is not active, it's
+ * just like a nop. If cache is active, at least it prefetches the line
+ * to be overwritten.
+ * Will be replaced by dcbz at runtime in machine_init()
+ */
+BEGIN_CACHE_SECTION
+ dcbtst r11,r6
+CACHE_SECTION_ELSE
dcbz r11,r6
+ALT_CACHE_SECTION_END_IFCLR(CACHE_NOW_ON)
+
COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
COPY_16_BYTES
memcpy() and memset() use the dcbz instruction to speed up copying by not wasting time loading a cache line with data that will be overwritten. Some platforms like mpc52xx do not have the cache active at startup and therefore cannot use memcpy(). Although no part of the code explicitly uses memcpy(), GCC makes calls to it. This patch implements fixups linked to the cache. At startup, the functions implement code that does not use dcbz: * For memcpy(), dcbz is replaced by dcbtst, which is harmless when the cache is not enabled, and which helps a bit (although not as much as dcbz) if the cache is already enabled. * For memset(), it branches unconditionally to the alternative part normally used only when setting a non-zero value. That part doesn't use dcbz. Once the initial MMU is set up, in machine_init() we call do_feature_fixups() which replaces the temporary instructions with the final ones. Reported-by: Michal Sojka <sojkam1@fel.cvut.cz> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> --- changes in v2: Using feature-fixups instead of hardcoded call to patch_instruction() Handling of memset() added arch/powerpc/include/asm/cache.h | 8 ++++++++ arch/powerpc/include/asm/feature-fixups.h | 30 ++++++++++++++++++++++++++++++ arch/powerpc/kernel/setup_32.c | 3 +++ arch/powerpc/kernel/vmlinux.lds.S | 8 ++++++++ arch/powerpc/lib/copy_32.S | 16 ++++++++++++++++ 5 files changed, 65 insertions(+)