Message ID | de7a813c71c4823797bb351bea8be15acae83be2.1565970465.git.christophe.leroy@c-s.fr (mailing list archive) |
---|---|
State | Rejected |
Headers | show |
Series | [RFC] powerpc: Convert ____flush_dcache_icache_phys() to C | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | warning | Failed to apply on branch next (c9633332103e55bc73d80d07ead28b95a22a85a3) |
snowpatch_ozlabs/apply_patch | fail | Failed to apply to any branch |
On Fri, 2019-08-16 at 15:52 +0000, Christophe Leroy wrote: > Resulting code (8xx with 16 bytes per cacheline and 16k pages) > > 0000016c <__flush_dcache_icache_phys>: > 16c: 54 63 00 22 rlwinm r3,r3,0,0,17 > 170: 7d 20 00 a6 mfmsr r9 > 174: 39 40 04 00 li r10,1024 > 178: 55 28 07 34 rlwinm r8,r9,0,28,26 > 17c: 7c 67 1b 78 mr r7,r3 > 180: 7d 49 03 a6 mtctr r10 > 184: 7d 00 01 24 mtmsr r8 > 188: 4c 00 01 2c isync > 18c: 7c 00 18 6c dcbst 0,r3 > 190: 38 63 00 10 addi r3,r3,16 > 194: 42 00 ff f8 bdnz 18c <__flush_dcache_icache_phys+0x20> > 198: 7c 00 04 ac hwsync > 19c: 7d 49 03 a6 mtctr r10 > 1a0: 7c 00 3f ac icbi 0,r7 > 1a4: 38 e7 00 10 addi r7,r7,16 > 1a8: 42 00 ff f8 bdnz 1a0 <__flush_dcache_icache_phys+0x34> > 1ac: 7c 00 04 ac hwsync > 1b0: 7d 20 01 24 mtmsr r9 > 1b4: 4c 00 01 2c isync > 1b8: 4e 80 00 20 blr > > Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> > --- > This patch is on top of Alastair's series "powerpc: convert cache > asm to C" > Patch 3 of that series should touch __flush_dcache_icache_phys and > this > patch could come just after patch 3. 
> > arch/powerpc/include/asm/cacheflush.h | 8 +++++ > arch/powerpc/mm/mem.c | 55 > ++++++++++++++++++++++++++++------- > 2 files changed, 53 insertions(+), 10 deletions(-) > > diff --git a/arch/powerpc/include/asm/cacheflush.h > b/arch/powerpc/include/asm/cacheflush.h > index 1826bf2cc137..bf4f2dc4eb76 100644 > --- a/arch/powerpc/include/asm/cacheflush.h > +++ b/arch/powerpc/include/asm/cacheflush.h > @@ -47,6 +47,14 @@ void flush_icache_user_range(struct vm_area_struct > *vma, > struct page *page, unsigned long > addr, > int len); > void flush_dcache_icache_page(struct page *page); > +#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE) > +void __flush_dcache_icache_phys(unsigned long physaddr); > +#else > +static inline void __flush_dcache_icache_phys(unsigned long > physaddr) > +{ > + BUG(); > +} > +#endif > > /** > * flush_dcache_range(): Write any modified data cache blocks out to > memory and invalidate them. > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c > index 43be99de7c9a..43009f9227c4 100644 > --- a/arch/powerpc/mm/mem.c > +++ b/arch/powerpc/mm/mem.c > @@ -402,6 +402,50 @@ void flush_dcache_page(struct page *page) > } > EXPORT_SYMBOL(flush_dcache_page); > > +#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE) > +void __flush_dcache_icache_phys(unsigned long physaddr) > +{ > + unsigned long bytes = l1_dcache_bytes(); > + unsigned long nb = PAGE_SIZE / bytes; > + unsigned long addr = physaddr & PAGE_MASK; > + unsigned long msr, msr0; > + unsigned long loop1 = addr, loop2 = addr; > + > + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { > + /* For a snooping icache, we still need a dummy icbi to > purge all the > + * prefetched instructions from the ifetch buffers. We > also need a sync > + * before the icbi to order the the actual stores to > memory that might > + * have modified instructions with the icbi. 
> + */ > + mb(); /* sync */ > + icbi((void *)addr); > + mb(); /* sync */ > + isync(); > + return; > + } > + msr0 = mfmsr(); > + msr = msr0 & ~MSR_DR; > + asm volatile( > + " mtctr %2;" > + " mtmsr %3;" > + " isync;" > + "0: dcbst 0, %0;" > + " addi %0, %0, %4;" > + " bdnz 0b;" > + " sync;" > + " mtctr %2;" > + "1: icbi 0, %1;" > + " addi %1, %1, %4;" > + " bdnz 1b;" > + " sync;" > + " mtmsr %5;" > + " isync;" > + : "+r" (loop1), "+r" (loop2) > + : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) > + : "ctr", "memory"); > +} > +#endif > + > void flush_dcache_icache_page(struct page *page) > { > #ifdef CONFIG_HUGETLB_PAGE > @@ -419,16 +463,7 @@ void flush_dcache_icache_page(struct page *page) > __flush_dcache_icache(start); > kunmap_atomic(start); > } else { > - unsigned long msr = mfmsr(); > - > - /* Clear the DR bit so that we operate on physical > - * rather than virtual addresses > - */ > - mtmsr(msr & ~(MSR_DR)); > - > - __flush_dcache_icache((void *)physaddr); > - > - mtmsr(msr); > + __flush_dcache_icache_phys(page_to_pfn(page) << > PAGE_SHIFT); > } > #endif > } Thanks Christophe, I'm trying a somewhat different approach that requires less knowledge of assembler. Handling of CPU_FTR_COHERENT_ICACHE is outside this function. 
The code below is not a patch as my tree is a bit messy, sorry: /** * flush_dcache_icache_phys() - Flush a page by its physical address * @addr: the physical address of the page */ static void flush_dcache_icache_phys(unsigned long addr) { register unsigned long msr; register unsigned long dlines = PAGE_SIZE >> l1_dcache_shift(); register unsigned long dbytes = l1_dcache_bytes(); register unsigned long ilines = PAGE_SIZE >> l1_icache_shift(); register unsigned long ibytes = l1_icache_bytes(); register unsigned long i; register unsigned long address = addr; /* * Clear the DR bit so that we operate on physical * rather than virtual addresses */ msr = mfmsr(); mtmsr(msr & ~(MSR_DR)); /* Write out the data cache */ for (i = 0; i < dlines; i++, address += dbytes) dcbst((void *)address); /* Invalidate the instruction cache */ address = addr; for (i = 0; i < ilines; i++, address += ibytes) icbi((void *)address); mtmsr(msr); } void test_flush_phys(unsigned long addr) { flush_dcache_icache_phys(addr); } This gives the following assembler (using pmac32_defconfig): 000003cc <test_flush_phys>: 3cc: 94 21 ff f0 stwu r1,-16(r1) 3d0: 7d 00 00 a6 mfmsr r8 3d4: 55 09 07 34 rlwinm r9,r8,0,28,26 3d8: 7d 20 01 24 mtmsr r9 3dc: 39 20 00 80 li r9,128 3e0: 7d 29 03 a6 mtctr r9 3e4: 39 43 10 00 addi r10,r3,4096 3e8: 7c 69 1b 78 mr r9,r3 3ec: 7c 00 48 6c dcbst 0,r9 3f0: 39 29 00 20 addi r9,r9,32 3f4: 42 00 ff f8 bdnz 3ec <test_flush_phys+0x20> 3f8: 7c 00 1f ac icbi 0,r3 3fc: 38 63 00 20 addi r3,r3,32 400: 7f 8a 18 40 cmplw cr7,r10,r3 404: 40 9e ff f4 bne cr7,3f8 <test_flush_phys+0x2c> 408: 7d 00 01 24 mtmsr r8 40c: 38 21 00 10 addi r1,r1,16 410: 4e 80 00 20 blr
Le 20/08/2019 à 06:36, Alastair D'Silva a écrit : > On Fri, 2019-08-16 at 15:52 +0000, Christophe Leroy wrote: [...] > > > Thanks Christophe, > > I'm trying a somewhat different approach that requires less knowledge > of assembler. Handling of CPU_FTR_COHERENT_ICACHE is outside this > function. The code below is not a patch as my tree is a bit messy, > sorry: Can we be 100% sure that GCC won't add any code accessing some global data or stack while the Data MMU is OFF ? Christophe > > /** > * flush_dcache_icache_phys() - Flush a page by it's physical address > * @addr: the physical address of the page > */ > static void flush_dcache_icache_phys(unsigned long addr) > { > register unsigned long msr; > register unsigned long dlines = PAGE_SIZE >> l1_dcache_shift(); > register unsigned long dbytes = l1_dcache_bytes(); > register unsigned long ilines = PAGE_SIZE >> l1_icache_shift(); > register unsigned long ibytes = l1_icache_bytes(); > register unsigned long i; > register unsigned long address = addr; > > /* > * Clear the DR bit so that we operate on physical > * rather than virtual addresses > */ > msr = mfmsr(); > mtmsr(msr & ~(MSR_DR)); > > /* Write out the data cache */ > for (i = 0; i < dlines; i++, address += dbytes) > dcbst((void *)address); > > /* Invalidate the instruction cache */ > address = addr; > for (i = 0; i < ilines; i++, address += ibytes) > icbi((void *)address); > > mtmsr(msr); > } > > void test_flush_phys(unsigned long addr) > { > flush_dcache_icache_phys(addr); > } > > > This gives the following assembler (using pmac32_defconfig): > 000003cc <test_flush_phys>: > 3cc: 94 21 ff f0 stwu r1,-16(r1) > 3d0: 7d 00 00 a6 mfmsr r8 > 3d4: 55 09 07 34 rlwinm r9,r8,0,28,26 > 3d8: 7d 20 01 24 mtmsr r9 > 3dc: 39 20 00 80 li r9,128 > 3e0: 7d 29 03 a6 mtctr r9 > 3e4: 39 43 10 00 addi r10,r3,4096 > 3e8: 7c 69 1b 78 mr r9,r3 > 3ec: 7c 00 48 6c dcbst 0,r9 > 3f0: 39 29 00 20 addi r9,r9,32 > 3f4: 42 00 ff f8 bdnz 3ec <test_flush_phys+0x20> > 3f8: 7c 00 1f ac icbi 0,r3 
> 3fc: 38 63 00 20 addi r3,r3,32 > 400: 7f 8a 18 40 cmplw cr7,r10,r3 > 404: 40 9e ff f4 bne cr7,3f8 <test_flush_phys+0x2c> > 408: 7d 00 01 24 mtmsr r8 > 40c: 38 21 00 10 addi r1,r1,16 > 410: 4e 80 00 20 blr > >
On Wed, 2019-08-21 at 22:27 +0200, Christophe Leroy wrote: > > Le 20/08/2019 à 06:36, Alastair D'Silva a écrit : > > On Fri, 2019-08-16 at 15:52 +0000, Christophe Leroy wrote: > > [...] > > > > > Thanks Christophe, > > > > I'm trying a somewhat different approach that requires less > > knowledge > > of assembler. Handling of CPU_FTR_COHERENT_ICACHE is outside this > > function. The code below is not a patch as my tree is a bit messy, > > sorry: > > Can we be 100% sure that GCC won't add any code accessing some > global > data or stack while the Data MMU is OFF ? > > Christophe > +mpe I'm not sure how we would go about making such a guarantee, but I've tied every variable used to a register and addr is passed in a register, so there is no stack usage, and every call in there only operates on its operands. The calls to the inline cache helpers (for the PPC32 case) are all constants, so I can't see a reasonable scenario where there would be a function call and reordered to after the DR bit is turned off, but I guess if we want to be paranoid, we could always add an mb() call before the DR bit is manipulated to prevent the compiler from reordering across the section where the data MMU is disabled. 
> > > /** > > * flush_dcache_icache_phys() - Flush a page by it's physical > > address > > * @addr: the physical address of the page > > */ > > static void flush_dcache_icache_phys(unsigned long addr) > > { > > register unsigned long msr; > > register unsigned long dlines = PAGE_SIZE >> l1_dcache_shift(); > > register unsigned long dbytes = l1_dcache_bytes(); > > register unsigned long ilines = PAGE_SIZE >> l1_icache_shift(); > > register unsigned long ibytes = l1_icache_bytes(); > > register unsigned long i; > > register unsigned long address = addr; > > > > /* > > * Clear the DR bit so that we operate on physical > > * rather than virtual addresses > > */ > > msr = mfmsr(); > > mtmsr(msr & ~(MSR_DR)); > > > > /* Write out the data cache */ > > for (i = 0; i < dlines; i++, address += dbytes) > > dcbst((void *)address); > > > > /* Invalidate the instruction cache */ > > address = addr; > > for (i = 0; i < ilines; i++, address += ibytes) > > icbi((void *)address); > > > > mtmsr(msr); > > } > > > > void test_flush_phys(unsigned long addr) > > { > > flush_dcache_icache_phys(addr); > > } > > > > > > This gives the following assembler (using pmac32_defconfig): > > 000003cc <test_flush_phys>: > > 3cc: 94 21 ff f0 stwu r1,-16(r1) > > 3d0: 7d 00 00 a6 mfmsr r8 > > 3d4: 55 09 07 34 rlwinm r9,r8,0,28,26 > > 3d8: 7d 20 01 24 mtmsr r9 > > 3dc: 39 20 00 80 li r9,128 > > 3e0: 7d 29 03 a6 mtctr r9 > > 3e4: 39 43 10 00 addi r10,r3,4096 > > 3e8: 7c 69 1b 78 mr r9,r3 > > 3ec: 7c 00 48 6c dcbst 0,r9 > > 3f0: 39 29 00 20 addi r9,r9,32 > > 3f4: 42 00 ff f8 bdnz 3ec <test_flush_phys+0x20> > > 3f8: 7c 00 1f ac icbi 0,r3 > > 3fc: 38 63 00 20 addi r3,r3,32 > > 400: 7f 8a 18 40 cmplw cr7,r10,r3 > > 404: 40 9e ff f4 bne cr7,3f8 <test_flush_phys+0x2c> > > 408: 7d 00 01 24 mtmsr r8 > > 40c: 38 21 00 10 addi r1,r1,16 > > 410: 4e 80 00 20 blr > > > >
Le 22/08/2019 à 02:27, Alastair D'Silva a écrit : > On Wed, 2019-08-21 at 22:27 +0200, Christophe Leroy wrote: >> >> Le 20/08/2019 à 06:36, Alastair D'Silva a écrit : >>> On Fri, 2019-08-16 at 15:52 +0000, Christophe Leroy wrote: >> >> [...] >> >>> >>> Thanks Christophe, >>> >>> I'm trying a somewhat different approach that requires less >>> knowledge >>> of assembler. Handling of CPU_FTR_COHERENT_ICACHE is outside this >>> function. The code below is not a patch as my tree is a bit messy, >>> sorry: >> >> Can we be 100% sure that GCC won't add any code accessing some >> global >> data or stack while the Data MMU is OFF ? >> >> Christophe >> > > +mpe > > I'm not sure how we would go about making such a guarantee, but I've > tied every variable used to a register and addr is passed in a > register, so there is no stack usage, and every call in there only > operates on it's operands. > > The calls to the inline cache helpers (for the PPC32 case) are all > constants, so I can't see a reasonable scenario where there would be a > function call and reordered to after the DR bit is turned off, but I > guess if we want to be paranoid, we could always add an mb() call > before the DR bit is manipulated to prevent the compiler from > reordering across the section where the data MMU is disabled. > > Anyway, I think the benefit of converting that function to C is pretty small. flush_dcache_range() and friends were converted to C mainly in order to inline them. But this __flush_dcache_icache_phys() is too big to be worth inlining, yet small and stable enough to remain in assembly for the time being. So I suggest you keep it aside your series for now, just move PURGE_PREFETCHED_INS inside it directly as it will be the only remaining user of it. Christophe
On Thu, 2019-08-22 at 07:06 +0200, Christophe Leroy wrote: > > Le 22/08/2019 à 02:27, Alastair D'Silva a écrit : > > On Wed, 2019-08-21 at 22:27 +0200, Christophe Leroy wrote: > > > Le 20/08/2019 à 06:36, Alastair D'Silva a écrit : > > > > On Fri, 2019-08-16 at 15:52 +0000, Christophe Leroy wrote: > > > > > > [...] > > > > > > > Thanks Christophe, > > > > > > > > I'm trying a somewhat different approach that requires less > > > > knowledge > > > > of assembler. Handling of CPU_FTR_COHERENT_ICACHE is outside > > > > this > > > > function. The code below is not a patch as my tree is a bit > > > > messy, > > > > sorry: > > > > > > Can we be 100% sure that GCC won't add any code accessing some > > > global > > > data or stack while the Data MMU is OFF ? > > > > > > Christophe > > > > > > > +mpe > > > > I'm not sure how we would go about making such a guarantee, but > > I've > > tied every variable used to a register and addr is passed in a > > register, so there is no stack usage, and every call in there only > > operates on it's operands. > > > > The calls to the inline cache helpers (for the PPC32 case) are all > > constants, so I can't see a reasonable scenario where there would > > be a > > function call and reordered to after the DR bit is turned off, but > > I > > guess if we want to be paranoid, we could always add an mb() call > > before the DR bit is manipulated to prevent the compiler from > > reordering across the section where the data MMU is disabled. > > > > > > Anyway, I think the benefit of converting that function to C is > pretty > small. flush_dcache_range() and friends were converted to C mainly > in > order to inline them. But this __flush_dcache_icache_phys() is too > big > to be worth inlining, yet small and stable enough to remain in > assembly > for the time being. 
> I disagree on this point, after converting it to C, using 44x/currituck.defconfig, the compiler definitely will inline it (noting that there is only 1 caller of it): 00000134 <flush_dcache_icache_page>: 134: 94 21 ff f0 stwu r1,-16(r1) 138: 3d 20 00 00 lis r9,0 13c: 81 29 00 00 lwz r9,0(r9) 140: 7c 08 02 a6 mflr r0 144: 38 81 00 0c addi r4,r1,12 148: 90 01 00 14 stw r0,20(r1) 14c: 91 21 00 0c stw r9,12(r1) 150: 48 00 00 01 bl 150 <flush_dcache_icache_page+0x1c> 154: 39 00 00 20 li r8,32 158: 39 43 10 00 addi r10,r3,4096 15c: 7c 69 1b 78 mr r9,r3 160: 7d 09 03 a6 mtctr r8 164: 7c 00 48 6c dcbst 0,r9 168: 39 29 00 80 addi r9,r9,128 16c: 42 00 ff f8 bdnz 164 <flush_dcache_icache_page+0x30> 170: 7c 00 04 ac hwsync 174: 7c 69 1b 78 mr r9,r3 178: 7c 00 4f ac icbi 0,r9 17c: 39 29 00 80 addi r9,r9,128 180: 7f 8a 48 40 cmplw cr7,r10,r9 184: 40 9e ff f4 bne cr7,178 <flush_dcache_icache_page+0x44> 188: 7c 00 04 ac hwsync 18c: 4c 00 01 2c isync 190: 80 01 00 14 lwz r0,20(r1) 194: 38 21 00 10 addi r1,r1,16 198: 7c 08 03 a6 mtlr r0 19c: 48 00 00 00 b 19c <flush_dcache_icache_page+0x68> > So I suggest you keep it aside your series for now, just move > PURGE_PREFETCHED_INS inside it directly as it will be the only > remaining > user of it. > > Christophe
"Alastair D'Silva" <alastair@au1.ibm.com> writes: > On Wed, 2019-08-21 at 22:27 +0200, Christophe Leroy wrote: >> >> Le 20/08/2019 à 06:36, Alastair D'Silva a écrit : >> > On Fri, 2019-08-16 at 15:52 +0000, Christophe Leroy wrote: >> >> [...] >> >> > >> > Thanks Christophe, >> > >> > I'm trying a somewhat different approach that requires less >> > knowledge >> > of assembler. Handling of CPU_FTR_COHERENT_ICACHE is outside this >> > function. The code below is not a patch as my tree is a bit messy, >> > sorry: >> >> Can we be 100% sure that GCC won't add any code accessing some >> global data or stack while the Data MMU is OFF ? > > +mpe > > I'm not sure how we would go about making such a guarantee, but I've > tied every variable used to a register and addr is passed in a > register, so there is no stack usage, and every call in there only > operates on it's operands. That's not safe, I can believe it happens to work but the compiler people will laugh at us if it ever breaks. Let's leave it in asm. cheers
On Mon, Sep 02, 2019 at 11:48:59AM +1000, Michael Ellerman wrote: > "Alastair D'Silva" <alastair@au1.ibm.com> writes: > > On Wed, 2019-08-21 at 22:27 +0200, Christophe Leroy wrote: > >> Can we be 100% sure that GCC won't add any code accessing some > >> global data or stack while the Data MMU is OFF ? > > > > +mpe > > > > I'm not sure how we would go about making such a guarantee, but I've > > tied every variable used to a register and addr is passed in a > > register, so there is no stack usage, and every call in there only > > operates on it's operands. > > That's not safe, I can believe it happens to work but the compiler > people will laugh at us if it ever breaks. Yes. Sorry. > Let's leave it in asm. +1 The asm is simpler, more readable, more maintainable, and perhaps more performant even. Plus the being-laughed-at issue. Segher
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 1826bf2cc137..bf4f2dc4eb76 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -47,6 +47,14 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, unsigned long addr, int len); void flush_dcache_icache_page(struct page *page); +#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE) +void __flush_dcache_icache_phys(unsigned long physaddr); +#else +static inline void __flush_dcache_icache_phys(unsigned long physaddr) +{ + BUG(); +} +#endif /** * flush_dcache_range(): Write any modified data cache blocks out to memory and invalidate them. diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 43be99de7c9a..43009f9227c4 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -402,6 +402,50 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); +#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE) +void __flush_dcache_icache_phys(unsigned long physaddr) +{ + unsigned long bytes = l1_dcache_bytes(); + unsigned long nb = PAGE_SIZE / bytes; + unsigned long addr = physaddr & PAGE_MASK; + unsigned long msr, msr0; + unsigned long loop1 = addr, loop2 = addr; + + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { + /* For a snooping icache, we still need a dummy icbi to purge all the + * prefetched instructions from the ifetch buffers. We also need a sync + * before the icbi to order the the actual stores to memory that might + * have modified instructions with the icbi. 
+ */ + mb(); /* sync */ + icbi((void *)addr); + mb(); /* sync */ + isync(); + return; + } + msr0 = mfmsr(); + msr = msr0 & ~MSR_DR; + asm volatile( + " mtctr %2;" + " mtmsr %3;" + " isync;" + "0: dcbst 0, %0;" + " addi %0, %0, %4;" + " bdnz 0b;" + " sync;" + " mtctr %2;" + "1: icbi 0, %1;" + " addi %1, %1, %4;" + " bdnz 1b;" + " sync;" + " mtmsr %5;" + " isync;" + : "+r" (loop1), "+r" (loop2) + : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0) + : "ctr", "memory"); +} +#endif + void flush_dcache_icache_page(struct page *page) { #ifdef CONFIG_HUGETLB_PAGE @@ -419,16 +463,7 @@ void flush_dcache_icache_page(struct page *page) __flush_dcache_icache(start); kunmap_atomic(start); } else { - unsigned long msr = mfmsr(); - - /* Clear the DR bit so that we operate on physical - * rather than virtual addresses - */ - mtmsr(msr & ~(MSR_DR)); - - __flush_dcache_icache((void *)physaddr); - - mtmsr(msr); + __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT); } #endif }
Resulting code (8xx with 16 bytes per cacheline and 16k pages) 0000016c <__flush_dcache_icache_phys>: 16c: 54 63 00 22 rlwinm r3,r3,0,0,17 170: 7d 20 00 a6 mfmsr r9 174: 39 40 04 00 li r10,1024 178: 55 28 07 34 rlwinm r8,r9,0,28,26 17c: 7c 67 1b 78 mr r7,r3 180: 7d 49 03 a6 mtctr r10 184: 7d 00 01 24 mtmsr r8 188: 4c 00 01 2c isync 18c: 7c 00 18 6c dcbst 0,r3 190: 38 63 00 10 addi r3,r3,16 194: 42 00 ff f8 bdnz 18c <__flush_dcache_icache_phys+0x20> 198: 7c 00 04 ac hwsync 19c: 7d 49 03 a6 mtctr r10 1a0: 7c 00 3f ac icbi 0,r7 1a4: 38 e7 00 10 addi r7,r7,16 1a8: 42 00 ff f8 bdnz 1a0 <__flush_dcache_icache_phys+0x34> 1ac: 7c 00 04 ac hwsync 1b0: 7d 20 01 24 mtmsr r9 1b4: 4c 00 01 2c isync 1b8: 4e 80 00 20 blr Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> --- This patch is on top of Alastair's series "powerpc: convert cache asm to C" Patch 3 of that series should touch __flush_dcache_icache_phys and this patch could come just after patch 3. arch/powerpc/include/asm/cacheflush.h | 8 +++++ arch/powerpc/mm/mem.c | 55 ++++++++++++++++++++++++++++------- 2 files changed, 53 insertions(+), 10 deletions(-)