Message ID | 194fb7bc973ef2ce43016c97dd32f2b2dcbae4e7.1566491310.git.christophe.leroy@c-s.fr (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | powerpc/vdso32 enhancement and optimisation | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | Successfully applied on branch next (0e4523c0b4f64eaf7abe59e143e6bdf8f972acff) |
snowpatch_ozlabs/checkpatch | fail | total: 3 errors, 0 warnings, 1 checks, 138 lines checked |
Hi Christophe, Christophe Leroy <christophe.leroy@c-s.fr> writes: > __get_datapage() is only a few instructions to retrieve the > address of the page where the kernel stores data to the VDSO. > > By inlining this function into its users, a bl/blr pair and > a mflr/mtlr pair is avoided, plus a few reg moves. > > The improvement is noticeable (about 55 nsec/call on an 8xx) > > vdsotest before the patch: > gettimeofday: vdso: 731 nsec/call > clock-gettime-realtime-coarse: vdso: 668 nsec/call > clock-gettime-monotonic-coarse: vdso: 745 nsec/call > > vdsotest after the patch: > gettimeofday: vdso: 677 nsec/call > clock-gettime-realtime-coarse: vdso: 613 nsec/call > clock-gettime-monotonic-coarse: vdso: 690 nsec/call > > Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> > --- > arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++----- > arch/powerpc/kernel/vdso32/datapage.S | 29 ++++------------------------- > arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++ > arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++------- > 4 files changed, 26 insertions(+), 37 deletions(-) > create mode 100644 arch/powerpc/kernel/vdso32/datapage.h The datapage.h file should ideally be moved under include/asm, then we can use the same for powerpc64 too. Santosh > > diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S > index 7f882e7b9f43..e9453837e4ee 100644 > --- a/arch/powerpc/kernel/vdso32/cacheflush.S > +++ b/arch/powerpc/kernel/vdso32/cacheflush.S > @@ -10,6 +10,8 @@ > #include <asm/vdso.h> > #include <asm/asm-offsets.h> > > +#include "datapage.h" > + > .text > > /* > @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) > .cfi_startproc > mflr r12 > .cfi_register lr,r12 > - mr r11,r3 > - bl __get_datapage@local > + get_datapage r10, r0 > mtlr r12 > - mr r10,r3 > > lwz r7,CFG_DCACHE_BLOCKSZ(r10) > addi r5,r7,-1 > - andc r6,r11,r5 /* round low to line bdy */ > + andc r6,r3,r5 /* round low to line bdy */ > subf r8,r6,r4 /* compute length */ > add r8,r8,r5 /* ensure we get enough */ > lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) > @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) > > lwz r7,CFG_ICACHE_BLOCKSZ(r10) > addi r5,r7,-1 > - andc r6,r11,r5 /* round low to line bdy */ > + andc r6,r3,r5 /* round low to line bdy */ > subf r8,r6,r4 /* compute length */ > add r8,r8,r5 > lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) > diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S > index 6984125b9fc0..d480d2d4a3fe 100644 > --- a/arch/powerpc/kernel/vdso32/datapage.S > +++ b/arch/powerpc/kernel/vdso32/datapage.S > @@ -11,34 +11,13 @@ > #include <asm/unistd.h> > #include <asm/vdso.h> > > +#include "datapage.h" > + > .text > .global __kernel_datapage_offset; > __kernel_datapage_offset: > .long 0 > > -V_FUNCTION_BEGIN(__get_datapage) > - .cfi_startproc > - /* We don't want that exposed or overridable as we want other objects > - * to be able to bl directly to here > - */ > - .protected __get_datapage > - .hidden __get_datapage > - > - mflr r0 > - .cfi_register lr,r0 > - > - bcl 20,31,data_page_branch > -data_page_branch: > - mflr r3 > - mtlr r0 > - addi r3, r3, __kernel_datapage_offset-data_page_branch > - lwz r0,0(r3) > - .cfi_restore lr > - add r3,r0,r3 > - blr > - .cfi_endproc > -V_FUNCTION_END(__get_datapage) > - > /* > * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; > * > @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) > mflr r12 > .cfi_register lr,r12 > mr r4,r3 > - bl __get_datapage@local > + get_datapage r3, r0 > mtlr r12 > addi r3,r3,CFG_SYSCALL_MAP32 > cmpli cr0,r4,0 > @@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) > .cfi_startproc > mflr r12 > .cfi_register lr,r12 > - bl __get_datapage@local > + get_datapage r3, r0 > lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) > lwz r3,CFG_TB_TICKS_PER_SEC(r3) > mtlr r12 > diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h > new file mode 100644 > index 000000000000..74f4f57c2da8 > --- /dev/null > +++ b/arch/powerpc/kernel/vdso32/datapage.h > @@ -0,0 +1,11 @@ > +/* SPDX-License-Identifier: GPL-2.0-or-later */ > + > +.macro get_datapage ptr, tmp > + bcl 20,31,.+4 > + mflr \ptr > + addi \ptr, \ptr, __kernel_datapage_offset - (.-4) > + lwz \tmp, 0(\ptr) > + add \ptr, \tmp, \ptr > +.endm > + > + > diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S > index 355b537d327a..3e55cba19f44 100644 > --- a/arch/powerpc/kernel/vdso32/gettimeofday.S > +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S > @@ -12,6 +12,8 @@ > #include <asm/asm-offsets.h> > #include <asm/unistd.h> > > +#include "datapage.h" > + > /* Offset for the low 32-bit part of a field of long type */ > #ifdef CONFIG_PPC64 > #define LOPART 4 > @@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday) > > mr r10,r3 /* r10 saves tv */ > mr r11,r4 /* r11 saves tz */ > - bl __get_datapage@local /* get data page */ > - mr r9, r3 /* datapage ptr in r9 */ > + get_datapage r9, r0 > cmplwi r10,0 /* check if tv is NULL */ > beq 3f > lis r7,1000000@ha /* load up USEC_PER_SEC */ > @@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) > mflr r12 /* r12 saves lr */ > .cfi_register lr,r12 > mr r11,r4 /* r11 saves tp */ > - bl __get_datapage@local /* get data page */ > - mr r9,r3 /* datapage ptr in r9 */ > + get_datapage r9, r0 > lis r7,NSEC_PER_SEC@h /* want nanoseconds */ > ori r7,r7,NSEC_PER_SEC@l > beq cr5, .Lcoarse_clocks > @@ -208,7 +208,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) > > mflr r12 > .cfi_register lr,r12 > - bl __get_datapage@local /* get data page */ > + get_datapage r3, r0 > lwz r5, CLOCK_HRTIMER_RES(r3) > mtlr r12 > li r3,0 > @@ -242,8 +242,7 @@ V_FUNCTION_BEGIN(__kernel_time) > .cfi_register lr,r12 > > mr r11,r3 /* r11 holds t */ > - bl __get_datapage@local > - mr r9, r3 /* datapage ptr in r9 */ > + get_datapage r9, r0 > > lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9) > > -- > 2.13.3
Hi Santosh, Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit : > Hi Christophe, > > Christophe Leroy <christophe.leroy@c-s.fr> writes: > >> __get_datapage() is only a few instructions to retrieve the >> address of the page where the kernel stores data to the VDSO. >> >> By inlining this function into its users, a bl/blr pair and >> a mflr/mtlr pair is avoided, plus a few reg moves. >> >> The improvement is noticeable (about 55 nsec/call on an 8xx) >> >> vdsotest before the patch: >> gettimeofday: vdso: 731 nsec/call >> clock-gettime-realtime-coarse: vdso: 668 nsec/call >> clock-gettime-monotonic-coarse: vdso: 745 nsec/call >> >> vdsotest after the patch: >> gettimeofday: vdso: 677 nsec/call >> clock-gettime-realtime-coarse: vdso: 613 nsec/call >> clock-gettime-monotonic-coarse: vdso: 690 nsec/call >> >> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> >> --- >> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++----- >> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++------------------------- >> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++ >> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++------- >> 4 files changed, 26 insertions(+), 37 deletions(-) >> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h > > The datapage.h file should ideally be moved under include/asm, then we can use > the same for powerpc64 too. I have a more ambitious project indeed. Most of the VDSO code is duplicated between vdso32 and vdso64. I'm aiming at merging everything into a single source code. This means we would have to generate vdso32.so and vdso64.so out of the same source files. Any idea on how to do that ? I'm not too good at creating Makefiles. I guess we would have everything in arch/powerpc/kernel/vdso/ and would have to build the objects twice, once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/ Christophe
Christophe Leroy <christophe.leroy@c-s.fr> writes: > Hi Santosh, > > Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit : >> Hi Christophe, >> >> Christophe Leroy <christophe.leroy@c-s.fr> writes: >> >>> __get_datapage() is only a few instructions to retrieve the >>> address of the page where the kernel stores data to the VDSO. >>> >>> By inlining this function into its users, a bl/blr pair and >>> a mflr/mtlr pair is avoided, plus a few reg moves. >>> >>> The improvement is noticeable (about 55 nsec/call on an 8xx) >>> >>> vdsotest before the patch: >>> gettimeofday: vdso: 731 nsec/call >>> clock-gettime-realtime-coarse: vdso: 668 nsec/call >>> clock-gettime-monotonic-coarse: vdso: 745 nsec/call >>> >>> vdsotest after the patch: >>> gettimeofday: vdso: 677 nsec/call >>> clock-gettime-realtime-coarse: vdso: 613 nsec/call >>> clock-gettime-monotonic-coarse: vdso: 690 nsec/call >>> >>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> >>> --- >>> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++----- >>> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++------------------------- >>> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++ >>> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++------- >>> 4 files changed, 26 insertions(+), 37 deletions(-) >>> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h >> >> The datapage.h file should ideally be moved under include/asm, then we can use >> the same for powerpc64 too. > > I have a more ambitious project indeed. > > Most of the VDSO code is duplicated between vdso32 and vdso64. I'm > aiming at merging everything into a single source code. > > This means we would have to generate vdso32.so and vdso64.so out of the > same source files. Any idea on how to do that ? I'm not too good at > creating Makefiles. I guess we would have everything in > arch/powerpc/kernel/vdso/ and would have to build the objects twice, > once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/ Should we need to build the objects twice? For 64 bit config it is going to be a 64 bit build else a 32 bit build. It should suffice to get the single source code compile for both, maybe with macros or (!)CONFIG_PPC64 conditional compilation. Am I missing something when you say build twice? Thanks, Santosh
Le 13/09/2019 à 15:31, Santosh Sivaraj a écrit : > Christophe Leroy <christophe.leroy@c-s.fr> writes: > >> Hi Santosh, >> >> Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit : >>> Hi Christophe, >>> >>> Christophe Leroy <christophe.leroy@c-s.fr> writes: >>> >>>> __get_datapage() is only a few instructions to retrieve the >>>> address of the page where the kernel stores data to the VDSO. >>>> >>>> By inlining this function into its users, a bl/blr pair and >>>> a mflr/mtlr pair is avoided, plus a few reg moves. >>>> >>>> The improvement is noticeable (about 55 nsec/call on an 8xx) >>>> >>>> vdsotest before the patch: >>>> gettimeofday: vdso: 731 nsec/call >>>> clock-gettime-realtime-coarse: vdso: 668 nsec/call >>>> clock-gettime-monotonic-coarse: vdso: 745 nsec/call >>>> >>>> vdsotest after the patch: >>>> gettimeofday: vdso: 677 nsec/call >>>> clock-gettime-realtime-coarse: vdso: 613 nsec/call >>>> clock-gettime-monotonic-coarse: vdso: 690 nsec/call >>>> >>>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> >>>> --- >>>> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++----- >>>> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++------------------------- >>>> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++ >>>> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++------- >>>> 4 files changed, 26 insertions(+), 37 deletions(-) >>>> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h >>> >>> The datapage.h file should ideally be moved under include/asm, then we can use >>> the same for powerpc64 too. >> >> I have a more ambitious project indeed. >> >> Most of the VDSO code is duplicated between vdso32 and vdso64. I'm >> aiming at merging everything into a single source code. >> >> This means we would have to generate vdso32.so and vdso64.so out of the >> same source files. Any idea on how to do that ? I'm not too good at >> creating Makefiles. I guess we would have everything in >> arch/powerpc/kernel/vdso/ and would have to build the objects twice, >> once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/ > > Should we need to build the objects twice? For 64 bit config it is going to be > a 64 bit build else a 32 bit build. It should suffice to get the single source > code compile for both, maybe with macros or (!)CONFIG_PPC64 conditional > compilation. Am I missing something when you say build twice? > IIUC, on PPC64 we build vdso64 for 64bits user apps and vdso32 for 32bits user apps. In arch/powerpc/kernel/Makefile, you have: obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC64) += vdso64/ And in arch/powerpc/platforms/Kconfig.cputype, you have: config VDSO32 def_bool y depends on PPC32 || CPU_BIG_ENDIAN help This symbol controls whether we build the 32-bit VDSO. We obviously want to do that if we're building a 32-bit kernel. If we're building a 64-bit kernel then we only want a 32-bit VDSO if we're building for big endian. That is because the only little endian configuration we support is ppc64le which is 64-bit only. Christophe
Christophe Leroy <christophe.leroy@c-s.fr> writes: > Le 13/09/2019 à 15:31, Santosh Sivaraj a écrit : >> Christophe Leroy <christophe.leroy@c-s.fr> writes: >> >>> Hi Santosh, >>> >>> Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit : >>>> Hi Christophe, >>>> >>>> Christophe Leroy <christophe.leroy@c-s.fr> writes: >>>> >>>>> __get_datapage() is only a few instructions to retrieve the >>>>> address of the page where the kernel stores data to the VDSO. >>>>> >>>>> By inlining this function into its users, a bl/blr pair and >>>>> a mflr/mtlr pair is avoided, plus a few reg moves. >>>>> >>>>> The improvement is noticeable (about 55 nsec/call on an 8xx) >>>>> >>>>> vdsotest before the patch: >>>>> gettimeofday: vdso: 731 nsec/call >>>>> clock-gettime-realtime-coarse: vdso: 668 nsec/call >>>>> clock-gettime-monotonic-coarse: vdso: 745 nsec/call >>>>> >>>>> vdsotest after the patch: >>>>> gettimeofday: vdso: 677 nsec/call >>>>> clock-gettime-realtime-coarse: vdso: 613 nsec/call >>>>> clock-gettime-monotonic-coarse: vdso: 690 nsec/call >>>>> >>>>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> >>>>> --- >>>>> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++----- >>>>> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++------------------------- >>>>> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++ >>>>> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++------- >>>>> 4 files changed, 26 insertions(+), 37 deletions(-) >>>>> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h >>>> >>>> The datapage.h file should ideally be moved under include/asm, then we can use >>>> the same for powerpc64 too. >>> >>> I have a more ambitious project indeed. >>> >>> Most of the VDSO code is duplicated between vdso32 and vdso64. I'm >>> aiming at merging everything into a single source code. >>> >>> This means we would have to generate vdso32.so and vdso64.so out of the >>> same source files. Any idea on how to do that ? I'm not too good at >>> creating Makefiles. I guess we would have everything in >>> arch/powerpc/kernel/vdso/ and would have to build the objects twice, >>> once in arch/powerpc/kernel/vdso32/ and once in arch/powerpc/kernel/vdso64/ >> >> Should we need to build the objects twice? For 64 bit config it is going to be >> a 64 bit build else a 32 bit build. It should suffice to get the single source >> code compile for both, maybe with macros or (!)CONFIG_PPC64 conditional >> compilation. Am I missing something when you say build twice? >> > > IIUC, on PPC64 we build vdso64 for 64bits user apps and vdso32 for > 32bits user apps. > > In arch/powerpc/kernel/Makefile, you have: > > obj-$(CONFIG_VDSO32) += vdso32/ > obj-$(CONFIG_PPC64) += vdso64/ > > And in arch/powerpc/platforms/Kconfig.cputype, you have: > > config VDSO32 > def_bool y > depends on PPC32 || CPU_BIG_ENDIAN > help > This symbol controls whether we build the 32-bit VDSO. We obviously > want to do that if we're building a 32-bit kernel. If we're building > a 64-bit kernel then we only want a 32-bit VDSO if we're building for > big endian. That is because the only little endian configuration we > support is ppc64le which is 64-bit only. > I didn't know we build 32 bit vdso for 64 bit big endians. But I don't think its difficult to do it, might be a bit tricky. We can have two targets from the same source. SRC = vdso/*.c OBJS_32 = $(SRC:.c=vdso32/.o) OBJS_64 = $(SRC:.c=vdso64/.o) Something like this would work. Of course, this is out of memory, might have to do something slightly different for the Makefiles in kernel. Thanks, Santosh > > > > Christophe
Hi Santosh, Le 26/08/2019 à 07:44, Santosh Sivaraj a écrit : > Hi Christophe, > > Christophe Leroy <christophe.leroy@c-s.fr> writes: > >> __get_datapage() is only a few instructions to retrieve the >> address of the page where the kernel stores data to the VDSO. >> >> By inlining this function into its users, a bl/blr pair and >> a mflr/mtlr pair is avoided, plus a few reg moves. >> >> The improvement is noticeable (about 55 nsec/call on an 8xx) >> >> vdsotest before the patch: >> gettimeofday: vdso: 731 nsec/call >> clock-gettime-realtime-coarse: vdso: 668 nsec/call >> clock-gettime-monotonic-coarse: vdso: 745 nsec/call >> >> vdsotest after the patch: >> gettimeofday: vdso: 677 nsec/call >> clock-gettime-realtime-coarse: vdso: 613 nsec/call >> clock-gettime-monotonic-coarse: vdso: 690 nsec/call >> >> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> >> --- >> arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++----- >> arch/powerpc/kernel/vdso32/datapage.S | 29 ++++------------------------- >> arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++ >> arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++------- >> 4 files changed, 26 insertions(+), 37 deletions(-) >> create mode 100644 arch/powerpc/kernel/vdso32/datapage.h > > The datapage.h file should ideally be moved under include/asm, then we can use > the same for powerpc64 too. Finally, I added the get_datapage macro to the existing asm/vdso_datapage.h Christophe > > Santosh > >> >> diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S >> index 7f882e7b9f43..e9453837e4ee 100644 >> --- a/arch/powerpc/kernel/vdso32/cacheflush.S >> +++ b/arch/powerpc/kernel/vdso32/cacheflush.S >> @@ -10,6 +10,8 @@ >> #include <asm/vdso.h> >> #include <asm/asm-offsets.h> >> >> +#include "datapage.h" >> + >> .text >> >> /* >> @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) >> .cfi_startproc >> mflr r12 >> .cfi_register lr,r12 >> - mr r11,r3 >> - bl __get_datapage@local >> + get_datapage r10, r0 >> mtlr r12 >> - mr r10,r3 >> >> lwz r7,CFG_DCACHE_BLOCKSZ(r10) >> addi r5,r7,-1 >> - andc r6,r11,r5 /* round low to line bdy */ >> + andc r6,r3,r5 /* round low to line bdy */ >> subf r8,r6,r4 /* compute length */ >> add r8,r8,r5 /* ensure we get enough */ >> lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) >> @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) >> >> lwz r7,CFG_ICACHE_BLOCKSZ(r10) >> addi r5,r7,-1 >> - andc r6,r11,r5 /* round low to line bdy */ >> + andc r6,r3,r5 /* round low to line bdy */ >> subf r8,r6,r4 /* compute length */ >> add r8,r8,r5 >> lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) >> diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S >> index 6984125b9fc0..d480d2d4a3fe 100644 >> --- a/arch/powerpc/kernel/vdso32/datapage.S >> +++ b/arch/powerpc/kernel/vdso32/datapage.S >> @@ -11,34 +11,13 @@ >> #include <asm/unistd.h> >> #include <asm/vdso.h> >> >> +#include "datapage.h" >> + >> .text >> .global __kernel_datapage_offset; >> __kernel_datapage_offset: >> .long 0 >> >> -V_FUNCTION_BEGIN(__get_datapage) >> - .cfi_startproc >> - /* We don't want that exposed or overridable as we want other objects >> - * to be able to bl directly to here >> - */ >> - .protected __get_datapage >> - .hidden __get_datapage >> - >> - mflr r0 >> - .cfi_register lr,r0 >> - >> - bcl 20,31,data_page_branch >> -data_page_branch: >> - mflr r3 >> - mtlr r0 >> - addi r3, r3, __kernel_datapage_offset-data_page_branch >> - lwz r0,0(r3) >> - .cfi_restore lr >> - add r3,r0,r3 >> - blr >> - .cfi_endproc >> -V_FUNCTION_END(__get_datapage) >> - >> /* >> * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; >> * >> @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) >> mflr r12 >> .cfi_register lr,r12 >> mr r4,r3 >> - bl __get_datapage@local >> + get_datapage r3, r0 >> mtlr r12 >> addi r3,r3,CFG_SYSCALL_MAP32 >> cmpli cr0,r4,0 >> @@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) >> .cfi_startproc >> mflr r12 >> .cfi_register lr,r12 >> - bl __get_datapage@local >> + get_datapage r3, r0 >> lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) >> lwz r3,CFG_TB_TICKS_PER_SEC(r3) >> mtlr r12 >> diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h >> new file mode 100644 >> index 000000000000..74f4f57c2da8 >> --- /dev/null >> +++ b/arch/powerpc/kernel/vdso32/datapage.h >> @@ -0,0 +1,11 @@ >> +/* SPDX-License-Identifier: GPL-2.0-or-later */ >> + >> +.macro get_datapage ptr, tmp >> + bcl 20,31,.+4 >> + mflr \ptr >> + addi \ptr, \ptr, __kernel_datapage_offset - (.-4) >> + lwz \tmp, 0(\ptr) >> + add \ptr, \tmp, \ptr >> +.endm >> + >> + >> diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S >> index 355b537d327a..3e55cba19f44 100644 >> --- a/arch/powerpc/kernel/vdso32/gettimeofday.S >> +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S >> @@ -12,6 +12,8 @@ >> #include <asm/asm-offsets.h> >> #include <asm/unistd.h> >> >> +#include "datapage.h" >> + >> /* Offset for the low 32-bit part of a field of long type */ >> #ifdef CONFIG_PPC64 >> #define LOPART 4 >> @@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday) >> >> mr r10,r3 /* r10 saves tv */ >> mr r11,r4 /* r11 saves tz */ >> - bl __get_datapage@local /* get data page */ >> - mr r9, r3 /* datapage ptr in r9 */ >> + get_datapage r9, r0 >> cmplwi r10,0 /* check if tv is NULL */ >> beq 3f >> lis r7,1000000@ha /* load up USEC_PER_SEC */ >> @@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) >> mflr r12 /* r12 saves lr */ >> .cfi_register lr,r12 >> mr r11,r4 /* r11 saves tp */ >> - bl __get_datapage@local /* get data page */ >> - mr r9,r3 /* datapage ptr in r9 */ >> + get_datapage r9, r0 >> lis r7,NSEC_PER_SEC@h /* want nanoseconds */ >> ori r7,r7,NSEC_PER_SEC@l >> beq cr5, .Lcoarse_clocks >> @@ -208,7 +208,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) >> >> mflr r12 >> .cfi_register lr,r12 >> - bl __get_datapage@local /* get data page */ >> + get_datapage r3, r0 >> lwz r5, CLOCK_HRTIMER_RES(r3) >> mtlr r12 >> li r3,0 >> @@ -242,8 +242,7 @@ V_FUNCTION_BEGIN(__kernel_time) >> .cfi_register lr,r12 >> >> mr r11,r3 /* r11 holds t */ >> - bl __get_datapage@local >> - mr r9, r3 /* datapage ptr in r9 */ >> + get_datapage r9, r0 >> >> lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9) >> >> -- >> 2.13.3
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso32/cacheflush.S index 7f882e7b9f43..e9453837e4ee 100644 --- a/arch/powerpc/kernel/vdso32/cacheflush.S +++ b/arch/powerpc/kernel/vdso32/cacheflush.S @@ -10,6 +10,8 @@ #include <asm/vdso.h> #include <asm/asm-offsets.h> +#include "datapage.h" + .text /* @@ -24,14 +26,12 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) .cfi_startproc mflr r12 .cfi_register lr,r12 - mr r11,r3 - bl __get_datapage@local + get_datapage r10, r0 mtlr r12 - mr r10,r3 lwz r7,CFG_DCACHE_BLOCKSZ(r10) addi r5,r7,-1 - andc r6,r11,r5 /* round low to line bdy */ + andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) @@ -48,7 +48,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) lwz r7,CFG_ICACHE_BLOCKSZ(r10) addi r5,r7,-1 - andc r6,r11,r5 /* round low to line bdy */ + andc r6,r3,r5 /* round low to line bdy */ subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 6984125b9fc0..d480d2d4a3fe 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -11,34 +11,13 @@ #include <asm/unistd.h> #include <asm/vdso.h> +#include "datapage.h" + .text .global __kernel_datapage_offset; __kernel_datapage_offset: .long 0 -V_FUNCTION_BEGIN(__get_datapage) - .cfi_startproc - /* We don't want that exposed or overridable as we want other objects - * to be able to bl directly to here - */ - .protected __get_datapage - .hidden __get_datapage - - mflr r0 - .cfi_register lr,r0 - - bcl 20,31,data_page_branch -data_page_branch: - mflr r3 - mtlr r0 - addi r3, r3, __kernel_datapage_offset-data_page_branch - lwz r0,0(r3) - .cfi_restore lr - add r3,r0,r3 - blr - .cfi_endproc -V_FUNCTION_END(__get_datapage) - /* * void *__kernel_get_syscall_map(unsigned int *syscall_count) ; * @@ -53,7 +32,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr r4,r3 - bl __get_datapage@local + get_datapage r3, r0 mtlr r12 addi r3,r3,CFG_SYSCALL_MAP32 cmpli cr0,r4,0 @@ -74,7 +53,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - bl __get_datapage@local + get_datapage r3, r0 lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) lwz r3,CFG_TB_TICKS_PER_SEC(r3) mtlr r12 diff --git a/arch/powerpc/kernel/vdso32/datapage.h b/arch/powerpc/kernel/vdso32/datapage.h new file mode 100644 index 000000000000..74f4f57c2da8 --- /dev/null +++ b/arch/powerpc/kernel/vdso32/datapage.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +.macro get_datapage ptr, tmp + bcl 20,31,.+4 + mflr \ptr + addi \ptr, \ptr, __kernel_datapage_offset - (.-4) + lwz \tmp, 0(\ptr) + add \ptr, \tmp, \ptr +.endm + + diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 355b537d327a..3e55cba19f44 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -12,6 +12,8 @@ #include <asm/asm-offsets.h> #include <asm/unistd.h> +#include "datapage.h" + /* Offset for the low 32-bit part of a field of long type */ #ifdef CONFIG_PPC64 #define LOPART 4 @@ -35,8 +37,7 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday) mr r10,r3 /* r10 saves tv */ mr r11,r4 /* r11 saves tz */ - bl __get_datapage@local /* get data page */ - mr r9, r3 /* datapage ptr in r9 */ + get_datapage r9, r0 cmplwi r10,0 /* check if tv is NULL */ beq 3f lis r7,1000000@ha /* load up USEC_PER_SEC */ @@ -82,8 +83,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) mflr r12 /* r12 saves lr */ .cfi_register lr,r12 mr r11,r4 /* r11 saves tp */ - bl __get_datapage@local /* get data page */ - mr r9,r3 /* datapage ptr in r9 */ + get_datapage r9, r0 lis r7,NSEC_PER_SEC@h /* want nanoseconds */ ori r7,r7,NSEC_PER_SEC@l beq cr5, .Lcoarse_clocks @@ -208,7 +208,7 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) mflr r12 .cfi_register lr,r12 - bl __get_datapage@local /* get data page */ + get_datapage r3, r0 lwz r5, CLOCK_HRTIMER_RES(r3) mtlr r12 li r3,0 @@ -242,8 +242,7 @@ V_FUNCTION_BEGIN(__kernel_time) .cfi_register lr,r12 mr r11,r3 /* r11 holds t */ - bl __get_datapage@local - mr r9, r3 /* datapage ptr in r9 */ + get_datapage r9, r0 lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
__get_datapage() is only a few instructions to retrieve the address of the page where the kernel stores data to the VDSO. By inlining this function into its users, a bl/blr pair and a mflr/mtlr pair is avoided, plus a few reg moves. The improvement is noticeable (about 55 nsec/call on an 8xx) vdsotest before the patch: gettimeofday: vdso: 731 nsec/call clock-gettime-realtime-coarse: vdso: 668 nsec/call clock-gettime-monotonic-coarse: vdso: 745 nsec/call vdsotest after the patch: gettimeofday: vdso: 677 nsec/call clock-gettime-realtime-coarse: vdso: 613 nsec/call clock-gettime-monotonic-coarse: vdso: 690 nsec/call Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> --- arch/powerpc/kernel/vdso32/cacheflush.S | 10 +++++----- arch/powerpc/kernel/vdso32/datapage.S | 29 ++++------------------------- arch/powerpc/kernel/vdso32/datapage.h | 11 +++++++++++ arch/powerpc/kernel/vdso32/gettimeofday.S | 13 ++++++------- 4 files changed, 26 insertions(+), 37 deletions(-) create mode 100644 arch/powerpc/kernel/vdso32/datapage.h