Message ID | 20220526144214.308921-1-wxjstz@126.com |
---|---|
State | Deferred |
Headers | show |
Series | [v4] lib: sbi: Improve csr read and write | expand |
On Thu, May 26, 2022 at 8:12 PM Xiang W <wxjstz@126.com> wrote: > > Reduce the overhead of csr read and write code by producing a small > piece of code to perform csr read and write. > > Signed-off-by: Xiang W <wxjstz@126.com> > --- > Changes in v4: > - add fence.i sync instruction memory > > Changes in v3: > - Prevent unnecessary optimizations by the compiler > > Changes in v2: > - Fix thread safety related bugs as suggested by Anup > > lib/sbi/riscv_asm.c | 145 ++++---------------------------------------- > 1 file changed, 13 insertions(+), 132 deletions(-) > > diff --git a/lib/sbi/riscv_asm.c b/lib/sbi/riscv_asm.c > index a09cf78..452cb19 100644 > --- a/lib/sbi/riscv_asm.c > +++ b/lib/sbi/riscv_asm.c > @@ -9,6 +9,7 @@ > > #include <sbi/riscv_asm.h> > #include <sbi/riscv_encoding.h> > +#include <sbi/riscv_barrier.h> > #include <sbi/sbi_error.h> > #include <sbi/sbi_platform.h> > #include <sbi/sbi_console.h> > @@ -93,142 +94,22 @@ void misa_string(int xlen, char *out, unsigned int out_sz) > > unsigned long csr_read_num(int csr_num) > { > -#define switchcase_csr_read(__csr_num, __val) \ > - case __csr_num: \ > - __val = csr_read(__csr_num); \ > - break; > -#define switchcase_csr_read_2(__csr_num, __val) \ > - switchcase_csr_read(__csr_num + 0, __val) \ > - switchcase_csr_read(__csr_num + 1, __val) > -#define switchcase_csr_read_4(__csr_num, __val) \ > - switchcase_csr_read_2(__csr_num + 0, __val) \ > - switchcase_csr_read_2(__csr_num + 2, __val) > -#define switchcase_csr_read_8(__csr_num, __val) \ > - switchcase_csr_read_4(__csr_num + 0, __val) \ > - switchcase_csr_read_4(__csr_num + 4, __val) > -#define switchcase_csr_read_16(__csr_num, __val) \ > - switchcase_csr_read_8(__csr_num + 0, __val) \ > - switchcase_csr_read_8(__csr_num + 8, __val) > -#define switchcase_csr_read_32(__csr_num, __val) \ > - switchcase_csr_read_16(__csr_num + 0, __val) \ > - switchcase_csr_read_16(__csr_num + 16, __val) > -#define switchcase_csr_read_64(__csr_num, __val) \ > - switchcase_csr_read_32(__csr_num + 0, __val) \ > - switchcase_csr_read_32(__csr_num + 32, __val) > - > - unsigned long ret = 0; > - > - switch (csr_num) { > - switchcase_csr_read_16(CSR_PMPCFG0, ret) > - switchcase_csr_read_64(CSR_PMPADDR0, ret) > - switchcase_csr_read(CSR_MCYCLE, ret) > - switchcase_csr_read(CSR_MINSTRET, ret) > - switchcase_csr_read(CSR_MHPMCOUNTER3, ret) > - switchcase_csr_read_4(CSR_MHPMCOUNTER4, ret) > - switchcase_csr_read_8(CSR_MHPMCOUNTER8, ret) > - switchcase_csr_read_16(CSR_MHPMCOUNTER16, ret) > - switchcase_csr_read(CSR_MCOUNTINHIBIT, ret) > - switchcase_csr_read(CSR_MHPMEVENT3, ret) > - switchcase_csr_read_4(CSR_MHPMEVENT4, ret) > - switchcase_csr_read_8(CSR_MHPMEVENT8, ret) > - switchcase_csr_read_16(CSR_MHPMEVENT16, ret) > -#if __riscv_xlen == 32 > - switchcase_csr_read(CSR_MCYCLEH, ret) > - switchcase_csr_read(CSR_MINSTRETH, ret) > - switchcase_csr_read(CSR_MHPMCOUNTER3H, ret) > - switchcase_csr_read_4(CSR_MHPMCOUNTER4H, ret) > - switchcase_csr_read_8(CSR_MHPMCOUNTER8H, ret) > - switchcase_csr_read_16(CSR_MHPMCOUNTER16H, ret) > - /** > - * The CSR range MHPMEVENT[3-16]H are available only if sscofpmf > - * extension is present. The caller must ensure that. > - */ > - switchcase_csr_read(CSR_MHPMEVENT3H, ret) > - switchcase_csr_read_4(CSR_MHPMEVENT4H, ret) > - switchcase_csr_read_8(CSR_MHPMEVENT8H, ret) > - switchcase_csr_read_16(CSR_MHPMEVENT16H, ret) > -#endif > - > - default: > - sbi_panic("%s: Unknown CSR %#x", __func__, csr_num); > - break; > - }; > - > - return ret; > - > -#undef switchcase_csr_read_64 > -#undef switchcase_csr_read_32 > -#undef switchcase_csr_read_16 > -#undef switchcase_csr_read_8 > -#undef switchcase_csr_read_4 > -#undef switchcase_csr_read_2 > -#undef switchcase_csr_read > + volatile uint32_t opcode_buff[2]; > + typedef unsigned long (*read_f)(void); > + opcode_buff[0] = (csr_num << 20) | 0x00002573; /* csrr a0, csr */ > + opcode_buff[1] = 0x00008067; /* ret */ > + RISCV_FENCE_I; > + return ((read_f)opcode_buff)(); > } > > void csr_write_num(int csr_num, unsigned long val) > { > -#define switchcase_csr_write(__csr_num, __val) \ > - case __csr_num: \ > - csr_write(__csr_num, __val); \ > - break; > -#define switchcase_csr_write_2(__csr_num, __val) \ > - switchcase_csr_write(__csr_num + 0, __val) \ > - switchcase_csr_write(__csr_num + 1, __val) > -#define switchcase_csr_write_4(__csr_num, __val) \ > - switchcase_csr_write_2(__csr_num + 0, __val) \ > - switchcase_csr_write_2(__csr_num + 2, __val) > -#define switchcase_csr_write_8(__csr_num, __val) \ > - switchcase_csr_write_4(__csr_num + 0, __val) \ > - switchcase_csr_write_4(__csr_num + 4, __val) > -#define switchcase_csr_write_16(__csr_num, __val) \ > - switchcase_csr_write_8(__csr_num + 0, __val) \ > - switchcase_csr_write_8(__csr_num + 8, __val) > -#define switchcase_csr_write_32(__csr_num, __val) \ > - switchcase_csr_write_16(__csr_num + 0, __val) \ > - switchcase_csr_write_16(__csr_num + 16, __val) > -#define switchcase_csr_write_64(__csr_num, __val) \ > - switchcase_csr_write_32(__csr_num + 0, __val) \ > - switchcase_csr_write_32(__csr_num + 32, __val) > - > - switch (csr_num) { > - switchcase_csr_write_16(CSR_PMPCFG0, val) > - switchcase_csr_write_64(CSR_PMPADDR0, val) > - switchcase_csr_write(CSR_MCYCLE, val) > - switchcase_csr_write(CSR_MINSTRET, val) > - switchcase_csr_write(CSR_MHPMCOUNTER3, val) > - switchcase_csr_write_4(CSR_MHPMCOUNTER4, val) > - switchcase_csr_write_8(CSR_MHPMCOUNTER8, val) > - switchcase_csr_write_16(CSR_MHPMCOUNTER16, val) > -#if __riscv_xlen == 32 > - switchcase_csr_write(CSR_MCYCLEH, val) > - switchcase_csr_write(CSR_MINSTRETH, val) > - switchcase_csr_write(CSR_MHPMCOUNTER3H, val) > - switchcase_csr_write_4(CSR_MHPMCOUNTER4H, val) > - switchcase_csr_write_8(CSR_MHPMCOUNTER8H, val) > - switchcase_csr_write_16(CSR_MHPMCOUNTER16H, val) > - switchcase_csr_write(CSR_MHPMEVENT3H, val) > - switchcase_csr_write_4(CSR_MHPMEVENT4H, val) > - switchcase_csr_write_8(CSR_MHPMEVENT8H, val) > - switchcase_csr_write_16(CSR_MHPMEVENT16H, val) > -#endif > - switchcase_csr_write(CSR_MCOUNTINHIBIT, val) > - switchcase_csr_write(CSR_MHPMEVENT3, val) > - switchcase_csr_write_4(CSR_MHPMEVENT4, val) > - switchcase_csr_write_8(CSR_MHPMEVENT8, val) > - switchcase_csr_write_16(CSR_MHPMEVENT16, val) > - > - default: > - sbi_panic("%s: Unknown CSR %#x", __func__, csr_num); > - break; > - }; > - > -#undef switchcase_csr_write_64 > -#undef switchcase_csr_write_32 > -#undef switchcase_csr_write_16 > -#undef switchcase_csr_write_8 > -#undef switchcase_csr_write_4 > -#undef switchcase_csr_write_2 > -#undef switchcase_csr_write > + volatile uint32_t opcode_buff[2]; > + typedef void (*write_f)(unsigned long val); > + opcode_buff[0] = (csr_num << 20) | 0x00051073; /* csrw csr, a0 */ > + opcode_buff[1] = 0x00008067; /* ret */ > + RISCV_FENCE_I; I agree we need FENCE.I in the path but this is like a big hammer in the execution path. This is now much slower compared to the existing approach because every CSR access using this function will execute FENCE.I Regards, Anup > + ((write_f)opcode_buff)(val); > } > > static unsigned long ctz(unsigned long x) > -- > 2.30.2 > > > -- > opensbi mailing list > opensbi@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/opensbi
在 2022-05-26星期四的 20:35 +0530,Anup Patel写道: > On Thu, May 26, 2022 at 8:12 PM Xiang W <wxjstz@126.com> wrote: > > > > Reduce the overhead of csr read and write code by producing a small > > piece of code to perform csr read and write. > > > > Signed-off-by: Xiang W <wxjstz@126.com> > > --- > > Changes in v4: > > - add fence.i sync instruction memory > > > > Changes in v3: > > - Prevent unnecessary optimizations by the compiler > > > > Changes in v2: > > - Fix thread safety related bugs as suggested by Anup > > > > lib/sbi/riscv_asm.c | 145 ++++---------------------------------------- > > 1 file changed, 13 insertions(+), 132 deletions(-) > > > > diff --git a/lib/sbi/riscv_asm.c b/lib/sbi/riscv_asm.c > > index a09cf78..452cb19 100644 > > --- a/lib/sbi/riscv_asm.c > > +++ b/lib/sbi/riscv_asm.c > > @@ -9,6 +9,7 @@ > > > > #include <sbi/riscv_asm.h> > > #include <sbi/riscv_encoding.h> > > +#include <sbi/riscv_barrier.h> > > #include <sbi/sbi_error.h> > > #include <sbi/sbi_platform.h> > > #include <sbi/sbi_console.h> > > @@ -93,142 +94,22 @@ void misa_string(int xlen, char *out, unsigned int out_sz) > > > > unsigned long csr_read_num(int csr_num) > > { > > -#define switchcase_csr_read(__csr_num, __val) \ > > - case __csr_num: \ > > - __val = csr_read(__csr_num); \ > > - break; > > -#define switchcase_csr_read_2(__csr_num, __val) \ > > - switchcase_csr_read(__csr_num + 0, __val) \ > > - switchcase_csr_read(__csr_num + 1, __val) > > -#define switchcase_csr_read_4(__csr_num, __val) \ > > - switchcase_csr_read_2(__csr_num + 0, __val) \ > > - switchcase_csr_read_2(__csr_num + 2, __val) > > -#define switchcase_csr_read_8(__csr_num, __val) \ > > - switchcase_csr_read_4(__csr_num + 0, __val) \ > > - switchcase_csr_read_4(__csr_num + 4, __val) > > -#define switchcase_csr_read_16(__csr_num, __val) \ > > - switchcase_csr_read_8(__csr_num + 0, __val) \ > > - switchcase_csr_read_8(__csr_num + 8, __val) > > -#define switchcase_csr_read_32(__csr_num, __val) \ > > - switchcase_csr_read_16(__csr_num + 0, __val) \ > > - switchcase_csr_read_16(__csr_num + 16, __val) > > -#define switchcase_csr_read_64(__csr_num, __val) \ > > - switchcase_csr_read_32(__csr_num + 0, __val) \ > > - switchcase_csr_read_32(__csr_num + 32, __val) > > - > > - unsigned long ret = 0; > > - > > - switch (csr_num) { > > - switchcase_csr_read_16(CSR_PMPCFG0, ret) > > - switchcase_csr_read_64(CSR_PMPADDR0, ret) > > - switchcase_csr_read(CSR_MCYCLE, ret) > > - switchcase_csr_read(CSR_MINSTRET, ret) > > - switchcase_csr_read(CSR_MHPMCOUNTER3, ret) > > - switchcase_csr_read_4(CSR_MHPMCOUNTER4, ret) > > - switchcase_csr_read_8(CSR_MHPMCOUNTER8, ret) > > - switchcase_csr_read_16(CSR_MHPMCOUNTER16, ret) > > - switchcase_csr_read(CSR_MCOUNTINHIBIT, ret) > > - switchcase_csr_read(CSR_MHPMEVENT3, ret) > > - switchcase_csr_read_4(CSR_MHPMEVENT4, ret) > > - switchcase_csr_read_8(CSR_MHPMEVENT8, ret) > > - switchcase_csr_read_16(CSR_MHPMEVENT16, ret) > > -#if __riscv_xlen == 32 > > - switchcase_csr_read(CSR_MCYCLEH, ret) > > - switchcase_csr_read(CSR_MINSTRETH, ret) > > - switchcase_csr_read(CSR_MHPMCOUNTER3H, ret) > > - switchcase_csr_read_4(CSR_MHPMCOUNTER4H, ret) > > - switchcase_csr_read_8(CSR_MHPMCOUNTER8H, ret) > > - switchcase_csr_read_16(CSR_MHPMCOUNTER16H, ret) > > - /** > > - * The CSR range MHPMEVENT[3-16]H are available only if sscofpmf > > - * extension is present. The caller must ensure that. > > - */ > > - switchcase_csr_read(CSR_MHPMEVENT3H, ret) > > - switchcase_csr_read_4(CSR_MHPMEVENT4H, ret) > > - switchcase_csr_read_8(CSR_MHPMEVENT8H, ret) > > - switchcase_csr_read_16(CSR_MHPMEVENT16H, ret) > > -#endif > > - > > - default: > > - sbi_panic("%s: Unknown CSR %#x", __func__, csr_num); > > - break; > > - }; > > - > > - return ret; > > - > > -#undef switchcase_csr_read_64 > > -#undef switchcase_csr_read_32 > > -#undef switchcase_csr_read_16 > > -#undef switchcase_csr_read_8 > > -#undef switchcase_csr_read_4 > > -#undef switchcase_csr_read_2 > > -#undef switchcase_csr_read > > + volatile uint32_t opcode_buff[2]; > > + typedef unsigned long (*read_f)(void); > > + opcode_buff[0] = (csr_num << 20) | 0x00002573; /* csrr a0, csr */ > > + opcode_buff[1] = 0x00008067; /* ret */ > > + RISCV_FENCE_I; > > + return ((read_f)opcode_buff)(); > > } > > > > void csr_write_num(int csr_num, unsigned long val) > > { > > -#define switchcase_csr_write(__csr_num, __val) \ > > - case __csr_num: \ > > - csr_write(__csr_num, __val); \ > > - break; > > -#define switchcase_csr_write_2(__csr_num, __val) \ > > - switchcase_csr_write(__csr_num + 0, __val) \ > > - switchcase_csr_write(__csr_num + 1, __val) > > -#define switchcase_csr_write_4(__csr_num, __val) \ > > - switchcase_csr_write_2(__csr_num + 0, __val) \ > > - switchcase_csr_write_2(__csr_num + 2, __val) > > -#define switchcase_csr_write_8(__csr_num, __val) \ > > - switchcase_csr_write_4(__csr_num + 0, __val) \ > > - switchcase_csr_write_4(__csr_num + 4, __val) > > -#define switchcase_csr_write_16(__csr_num, __val) \ > > - switchcase_csr_write_8(__csr_num + 0, __val) \ > > - switchcase_csr_write_8(__csr_num + 8, __val) > > -#define switchcase_csr_write_32(__csr_num, __val) \ > > - switchcase_csr_write_16(__csr_num + 0, __val) \ > > - switchcase_csr_write_16(__csr_num + 16, __val) > > -#define switchcase_csr_write_64(__csr_num, __val) \ > > - switchcase_csr_write_32(__csr_num + 0, __val) \ > > - switchcase_csr_write_32(__csr_num + 32, __val) > > - > > - switch (csr_num) { > > - switchcase_csr_write_16(CSR_PMPCFG0, val) > > - switchcase_csr_write_64(CSR_PMPADDR0, val) > > - switchcase_csr_write(CSR_MCYCLE, val) > > - switchcase_csr_write(CSR_MINSTRET, val) > > - switchcase_csr_write(CSR_MHPMCOUNTER3, val) > > - switchcase_csr_write_4(CSR_MHPMCOUNTER4, val) > > - switchcase_csr_write_8(CSR_MHPMCOUNTER8, val) > > - switchcase_csr_write_16(CSR_MHPMCOUNTER16, val) > > -#if __riscv_xlen == 32 > > - switchcase_csr_write(CSR_MCYCLEH, val) > > - switchcase_csr_write(CSR_MINSTRETH, val) > > - switchcase_csr_write(CSR_MHPMCOUNTER3H, val) > > - switchcase_csr_write_4(CSR_MHPMCOUNTER4H, val) > > - switchcase_csr_write_8(CSR_MHPMCOUNTER8H, val) > > - switchcase_csr_write_16(CSR_MHPMCOUNTER16H, val) > > - switchcase_csr_write(CSR_MHPMEVENT3H, val) > > - switchcase_csr_write_4(CSR_MHPMEVENT4H, val) > > - switchcase_csr_write_8(CSR_MHPMEVENT8H, val) > > - switchcase_csr_write_16(CSR_MHPMEVENT16H, val) > > -#endif > > - switchcase_csr_write(CSR_MCOUNTINHIBIT, val) > > - switchcase_csr_write(CSR_MHPMEVENT3, val) > > - switchcase_csr_write_4(CSR_MHPMEVENT4, val) > > - switchcase_csr_write_8(CSR_MHPMEVENT8, val) > > - switchcase_csr_write_16(CSR_MHPMEVENT16, val) > > - > > - default: > > - sbi_panic("%s: Unknown CSR %#x", __func__, csr_num); > > - break; > > - }; > > - > > -#undef switchcase_csr_write_64 > > -#undef switchcase_csr_write_32 > > -#undef switchcase_csr_write_16 > > -#undef switchcase_csr_write_8 > > -#undef switchcase_csr_write_4 > > -#undef switchcase_csr_write_2 > > -#undef switchcase_csr_write > > + volatile uint32_t opcode_buff[2]; > > + typedef void (*write_f)(unsigned long val); > > + opcode_buff[0] = (csr_num << 20) | 0x00051073; /* csrw csr, a0 */ > > + opcode_buff[1] = 0x00008067; /* ret */ > > + RISCV_FENCE_I; > > I agree we need FENCE.I in the path but this is > like a big hammer in the execution path. > > This is now much slower compared to the existing > approach because every CSR access using this > function will execute FENCE.I I passed the qemu test, and it can be run normally with or without fence.i. But I'm not sure if the exception happens on other platform or we have a more lightweight way. Regards, Xiang W > > Regards, > Anup > > > + ((write_f)opcode_buff)(val); > > } > > > > static unsigned long ctz(unsigned long x) > > -- > > 2.30.2 > > > > > > -- > > opensbi mailing list > > opensbi@lists.infradead.org > > http://lists.infradead.org/mailman/listinfo/opensbi
On 26 May 2022, at 16:19, Xiang W <wxjstz@126.com> wrote: > > 在 2022-05-26星期四的 20:35 +0530,Anup Patel写道: >> On Thu, May 26, 2022 at 8:12 PM Xiang W <wxjstz@126.com> wrote: >>> >>> Reduce the overhead of csr read and write code by producing a small >>> piece of code to perform csr read and write. >>> >>> Signed-off-by: Xiang W <wxjstz@126.com> >>> --- >>> Changes in v4: >>> - add fence.i sync instruction memory >>> >>> Changes in v3: >>> - Prevent unnecessary optimizations by the compiler >>> >>> Changes in v2: >>> - Fix thread safety related bugs as suggested by Anup >>> >>> lib/sbi/riscv_asm.c | 145 ++++---------------------------------------- >>> 1 file changed, 13 insertions(+), 132 deletions(-) >>> >>> diff --git a/lib/sbi/riscv_asm.c b/lib/sbi/riscv_asm.c >>> index a09cf78..452cb19 100644 >>> --- a/lib/sbi/riscv_asm.c >>> +++ b/lib/sbi/riscv_asm.c >>> @@ -9,6 +9,7 @@ >>> >>> #include <sbi/riscv_asm.h> >>> #include <sbi/riscv_encoding.h> >>> +#include <sbi/riscv_barrier.h> >>> #include <sbi/sbi_error.h> >>> #include <sbi/sbi_platform.h> >>> #include <sbi/sbi_console.h> >>> @@ -93,142 +94,22 @@ void misa_string(int xlen, char *out, unsigned int out_sz) >>> >>> unsigned long csr_read_num(int csr_num) >>> { >>> -#define switchcase_csr_read(__csr_num, __val) \ >>> - case __csr_num: \ >>> - __val = csr_read(__csr_num); \ >>> - break; >>> -#define switchcase_csr_read_2(__csr_num, __val) \ >>> - switchcase_csr_read(__csr_num + 0, __val) \ >>> - switchcase_csr_read(__csr_num + 1, __val) >>> -#define switchcase_csr_read_4(__csr_num, __val) \ >>> - switchcase_csr_read_2(__csr_num + 0, __val) \ >>> - switchcase_csr_read_2(__csr_num + 2, __val) >>> -#define switchcase_csr_read_8(__csr_num, __val) \ >>> - switchcase_csr_read_4(__csr_num + 0, __val) \ >>> - switchcase_csr_read_4(__csr_num + 4, __val) >>> -#define switchcase_csr_read_16(__csr_num, __val) \ >>> - switchcase_csr_read_8(__csr_num + 0, __val) \ >>> - switchcase_csr_read_8(__csr_num + 8, __val) >>> -#define switchcase_csr_read_32(__csr_num, __val) \ >>> - switchcase_csr_read_16(__csr_num + 0, __val) \ >>> - switchcase_csr_read_16(__csr_num + 16, __val) >>> -#define switchcase_csr_read_64(__csr_num, __val) \ >>> - switchcase_csr_read_32(__csr_num + 0, __val) \ >>> - switchcase_csr_read_32(__csr_num + 32, __val) >>> - >>> - unsigned long ret = 0; >>> - >>> - switch (csr_num) { >>> - switchcase_csr_read_16(CSR_PMPCFG0, ret) >>> - switchcase_csr_read_64(CSR_PMPADDR0, ret) >>> - switchcase_csr_read(CSR_MCYCLE, ret) >>> - switchcase_csr_read(CSR_MINSTRET, ret) >>> - switchcase_csr_read(CSR_MHPMCOUNTER3, ret) >>> - switchcase_csr_read_4(CSR_MHPMCOUNTER4, ret) >>> - switchcase_csr_read_8(CSR_MHPMCOUNTER8, ret) >>> - switchcase_csr_read_16(CSR_MHPMCOUNTER16, ret) >>> - switchcase_csr_read(CSR_MCOUNTINHIBIT, ret) >>> - switchcase_csr_read(CSR_MHPMEVENT3, ret) >>> - switchcase_csr_read_4(CSR_MHPMEVENT4, ret) >>> - switchcase_csr_read_8(CSR_MHPMEVENT8, ret) >>> - switchcase_csr_read_16(CSR_MHPMEVENT16, ret) >>> -#if __riscv_xlen == 32 >>> - switchcase_csr_read(CSR_MCYCLEH, ret) >>> - switchcase_csr_read(CSR_MINSTRETH, ret) >>> - switchcase_csr_read(CSR_MHPMCOUNTER3H, ret) >>> - switchcase_csr_read_4(CSR_MHPMCOUNTER4H, ret) >>> - switchcase_csr_read_8(CSR_MHPMCOUNTER8H, ret) >>> - switchcase_csr_read_16(CSR_MHPMCOUNTER16H, ret) >>> - /** >>> - * The CSR range MHPMEVENT[3-16]H are available only if sscofpmf >>> - * extension is present. The caller must ensure that. >>> - */ >>> - switchcase_csr_read(CSR_MHPMEVENT3H, ret) >>> - switchcase_csr_read_4(CSR_MHPMEVENT4H, ret) >>> - switchcase_csr_read_8(CSR_MHPMEVENT8H, ret) >>> - switchcase_csr_read_16(CSR_MHPMEVENT16H, ret) >>> -#endif >>> - >>> - default: >>> - sbi_panic("%s: Unknown CSR %#x", __func__, csr_num); >>> - break; >>> - }; >>> - >>> - return ret; >>> - >>> -#undef switchcase_csr_read_64 >>> -#undef switchcase_csr_read_32 >>> -#undef switchcase_csr_read_16 >>> -#undef switchcase_csr_read_8 >>> -#undef switchcase_csr_read_4 >>> -#undef switchcase_csr_read_2 >>> -#undef switchcase_csr_read >>> + volatile uint32_t opcode_buff[2]; >>> + typedef unsigned long (*read_f)(void); >>> + opcode_buff[0] = (csr_num << 20) | 0x00002573; /* csrr a0, csr */ >>> + opcode_buff[1] = 0x00008067; /* ret */ >>> + RISCV_FENCE_I; >>> + return ((read_f)opcode_buff)(); >>> } >>> >>> void csr_write_num(int csr_num, unsigned long val) >>> { >>> -#define switchcase_csr_write(__csr_num, __val) \ >>> - case __csr_num: \ >>> - csr_write(__csr_num, __val); \ >>> - break; >>> -#define switchcase_csr_write_2(__csr_num, __val) \ >>> - switchcase_csr_write(__csr_num + 0, __val) \ >>> - switchcase_csr_write(__csr_num + 1, __val) >>> -#define switchcase_csr_write_4(__csr_num, __val) \ >>> - switchcase_csr_write_2(__csr_num + 0, __val) \ >>> - switchcase_csr_write_2(__csr_num + 2, __val) >>> -#define switchcase_csr_write_8(__csr_num, __val) \ >>> - switchcase_csr_write_4(__csr_num + 0, __val) \ >>> - switchcase_csr_write_4(__csr_num + 4, __val) >>> -#define switchcase_csr_write_16(__csr_num, __val) \ >>> - switchcase_csr_write_8(__csr_num + 0, __val) \ >>> - switchcase_csr_write_8(__csr_num + 8, __val) >>> -#define switchcase_csr_write_32(__csr_num, __val) \ >>> - switchcase_csr_write_16(__csr_num + 0, __val) \ >>> - switchcase_csr_write_16(__csr_num + 16, __val) >>> -#define switchcase_csr_write_64(__csr_num, __val) \ >>> - switchcase_csr_write_32(__csr_num + 0, __val) \ >>> - switchcase_csr_write_32(__csr_num + 32, __val) >>> - >>> - switch (csr_num) { >>> - switchcase_csr_write_16(CSR_PMPCFG0, val) >>> - switchcase_csr_write_64(CSR_PMPADDR0, val) >>> - switchcase_csr_write(CSR_MCYCLE, val) >>> - switchcase_csr_write(CSR_MINSTRET, val) >>> - switchcase_csr_write(CSR_MHPMCOUNTER3, val) >>> - switchcase_csr_write_4(CSR_MHPMCOUNTER4, val) >>> - switchcase_csr_write_8(CSR_MHPMCOUNTER8, val) >>> - switchcase_csr_write_16(CSR_MHPMCOUNTER16, val) >>> -#if __riscv_xlen == 32 >>> - switchcase_csr_write(CSR_MCYCLEH, val) >>> - switchcase_csr_write(CSR_MINSTRETH, val) >>> - switchcase_csr_write(CSR_MHPMCOUNTER3H, val) >>> - switchcase_csr_write_4(CSR_MHPMCOUNTER4H, val) >>> - switchcase_csr_write_8(CSR_MHPMCOUNTER8H, val) >>> - switchcase_csr_write_16(CSR_MHPMCOUNTER16H, val) >>> - switchcase_csr_write(CSR_MHPMEVENT3H, val) >>> - switchcase_csr_write_4(CSR_MHPMEVENT4H, val) >>> - switchcase_csr_write_8(CSR_MHPMEVENT8H, val) >>> - switchcase_csr_write_16(CSR_MHPMEVENT16H, val) >>> -#endif >>> - switchcase_csr_write(CSR_MCOUNTINHIBIT, val) >>> - switchcase_csr_write(CSR_MHPMEVENT3, val) >>> - switchcase_csr_write_4(CSR_MHPMEVENT4, val) >>> - switchcase_csr_write_8(CSR_MHPMEVENT8, val) >>> - switchcase_csr_write_16(CSR_MHPMEVENT16, val) >>> - >>> - default: >>> - sbi_panic("%s: Unknown CSR %#x", __func__, csr_num); >>> - break; >>> - }; >>> - >>> -#undef switchcase_csr_write_64 >>> -#undef switchcase_csr_write_32 >>> -#undef switchcase_csr_write_16 >>> -#undef switchcase_csr_write_8 >>> -#undef switchcase_csr_write_4 >>> -#undef switchcase_csr_write_2 >>> -#undef switchcase_csr_write >>> + volatile uint32_t opcode_buff[2]; >>> + typedef void (*write_f)(unsigned long val); >>> + opcode_buff[0] = (csr_num << 20) | 0x00051073; /* csrw csr, a0 */ >>> + opcode_buff[1] = 0x00008067; /* ret */ >>> + RISCV_FENCE_I; >> >> I agree we need FENCE.I in the path but this is >> like a big hammer in the execution path. >> >> This is now much slower compared to the existing >> approach because every CSR access using this >> function will execute FENCE.I > I passed the qemu test, and it can be run normally with > or without fence.i. But I'm not sure if the exception > happens on other platform or we have a more lightweight > way. There is not. Optimising this kind of bad practice code is not a goal of modern architectures. After all, it totally precludes having W^X protection in OpenSBI (e.g. via the PMP, or via CHERI), especially executable stacks form a key part of *the* classic buffer overflow shell code attack. Jess
在 2022-05-26星期四的 16:28 +0100,Jessica Clarke写道: > On 26 May 2022, at 16:19, Xiang W <wxjstz@126.com> wrote: > > > > 在 2022-05-26星期四的 20:35 +0530,Anup Patel写道: > > > On Thu, May 26, 2022 at 8:12 PM Xiang W <wxjstz@126.com> wrote: > > > > > > > > Reduce the overhead of csr read and write code by producing a small > > > > piece of code to perform csr read and write. > > > > > > > > Signed-off-by: Xiang W <wxjstz@126.com> > > > > --- > > > > Changes in v4: > > > > - add fence.i sync instruction memory > > > > > > > > Changes in v3: > > > > - Prevent unnecessary optimizations by the compiler > > > > > > > > Changes in v2: > > > > - Fix thread safety related bugs as suggested by Anup > > > > > > > > lib/sbi/riscv_asm.c | 145 ++++---------------------------------------- > > > > 1 file changed, 13 insertions(+), 132 deletions(-) > > > > > > > > diff --git a/lib/sbi/riscv_asm.c b/lib/sbi/riscv_asm.c > > > > index a09cf78..452cb19 100644 > > > > --- a/lib/sbi/riscv_asm.c > > > > +++ b/lib/sbi/riscv_asm.c > > > > @@ -9,6 +9,7 @@ > > > > > > > > #include <sbi/riscv_asm.h> > > > > #include <sbi/riscv_encoding.h> > > > > +#include <sbi/riscv_barrier.h> > > > > #include <sbi/sbi_error.h> > > > > #include <sbi/sbi_platform.h> > > > > #include <sbi/sbi_console.h> > > > > @@ -93,142 +94,22 @@ void misa_string(int xlen, char *out, unsigned int out_sz) > > > > > > > > unsigned long csr_read_num(int csr_num) > > > > { > > > > -#define switchcase_csr_read(__csr_num, __val) \ > > > > - case __csr_num: \ > > > > - __val = csr_read(__csr_num); \ > > > > - break; > > > > -#define switchcase_csr_read_2(__csr_num, __val) \ > > > > - switchcase_csr_read(__csr_num + 0, __val) \ > > > > - switchcase_csr_read(__csr_num + 1, __val) > > > > -#define switchcase_csr_read_4(__csr_num, __val) \ > > > > - switchcase_csr_read_2(__csr_num + 0, __val) \ > > > > - switchcase_csr_read_2(__csr_num + 2, __val) > > > > -#define switchcase_csr_read_8(__csr_num, __val) \ > > > > - switchcase_csr_read_4(__csr_num + 0, __val) \ > > > > - switchcase_csr_read_4(__csr_num + 4, __val) > > > > -#define switchcase_csr_read_16(__csr_num, __val) \ > > > > - switchcase_csr_read_8(__csr_num + 0, __val) \ > > > > - switchcase_csr_read_8(__csr_num + 8, __val) > > > > -#define switchcase_csr_read_32(__csr_num, __val) \ > > > > - switchcase_csr_read_16(__csr_num + 0, __val) \ > > > > - switchcase_csr_read_16(__csr_num + 16, __val) > > > > -#define switchcase_csr_read_64(__csr_num, __val) \ > > > > - switchcase_csr_read_32(__csr_num + 0, __val) \ > > > > - switchcase_csr_read_32(__csr_num + 32, __val) > > > > - > > > > - unsigned long ret = 0; > > > > - > > > > - switch (csr_num) { > > > > - switchcase_csr_read_16(CSR_PMPCFG0, ret) > > > > - switchcase_csr_read_64(CSR_PMPADDR0, ret) > > > > - switchcase_csr_read(CSR_MCYCLE, ret) > > > > - switchcase_csr_read(CSR_MINSTRET, ret) > > > > - switchcase_csr_read(CSR_MHPMCOUNTER3, ret) > > > > - switchcase_csr_read_4(CSR_MHPMCOUNTER4, ret) > > > > - switchcase_csr_read_8(CSR_MHPMCOUNTER8, ret) > > > > - switchcase_csr_read_16(CSR_MHPMCOUNTER16, ret) > > > > - switchcase_csr_read(CSR_MCOUNTINHIBIT, ret) > > > > - switchcase_csr_read(CSR_MHPMEVENT3, ret) > > > > - switchcase_csr_read_4(CSR_MHPMEVENT4, ret) > > > > - switchcase_csr_read_8(CSR_MHPMEVENT8, ret) > > > > - switchcase_csr_read_16(CSR_MHPMEVENT16, ret) > > > > -#if __riscv_xlen == 32 > > > > - switchcase_csr_read(CSR_MCYCLEH, ret) > > > > - switchcase_csr_read(CSR_MINSTRETH, ret) > > > > - switchcase_csr_read(CSR_MHPMCOUNTER3H, ret) > > > > - switchcase_csr_read_4(CSR_MHPMCOUNTER4H, ret) > > > > - switchcase_csr_read_8(CSR_MHPMCOUNTER8H, ret) > > > > - switchcase_csr_read_16(CSR_MHPMCOUNTER16H, ret) > > > > - /** > > > > - * The CSR range MHPMEVENT[3-16]H are available only if sscofpmf > > > > - * extension is present. The caller must ensure that. > > > > - */ > > > > - switchcase_csr_read(CSR_MHPMEVENT3H, ret) > > > > - switchcase_csr_read_4(CSR_MHPMEVENT4H, ret) > > > > - switchcase_csr_read_8(CSR_MHPMEVENT8H, ret) > > > > - switchcase_csr_read_16(CSR_MHPMEVENT16H, ret) > > > > -#endif > > > > - > > > > - default: > > > > - sbi_panic("%s: Unknown CSR %#x", __func__, csr_num); > > > > - break; > > > > - }; > > > > - > > > > - return ret; > > > > - > > > > -#undef switchcase_csr_read_64 > > > > -#undef switchcase_csr_read_32 > > > > -#undef switchcase_csr_read_16 > > > > -#undef switchcase_csr_read_8 > > > > -#undef switchcase_csr_read_4 > > > > -#undef switchcase_csr_read_2 > > > > -#undef switchcase_csr_read > > > > + volatile uint32_t opcode_buff[2]; > > > > + typedef unsigned long (*read_f)(void); > > > > + opcode_buff[0] = (csr_num << 20) | 0x00002573; /* csrr a0, csr */ > > > > + opcode_buff[1] = 0x00008067; /* ret */ > > > > + RISCV_FENCE_I; > > > > + return ((read_f)opcode_buff)(); > > > > } > > > > > > > > void csr_write_num(int csr_num, unsigned long val) > > > > { > > > > -#define switchcase_csr_write(__csr_num, __val) \ > > > > - case __csr_num: \ > > > > - csr_write(__csr_num, __val); \ > > > > - break; > > > > -#define switchcase_csr_write_2(__csr_num, __val) \ > > > > - switchcase_csr_write(__csr_num + 0, __val) \ > > > > - switchcase_csr_write(__csr_num + 1, __val) > > > > -#define switchcase_csr_write_4(__csr_num, __val) \ > > > > - switchcase_csr_write_2(__csr_num + 0, __val) \ > > > > - switchcase_csr_write_2(__csr_num + 2, __val) > > > > -#define switchcase_csr_write_8(__csr_num, __val) \ > > > > - switchcase_csr_write_4(__csr_num + 0, __val) \ > > > > - switchcase_csr_write_4(__csr_num + 4, __val) > > > > -#define switchcase_csr_write_16(__csr_num, __val) \ > > > > - switchcase_csr_write_8(__csr_num + 0, __val) \ > > > > - switchcase_csr_write_8(__csr_num + 8, __val) > > > > -#define switchcase_csr_write_32(__csr_num, __val) \ > > > > - switchcase_csr_write_16(__csr_num + 0, __val) \ > > > > - switchcase_csr_write_16(__csr_num + 16, __val) > > > > -#define switchcase_csr_write_64(__csr_num, __val) \ > > > > - switchcase_csr_write_32(__csr_num + 0, __val) \ > > > > - switchcase_csr_write_32(__csr_num + 32, __val) > > > > - > > > > - switch (csr_num) { > > > > - switchcase_csr_write_16(CSR_PMPCFG0, val) > > > > - switchcase_csr_write_64(CSR_PMPADDR0, val) > > > > - switchcase_csr_write(CSR_MCYCLE, val) > > > > - switchcase_csr_write(CSR_MINSTRET, val) > > > > - switchcase_csr_write(CSR_MHPMCOUNTER3, val) > > > > - switchcase_csr_write_4(CSR_MHPMCOUNTER4, val) > > > > - switchcase_csr_write_8(CSR_MHPMCOUNTER8, val) > > > > - switchcase_csr_write_16(CSR_MHPMCOUNTER16, val) > > > > -#if __riscv_xlen == 32 > > > > - switchcase_csr_write(CSR_MCYCLEH, val) > > > > - switchcase_csr_write(CSR_MINSTRETH, val) > > > > - switchcase_csr_write(CSR_MHPMCOUNTER3H, val) > > > > - switchcase_csr_write_4(CSR_MHPMCOUNTER4H, val) > > > > - switchcase_csr_write_8(CSR_MHPMCOUNTER8H, val) > > > > - switchcase_csr_write_16(CSR_MHPMCOUNTER16H, val) > > > > - switchcase_csr_write(CSR_MHPMEVENT3H, val) > > > > - switchcase_csr_write_4(CSR_MHPMEVENT4H, val) > > > > - switchcase_csr_write_8(CSR_MHPMEVENT8H, val) > > > > - switchcase_csr_write_16(CSR_MHPMEVENT16H, val) > > > > -#endif > > > > - switchcase_csr_write(CSR_MCOUNTINHIBIT, val) > > > > - switchcase_csr_write(CSR_MHPMEVENT3, val) > > > > - switchcase_csr_write_4(CSR_MHPMEVENT4, val) > > > > - switchcase_csr_write_8(CSR_MHPMEVENT8, val) > > > > - switchcase_csr_write_16(CSR_MHPMEVENT16, val) > > > > - > > > > - default: > > > > - sbi_panic("%s: Unknown CSR %#x", __func__, csr_num); > > > > - break; > > > > - }; > > > > - > > > > -#undef switchcase_csr_write_64 > > > > -#undef switchcase_csr_write_32 > > > > -#undef switchcase_csr_write_16 > > > > -#undef switchcase_csr_write_8 > > > > -#undef switchcase_csr_write_4 > > > > -#undef switchcase_csr_write_2 > > > > -#undef switchcase_csr_write > > > > + volatile uint32_t opcode_buff[2]; > > > > + typedef void (*write_f)(unsigned long val); > > > > + opcode_buff[0] = (csr_num << 20) | 0x00051073; /* csrw csr, a0 */ > > > > + opcode_buff[1] = 0x00008067; /* ret */ > > > > + RISCV_FENCE_I; > > > > > > I agree we need FENCE.I in the path but this is > > > like a big hammer in the execution path. > > > > > > This is now much slower compared to the existing > > > approach because every CSR access using this > > > function will execute FENCE.I > > I passed the qemu test, and it can be run normally with > > or without fence.i. But I'm not sure if the exception > > happens on other platform or we have a more lightweight > > way. > > There is not. Optimising this kind of bad practice code is not a goal > of modern architectures. After all, it totally precludes having W^X > protection in OpenSBI (e.g. via the PMP, or via CHERI), especially > executable stacks form a key part of *the* classic buffer overflow > shell code attack. > > Jess Currently opensbi does not have strict RWX permission control. We only need W and X permissions not to exist at the same time to avoid security risks. If opensbi has RWX permission control, we only need to change the permission of opcode_buff before and after running. The stack default is RW permission, we can change the permission of opcode_buff to RX before the call, and change the permission to RW after the call is completed. I think these jobs can be put after opensbi has rwx permission control. Regards, Xiang W > >
On 6/21/22 23:53, Xiang W wrote: > 在 2022-05-26星期四的 16:28 +0100,Jessica Clarke写道: >> On 26 May 2022, at 16:19, Xiang W <wxjstz@126.com> wrote: >>> 在 2022-05-26星期四的 20:35 +0530,Anup Patel写道: >>>> On Thu, May 26, 2022 at 8:12 PM Xiang W <wxjstz@126.com> wrote: >>>>> <patch contents snipped> >>>> I agree we need FENCE.I in the path but this is >>>> like a big hammer in the execution path. >>>> >>>> This is now much slower compared to the existing >>>> approach because every CSR access using this >>>> function will execute FENCE.I >>> I passed the qemu test, and it can be run normally with >>> or without fence.i. But I'm not sure if the exception >>> happens on other platform or we have a more lightweight >>> way. >> There is not. Optimising this kind of bad practice code is not a goal >> of modern architectures. After all, it totally precludes having W^X >> protection in OpenSBI (e.g. via the PMP, or via CHERI), especially >> executable stacks form a key part of *the* classic buffer overflow >> shell code attack. >> >> Jess > Currently opensbi does not have strict RWX permission control. We only > need W and X permissions not to exist at the same time to avoid security > risks. If opensbi has RWX permission control, we only need to change the > permission of opcode_buff before and after running. The stack default is > RW permission, we can change the permission of opcode_buff to RX before > the call, and change the permission to RW after the call is completed. > > I think these jobs can be put after opensbi has rwx permission control. > > Regards, > Xiang W But in that case, wouldn't this codegen approach be even slower? Is there any particular reason to believe that this will be faster than the current switch-case? Some benchmark on existing silicon would be nice. The original implementation compiles down a jump table, with several branches and bound checks at the beginning. It really is just a hunch, but I find it difficult to be convinced how generating code could be faster. Optimizing the current switch-case approach into a better jump table seems to be the way to go here. Currently the compiler does not seem to take advantage of the fact that every csr* instruction is exactly 4 bytes. There also seems to be a few extra instructions for dealing with int instead of long, though I am not sure. Furthermore, having separate functions for hpmcounter, pmpcfg, etc. might help with avoiding some branches, and possibly also make code clearer at call site. Regards, dram
在 2022-06-22星期三的 15:12 +0800,dramforever写道: > > On 6/21/22 23:53, Xiang W wrote: > > 在 2022-05-26星期四的 16:28 +0100,Jessica Clarke写道: > > > On 26 May 2022, at 16:19, Xiang W <wxjstz@126.com> wrote: > > > > 在 2022-05-26星期四的 20:35 +0530,Anup Patel写道: > > > > > On Thu, May 26, 2022 at 8:12 PM Xiang W <wxjstz@126.com> wrote: > > > > > > <patch contents snipped> > > > > > I agree we need FENCE.I in the path but this is > > > > > like a big hammer in the execution path. > > > > > > > > > > This is now much slower compared to the existing > > > > > approach because every CSR access using this > > > > > function will execute FENCE.I > > > > I passed the qemu test, and it can be run normally with > > > > or without fence.i. But I'm not sure if the exception > > > > happens on other platform or we have a more lightweight > > > > way. > > > There is not. Optimising this kind of bad practice code is not a goal > > > of modern architectures. After all, it totally precludes having W^X > > > protection in OpenSBI (e.g. via the PMP, or via CHERI), especially > > > executable stacks form a key part of *the* classic buffer overflow > > > shell code attack. > > > > > > Jess > > Currently opensbi does not have strict RWX permission control. We only > > need W and X permissions not to exist at the same time to avoid security > > risks. If opensbi has RWX permission control, we only need to change the > > permission of opcode_buff before and after running. The stack default is > > RW permission, we can change the permission of opcode_buff to RX before > > the call, and change the permission to RW after the call is completed. > > > > I think these jobs can be put after opensbi has rwx permission control. > > > > Regards, > > Xiang W > > But in that case, wouldn't this codegen approach be even slower? Is > there any particular reason to believe that this will be faster than the > current switch-case? Some benchmark on existing silicon would be nice. Yes, it will definitely be slower. This will only reduce the maintenance cost of csr_read_num/csr_write_num, without adding code to read a new csr. Regards, Xiang W > > The original implementation compiles down a jump table, with several > branches and bound checks at the beginning. It really is just a hunch, > but I find it difficult to be convinced how generating code could be faster. > > Optimizing the current switch-case approach into a better jump table > seems to be the way to go here. Currently the compiler does not seem to > take advantage of the fact that every csr* instruction is exactly 4 > bytes. There also seems to be a few extra instructions for dealing with > int instead of long, though I am not sure. Furthermore, having separate > functions for hpmcounter, pmpcfg, etc. might help with avoiding some > branches, and possibly also make code clearer at call site. > > Regards, > dram >
diff --git a/lib/sbi/riscv_asm.c b/lib/sbi/riscv_asm.c index a09cf78..452cb19 100644 --- a/lib/sbi/riscv_asm.c +++ b/lib/sbi/riscv_asm.c @@ -9,6 +9,7 @@ #include <sbi/riscv_asm.h> #include <sbi/riscv_encoding.h> +#include <sbi/riscv_barrier.h> #include <sbi/sbi_error.h> #include <sbi/sbi_platform.h> #include <sbi/sbi_console.h> @@ -93,142 +94,22 @@ void misa_string(int xlen, char *out, unsigned int out_sz) unsigned long csr_read_num(int csr_num) { -#define switchcase_csr_read(__csr_num, __val) \ - case __csr_num: \ - __val = csr_read(__csr_num); \ - break; -#define switchcase_csr_read_2(__csr_num, __val) \ - switchcase_csr_read(__csr_num + 0, __val) \ - switchcase_csr_read(__csr_num + 1, __val) -#define switchcase_csr_read_4(__csr_num, __val) \ - switchcase_csr_read_2(__csr_num + 0, __val) \ - switchcase_csr_read_2(__csr_num + 2, __val) -#define switchcase_csr_read_8(__csr_num, __val) \ - switchcase_csr_read_4(__csr_num + 0, __val) \ - switchcase_csr_read_4(__csr_num + 4, __val) -#define switchcase_csr_read_16(__csr_num, __val) \ - switchcase_csr_read_8(__csr_num + 0, __val) \ - switchcase_csr_read_8(__csr_num + 8, __val) -#define switchcase_csr_read_32(__csr_num, __val) \ - switchcase_csr_read_16(__csr_num + 0, __val) \ - switchcase_csr_read_16(__csr_num + 16, __val) -#define switchcase_csr_read_64(__csr_num, __val) \ - switchcase_csr_read_32(__csr_num + 0, __val) \ - switchcase_csr_read_32(__csr_num + 32, __val) - - unsigned long ret = 0; - - switch (csr_num) { - switchcase_csr_read_16(CSR_PMPCFG0, ret) - switchcase_csr_read_64(CSR_PMPADDR0, ret) - switchcase_csr_read(CSR_MCYCLE, ret) - switchcase_csr_read(CSR_MINSTRET, ret) - switchcase_csr_read(CSR_MHPMCOUNTER3, ret) - switchcase_csr_read_4(CSR_MHPMCOUNTER4, ret) - switchcase_csr_read_8(CSR_MHPMCOUNTER8, ret) - switchcase_csr_read_16(CSR_MHPMCOUNTER16, ret) - switchcase_csr_read(CSR_MCOUNTINHIBIT, ret) - switchcase_csr_read(CSR_MHPMEVENT3, ret) - switchcase_csr_read_4(CSR_MHPMEVENT4, ret) - switchcase_csr_read_8(CSR_MHPMEVENT8, ret) - switchcase_csr_read_16(CSR_MHPMEVENT16, ret) -#if __riscv_xlen == 32 - switchcase_csr_read(CSR_MCYCLEH, ret) - switchcase_csr_read(CSR_MINSTRETH, ret) - switchcase_csr_read(CSR_MHPMCOUNTER3H, ret) - switchcase_csr_read_4(CSR_MHPMCOUNTER4H, ret) - switchcase_csr_read_8(CSR_MHPMCOUNTER8H, ret) - switchcase_csr_read_16(CSR_MHPMCOUNTER16H, ret) - /** - * The CSR range MHPMEVENT[3-16]H are available only if sscofpmf - * extension is present. The caller must ensure that. - */ - switchcase_csr_read(CSR_MHPMEVENT3H, ret) - switchcase_csr_read_4(CSR_MHPMEVENT4H, ret) - switchcase_csr_read_8(CSR_MHPMEVENT8H, ret) - switchcase_csr_read_16(CSR_MHPMEVENT16H, ret) -#endif - - default: - sbi_panic("%s: Unknown CSR %#x", __func__, csr_num); - break; - }; - - return ret; - -#undef switchcase_csr_read_64 -#undef switchcase_csr_read_32 -#undef switchcase_csr_read_16 -#undef switchcase_csr_read_8 -#undef switchcase_csr_read_4 -#undef switchcase_csr_read_2 -#undef switchcase_csr_read + volatile uint32_t opcode_buff[2]; + typedef unsigned long (*read_f)(void); + opcode_buff[0] = (csr_num << 20) | 0x00002573; /* csrr a0, csr */ + opcode_buff[1] = 0x00008067; /* ret */ + RISCV_FENCE_I; + return ((read_f)opcode_buff)(); } void csr_write_num(int csr_num, unsigned long val) { -#define switchcase_csr_write(__csr_num, __val) \ - case __csr_num: \ - csr_write(__csr_num, __val); \ - break; -#define switchcase_csr_write_2(__csr_num, __val) \ - switchcase_csr_write(__csr_num + 0, __val) \ - switchcase_csr_write(__csr_num + 1, __val) -#define switchcase_csr_write_4(__csr_num, __val) \ - switchcase_csr_write_2(__csr_num + 0, __val) \ - switchcase_csr_write_2(__csr_num + 2, __val) -#define switchcase_csr_write_8(__csr_num, __val) \ - switchcase_csr_write_4(__csr_num + 0, __val) \ - switchcase_csr_write_4(__csr_num + 4, __val) -#define switchcase_csr_write_16(__csr_num, __val) \ - switchcase_csr_write_8(__csr_num + 0, __val) \ - switchcase_csr_write_8(__csr_num + 8, __val) -#define switchcase_csr_write_32(__csr_num, __val) \ - switchcase_csr_write_16(__csr_num + 0, __val) \ - switchcase_csr_write_16(__csr_num + 16, __val) -#define switchcase_csr_write_64(__csr_num, __val) \ - switchcase_csr_write_32(__csr_num + 0, __val) \ - switchcase_csr_write_32(__csr_num + 32, __val) - - switch (csr_num) { - switchcase_csr_write_16(CSR_PMPCFG0, val) - switchcase_csr_write_64(CSR_PMPADDR0, val) - switchcase_csr_write(CSR_MCYCLE, val) - switchcase_csr_write(CSR_MINSTRET, val) - switchcase_csr_write(CSR_MHPMCOUNTER3, val) - switchcase_csr_write_4(CSR_MHPMCOUNTER4, val) - switchcase_csr_write_8(CSR_MHPMCOUNTER8, val) - switchcase_csr_write_16(CSR_MHPMCOUNTER16, val) -#if __riscv_xlen == 32 - switchcase_csr_write(CSR_MCYCLEH, val) - switchcase_csr_write(CSR_MINSTRETH, val) - switchcase_csr_write(CSR_MHPMCOUNTER3H, val) - switchcase_csr_write_4(CSR_MHPMCOUNTER4H, val) - switchcase_csr_write_8(CSR_MHPMCOUNTER8H, val) - switchcase_csr_write_16(CSR_MHPMCOUNTER16H, val) - switchcase_csr_write(CSR_MHPMEVENT3H, val) - switchcase_csr_write_4(CSR_MHPMEVENT4H, val) - switchcase_csr_write_8(CSR_MHPMEVENT8H, val) - switchcase_csr_write_16(CSR_MHPMEVENT16H, val) -#endif - switchcase_csr_write(CSR_MCOUNTINHIBIT, val) - switchcase_csr_write(CSR_MHPMEVENT3, val) - switchcase_csr_write_4(CSR_MHPMEVENT4, val) - switchcase_csr_write_8(CSR_MHPMEVENT8, val) - switchcase_csr_write_16(CSR_MHPMEVENT16, val) - - default: - sbi_panic("%s: Unknown CSR %#x", __func__, csr_num); - break; - }; - -#undef switchcase_csr_write_64 -#undef switchcase_csr_write_32 -#undef switchcase_csr_write_16 -#undef switchcase_csr_write_8 -#undef switchcase_csr_write_4 -#undef switchcase_csr_write_2 -#undef switchcase_csr_write + volatile uint32_t opcode_buff[2]; + typedef void (*write_f)(unsigned long val); + opcode_buff[0] = (csr_num << 20) | 0x00051073; /* csrw csr, a0 */ + opcode_buff[1] = 0x00008067; /* ret */ + RISCV_FENCE_I; + ((write_f)opcode_buff)(val); } static unsigned long ctz(unsigned long x)
Reduce the overhead of csr read and write code by producing a small piece of code to perform csr read and write. Signed-off-by: Xiang W <wxjstz@126.com> --- Changes in v4: - add fence.i sync instruction memory Changes in v3: - Prevent unnecessary optimizations by the compiler Changes in v2: - Fix thread safety related bugs as suggested by Anup lib/sbi/riscv_asm.c | 145 ++++---------------------------------------- 1 file changed, 13 insertions(+), 132 deletions(-)