diff mbox series

[v4] lib: sbi: Improve csr read and write

Message ID 20220526144214.308921-1-wxjstz@126.com
State Deferred
Headers show
Series [v4] lib: sbi: Improve csr read and write | expand

Commit Message

Xiang W May 26, 2022, 2:42 p.m. UTC
Reduce the overhead of csr read and write code by producing a small
piece of code to perform csr read and write.

Signed-off-by: Xiang W <wxjstz@126.com>
---
Changes in v4:
- add fence.i sync instruction memory

Changes in v3:
- Prevent unnecessary optimizations by the compiler

Changes in v2:
- Fix thread safety related bugs as suggested by Anup

 lib/sbi/riscv_asm.c | 145 ++++----------------------------------------
 1 file changed, 13 insertions(+), 132 deletions(-)

Comments

Anup Patel May 26, 2022, 3:05 p.m. UTC | #1
On Thu, May 26, 2022 at 8:12 PM Xiang W <wxjstz@126.com> wrote:
>
> Reduce the overhead of csr read and write code by producing a small
> piece of code to perform csr read and write.
>
> Signed-off-by: Xiang W <wxjstz@126.com>
> ---
> Changes in v4:
> - add fence.i sync instruction memory
>
> Changes in v3:
> - Prevent unnecessary optimizations by the compiler
>
> Changes in v2:
> - Fix thread safety related bugs as suggested by Anup
>
>  lib/sbi/riscv_asm.c | 145 ++++----------------------------------------
>  1 file changed, 13 insertions(+), 132 deletions(-)
>
> diff --git a/lib/sbi/riscv_asm.c b/lib/sbi/riscv_asm.c
> index a09cf78..452cb19 100644
> --- a/lib/sbi/riscv_asm.c
> +++ b/lib/sbi/riscv_asm.c
> @@ -9,6 +9,7 @@
>
>  #include <sbi/riscv_asm.h>
>  #include <sbi/riscv_encoding.h>
> +#include <sbi/riscv_barrier.h>
>  #include <sbi/sbi_error.h>
>  #include <sbi/sbi_platform.h>
>  #include <sbi/sbi_console.h>
> @@ -93,142 +94,22 @@ void misa_string(int xlen, char *out, unsigned int out_sz)
>
>  unsigned long csr_read_num(int csr_num)
>  {
> -#define switchcase_csr_read(__csr_num, __val)          \
> -       case __csr_num:                                 \
> -               __val = csr_read(__csr_num);            \
> -               break;
> -#define switchcase_csr_read_2(__csr_num, __val)        \
> -       switchcase_csr_read(__csr_num + 0, __val)       \
> -       switchcase_csr_read(__csr_num + 1, __val)
> -#define switchcase_csr_read_4(__csr_num, __val)        \
> -       switchcase_csr_read_2(__csr_num + 0, __val)     \
> -       switchcase_csr_read_2(__csr_num + 2, __val)
> -#define switchcase_csr_read_8(__csr_num, __val)        \
> -       switchcase_csr_read_4(__csr_num + 0, __val)     \
> -       switchcase_csr_read_4(__csr_num + 4, __val)
> -#define switchcase_csr_read_16(__csr_num, __val)       \
> -       switchcase_csr_read_8(__csr_num + 0, __val)     \
> -       switchcase_csr_read_8(__csr_num + 8, __val)
> -#define switchcase_csr_read_32(__csr_num, __val)       \
> -       switchcase_csr_read_16(__csr_num + 0, __val)    \
> -       switchcase_csr_read_16(__csr_num + 16, __val)
> -#define switchcase_csr_read_64(__csr_num, __val)       \
> -       switchcase_csr_read_32(__csr_num + 0, __val)    \
> -       switchcase_csr_read_32(__csr_num + 32, __val)
> -
> -       unsigned long ret = 0;
> -
> -       switch (csr_num) {
> -       switchcase_csr_read_16(CSR_PMPCFG0, ret)
> -       switchcase_csr_read_64(CSR_PMPADDR0, ret)
> -       switchcase_csr_read(CSR_MCYCLE, ret)
> -       switchcase_csr_read(CSR_MINSTRET, ret)
> -       switchcase_csr_read(CSR_MHPMCOUNTER3, ret)
> -       switchcase_csr_read_4(CSR_MHPMCOUNTER4, ret)
> -       switchcase_csr_read_8(CSR_MHPMCOUNTER8, ret)
> -       switchcase_csr_read_16(CSR_MHPMCOUNTER16, ret)
> -       switchcase_csr_read(CSR_MCOUNTINHIBIT, ret)
> -       switchcase_csr_read(CSR_MHPMEVENT3, ret)
> -       switchcase_csr_read_4(CSR_MHPMEVENT4, ret)
> -       switchcase_csr_read_8(CSR_MHPMEVENT8, ret)
> -       switchcase_csr_read_16(CSR_MHPMEVENT16, ret)
> -#if __riscv_xlen == 32
> -       switchcase_csr_read(CSR_MCYCLEH, ret)
> -       switchcase_csr_read(CSR_MINSTRETH, ret)
> -       switchcase_csr_read(CSR_MHPMCOUNTER3H, ret)
> -       switchcase_csr_read_4(CSR_MHPMCOUNTER4H, ret)
> -       switchcase_csr_read_8(CSR_MHPMCOUNTER8H, ret)
> -       switchcase_csr_read_16(CSR_MHPMCOUNTER16H, ret)
> -       /**
> -        * The CSR range MHPMEVENT[3-16]H are available only if sscofpmf
> -        * extension is present. The caller must ensure that.
> -        */
> -       switchcase_csr_read(CSR_MHPMEVENT3H, ret)
> -       switchcase_csr_read_4(CSR_MHPMEVENT4H, ret)
> -       switchcase_csr_read_8(CSR_MHPMEVENT8H, ret)
> -       switchcase_csr_read_16(CSR_MHPMEVENT16H, ret)
> -#endif
> -
> -       default:
> -               sbi_panic("%s: Unknown CSR %#x", __func__, csr_num);
> -               break;
> -       };
> -
> -       return ret;
> -
> -#undef switchcase_csr_read_64
> -#undef switchcase_csr_read_32
> -#undef switchcase_csr_read_16
> -#undef switchcase_csr_read_8
> -#undef switchcase_csr_read_4
> -#undef switchcase_csr_read_2
> -#undef switchcase_csr_read
> +       volatile uint32_t opcode_buff[2];
> +       typedef unsigned long (*read_f)(void);
> +       opcode_buff[0] = (csr_num << 20) | 0x00002573; /* csrr a0, csr */
> +       opcode_buff[1] = 0x00008067; /* ret */
> +       RISCV_FENCE_I;
> +       return ((read_f)opcode_buff)();
>  }
>
>  void csr_write_num(int csr_num, unsigned long val)
>  {
> -#define switchcase_csr_write(__csr_num, __val)         \
> -       case __csr_num:                                 \
> -               csr_write(__csr_num, __val);            \
> -               break;
> -#define switchcase_csr_write_2(__csr_num, __val)       \
> -       switchcase_csr_write(__csr_num + 0, __val)      \
> -       switchcase_csr_write(__csr_num + 1, __val)
> -#define switchcase_csr_write_4(__csr_num, __val)       \
> -       switchcase_csr_write_2(__csr_num + 0, __val)    \
> -       switchcase_csr_write_2(__csr_num + 2, __val)
> -#define switchcase_csr_write_8(__csr_num, __val)       \
> -       switchcase_csr_write_4(__csr_num + 0, __val)    \
> -       switchcase_csr_write_4(__csr_num + 4, __val)
> -#define switchcase_csr_write_16(__csr_num, __val)      \
> -       switchcase_csr_write_8(__csr_num + 0, __val)    \
> -       switchcase_csr_write_8(__csr_num + 8, __val)
> -#define switchcase_csr_write_32(__csr_num, __val)      \
> -       switchcase_csr_write_16(__csr_num + 0, __val)   \
> -       switchcase_csr_write_16(__csr_num + 16, __val)
> -#define switchcase_csr_write_64(__csr_num, __val)      \
> -       switchcase_csr_write_32(__csr_num + 0, __val)   \
> -       switchcase_csr_write_32(__csr_num + 32, __val)
> -
> -       switch (csr_num) {
> -       switchcase_csr_write_16(CSR_PMPCFG0, val)
> -       switchcase_csr_write_64(CSR_PMPADDR0, val)
> -       switchcase_csr_write(CSR_MCYCLE, val)
> -       switchcase_csr_write(CSR_MINSTRET, val)
> -       switchcase_csr_write(CSR_MHPMCOUNTER3, val)
> -       switchcase_csr_write_4(CSR_MHPMCOUNTER4, val)
> -       switchcase_csr_write_8(CSR_MHPMCOUNTER8, val)
> -       switchcase_csr_write_16(CSR_MHPMCOUNTER16, val)
> -#if __riscv_xlen == 32
> -       switchcase_csr_write(CSR_MCYCLEH, val)
> -       switchcase_csr_write(CSR_MINSTRETH, val)
> -       switchcase_csr_write(CSR_MHPMCOUNTER3H, val)
> -       switchcase_csr_write_4(CSR_MHPMCOUNTER4H, val)
> -       switchcase_csr_write_8(CSR_MHPMCOUNTER8H, val)
> -       switchcase_csr_write_16(CSR_MHPMCOUNTER16H, val)
> -       switchcase_csr_write(CSR_MHPMEVENT3H, val)
> -       switchcase_csr_write_4(CSR_MHPMEVENT4H, val)
> -       switchcase_csr_write_8(CSR_MHPMEVENT8H, val)
> -       switchcase_csr_write_16(CSR_MHPMEVENT16H, val)
> -#endif
> -       switchcase_csr_write(CSR_MCOUNTINHIBIT, val)
> -       switchcase_csr_write(CSR_MHPMEVENT3, val)
> -       switchcase_csr_write_4(CSR_MHPMEVENT4, val)
> -       switchcase_csr_write_8(CSR_MHPMEVENT8, val)
> -       switchcase_csr_write_16(CSR_MHPMEVENT16, val)
> -
> -       default:
> -               sbi_panic("%s: Unknown CSR %#x", __func__, csr_num);
> -               break;
> -       };
> -
> -#undef switchcase_csr_write_64
> -#undef switchcase_csr_write_32
> -#undef switchcase_csr_write_16
> -#undef switchcase_csr_write_8
> -#undef switchcase_csr_write_4
> -#undef switchcase_csr_write_2
> -#undef switchcase_csr_write
> +       volatile uint32_t opcode_buff[2];
> +       typedef void (*write_f)(unsigned long val);
> +       opcode_buff[0] = (csr_num << 20) | 0x00051073; /* csrw csr, a0 */
> +       opcode_buff[1] = 0x00008067; /* ret */
> +       RISCV_FENCE_I;

I agree we need FENCE.I in the path but this is
like a big hammer in the execution path.

This is now much slower compared to the existing
approach because every CSR access using this
function will execute FENCE.I

Regards,
Anup

> +       ((write_f)opcode_buff)(val);
>  }
>
>  static unsigned long ctz(unsigned long x)
> --
> 2.30.2
>
>
> --
> opensbi mailing list
> opensbi@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/opensbi
Xiang W May 26, 2022, 3:19 p.m. UTC | #2
在 2022-05-26星期四的 20:35 +0530,Anup Patel写道:
> On Thu, May 26, 2022 at 8:12 PM Xiang W <wxjstz@126.com> wrote:
> > 
> > Reduce the overhead of csr read and write code by producing a small
> > piece of code to perform csr read and write.
> > 
> > Signed-off-by: Xiang W <wxjstz@126.com>
> > ---
> > Changes in v4:
> > - add fence.i sync instruction memory
> > 
> > Changes in v3:
> > - Prevent unnecessary optimizations by the compiler
> > 
> > Changes in v2:
> > - Fix thread safety related bugs as suggested by Anup
> > 
> >  lib/sbi/riscv_asm.c | 145 ++++----------------------------------------
> >  1 file changed, 13 insertions(+), 132 deletions(-)
> > 
> > diff --git a/lib/sbi/riscv_asm.c b/lib/sbi/riscv_asm.c
> > index a09cf78..452cb19 100644
> > --- a/lib/sbi/riscv_asm.c
> > +++ b/lib/sbi/riscv_asm.c
> > @@ -9,6 +9,7 @@
> > 
> >  #include <sbi/riscv_asm.h>
> >  #include <sbi/riscv_encoding.h>
> > +#include <sbi/riscv_barrier.h>
> >  #include <sbi/sbi_error.h>
> >  #include <sbi/sbi_platform.h>
> >  #include <sbi/sbi_console.h>
> > @@ -93,142 +94,22 @@ void misa_string(int xlen, char *out, unsigned int out_sz)
> > 
> >  unsigned long csr_read_num(int csr_num)
> >  {
> > -#define switchcase_csr_read(__csr_num, __val)          \
> > -       case __csr_num:                                 \
> > -               __val = csr_read(__csr_num);            \
> > -               break;
> > -#define switchcase_csr_read_2(__csr_num, __val)        \
> > -       switchcase_csr_read(__csr_num + 0, __val)       \
> > -       switchcase_csr_read(__csr_num + 1, __val)
> > -#define switchcase_csr_read_4(__csr_num, __val)        \
> > -       switchcase_csr_read_2(__csr_num + 0, __val)     \
> > -       switchcase_csr_read_2(__csr_num + 2, __val)
> > -#define switchcase_csr_read_8(__csr_num, __val)        \
> > -       switchcase_csr_read_4(__csr_num + 0, __val)     \
> > -       switchcase_csr_read_4(__csr_num + 4, __val)
> > -#define switchcase_csr_read_16(__csr_num, __val)       \
> > -       switchcase_csr_read_8(__csr_num + 0, __val)     \
> > -       switchcase_csr_read_8(__csr_num + 8, __val)
> > -#define switchcase_csr_read_32(__csr_num, __val)       \
> > -       switchcase_csr_read_16(__csr_num + 0, __val)    \
> > -       switchcase_csr_read_16(__csr_num + 16, __val)
> > -#define switchcase_csr_read_64(__csr_num, __val)       \
> > -       switchcase_csr_read_32(__csr_num + 0, __val)    \
> > -       switchcase_csr_read_32(__csr_num + 32, __val)
> > -
> > -       unsigned long ret = 0;
> > -
> > -       switch (csr_num) {
> > -       switchcase_csr_read_16(CSR_PMPCFG0, ret)
> > -       switchcase_csr_read_64(CSR_PMPADDR0, ret)
> > -       switchcase_csr_read(CSR_MCYCLE, ret)
> > -       switchcase_csr_read(CSR_MINSTRET, ret)
> > -       switchcase_csr_read(CSR_MHPMCOUNTER3, ret)
> > -       switchcase_csr_read_4(CSR_MHPMCOUNTER4, ret)
> > -       switchcase_csr_read_8(CSR_MHPMCOUNTER8, ret)
> > -       switchcase_csr_read_16(CSR_MHPMCOUNTER16, ret)
> > -       switchcase_csr_read(CSR_MCOUNTINHIBIT, ret)
> > -       switchcase_csr_read(CSR_MHPMEVENT3, ret)
> > -       switchcase_csr_read_4(CSR_MHPMEVENT4, ret)
> > -       switchcase_csr_read_8(CSR_MHPMEVENT8, ret)
> > -       switchcase_csr_read_16(CSR_MHPMEVENT16, ret)
> > -#if __riscv_xlen == 32
> > -       switchcase_csr_read(CSR_MCYCLEH, ret)
> > -       switchcase_csr_read(CSR_MINSTRETH, ret)
> > -       switchcase_csr_read(CSR_MHPMCOUNTER3H, ret)
> > -       switchcase_csr_read_4(CSR_MHPMCOUNTER4H, ret)
> > -       switchcase_csr_read_8(CSR_MHPMCOUNTER8H, ret)
> > -       switchcase_csr_read_16(CSR_MHPMCOUNTER16H, ret)
> > -       /**
> > -        * The CSR range MHPMEVENT[3-16]H are available only if sscofpmf
> > -        * extension is present. The caller must ensure that.
> > -        */
> > -       switchcase_csr_read(CSR_MHPMEVENT3H, ret)
> > -       switchcase_csr_read_4(CSR_MHPMEVENT4H, ret)
> > -       switchcase_csr_read_8(CSR_MHPMEVENT8H, ret)
> > -       switchcase_csr_read_16(CSR_MHPMEVENT16H, ret)
> > -#endif
> > -
> > -       default:
> > -               sbi_panic("%s: Unknown CSR %#x", __func__, csr_num);
> > -               break;
> > -       };
> > -
> > -       return ret;
> > -
> > -#undef switchcase_csr_read_64
> > -#undef switchcase_csr_read_32
> > -#undef switchcase_csr_read_16
> > -#undef switchcase_csr_read_8
> > -#undef switchcase_csr_read_4
> > -#undef switchcase_csr_read_2
> > -#undef switchcase_csr_read
> > +       volatile uint32_t opcode_buff[2];
> > +       typedef unsigned long (*read_f)(void);
> > +       opcode_buff[0] = (csr_num << 20) | 0x00002573; /* csrr a0, csr */
> > +       opcode_buff[1] = 0x00008067; /* ret */
> > +       RISCV_FENCE_I;
> > +       return ((read_f)opcode_buff)();
> >  }
> > 
> >  void csr_write_num(int csr_num, unsigned long val)
> >  {
> > -#define switchcase_csr_write(__csr_num, __val)         \
> > -       case __csr_num:                                 \
> > -               csr_write(__csr_num, __val);            \
> > -               break;
> > -#define switchcase_csr_write_2(__csr_num, __val)       \
> > -       switchcase_csr_write(__csr_num + 0, __val)      \
> > -       switchcase_csr_write(__csr_num + 1, __val)
> > -#define switchcase_csr_write_4(__csr_num, __val)       \
> > -       switchcase_csr_write_2(__csr_num + 0, __val)    \
> > -       switchcase_csr_write_2(__csr_num + 2, __val)
> > -#define switchcase_csr_write_8(__csr_num, __val)       \
> > -       switchcase_csr_write_4(__csr_num + 0, __val)    \
> > -       switchcase_csr_write_4(__csr_num + 4, __val)
> > -#define switchcase_csr_write_16(__csr_num, __val)      \
> > -       switchcase_csr_write_8(__csr_num + 0, __val)    \
> > -       switchcase_csr_write_8(__csr_num + 8, __val)
> > -#define switchcase_csr_write_32(__csr_num, __val)      \
> > -       switchcase_csr_write_16(__csr_num + 0, __val)   \
> > -       switchcase_csr_write_16(__csr_num + 16, __val)
> > -#define switchcase_csr_write_64(__csr_num, __val)      \
> > -       switchcase_csr_write_32(__csr_num + 0, __val)   \
> > -       switchcase_csr_write_32(__csr_num + 32, __val)
> > -
> > -       switch (csr_num) {
> > -       switchcase_csr_write_16(CSR_PMPCFG0, val)
> > -       switchcase_csr_write_64(CSR_PMPADDR0, val)
> > -       switchcase_csr_write(CSR_MCYCLE, val)
> > -       switchcase_csr_write(CSR_MINSTRET, val)
> > -       switchcase_csr_write(CSR_MHPMCOUNTER3, val)
> > -       switchcase_csr_write_4(CSR_MHPMCOUNTER4, val)
> > -       switchcase_csr_write_8(CSR_MHPMCOUNTER8, val)
> > -       switchcase_csr_write_16(CSR_MHPMCOUNTER16, val)
> > -#if __riscv_xlen == 32
> > -       switchcase_csr_write(CSR_MCYCLEH, val)
> > -       switchcase_csr_write(CSR_MINSTRETH, val)
> > -       switchcase_csr_write(CSR_MHPMCOUNTER3H, val)
> > -       switchcase_csr_write_4(CSR_MHPMCOUNTER4H, val)
> > -       switchcase_csr_write_8(CSR_MHPMCOUNTER8H, val)
> > -       switchcase_csr_write_16(CSR_MHPMCOUNTER16H, val)
> > -       switchcase_csr_write(CSR_MHPMEVENT3H, val)
> > -       switchcase_csr_write_4(CSR_MHPMEVENT4H, val)
> > -       switchcase_csr_write_8(CSR_MHPMEVENT8H, val)
> > -       switchcase_csr_write_16(CSR_MHPMEVENT16H, val)
> > -#endif
> > -       switchcase_csr_write(CSR_MCOUNTINHIBIT, val)
> > -       switchcase_csr_write(CSR_MHPMEVENT3, val)
> > -       switchcase_csr_write_4(CSR_MHPMEVENT4, val)
> > -       switchcase_csr_write_8(CSR_MHPMEVENT8, val)
> > -       switchcase_csr_write_16(CSR_MHPMEVENT16, val)
> > -
> > -       default:
> > -               sbi_panic("%s: Unknown CSR %#x", __func__, csr_num);
> > -               break;
> > -       };
> > -
> > -#undef switchcase_csr_write_64
> > -#undef switchcase_csr_write_32
> > -#undef switchcase_csr_write_16
> > -#undef switchcase_csr_write_8
> > -#undef switchcase_csr_write_4
> > -#undef switchcase_csr_write_2
> > -#undef switchcase_csr_write
> > +       volatile uint32_t opcode_buff[2];
> > +       typedef void (*write_f)(unsigned long val);
> > +       opcode_buff[0] = (csr_num << 20) | 0x00051073; /* csrw csr, a0 */
> > +       opcode_buff[1] = 0x00008067; /* ret */
> > +       RISCV_FENCE_I;
> 
> I agree we need FENCE.I in the path but this is
> like a big hammer in the execution path.
> 
> This is now much slower compared to the existing
> approach because every CSR access using this
> function will execute FENCE.I
I passed the qemu test, and it runs normally with
or without fence.i. But I'm not sure whether an exception
would occur on other platforms, or whether there is a more
lightweight way to do this.

Regards,
Xiang W
> 
> Regards,
> Anup
> 
> > +       ((write_f)opcode_buff)(val);
> >  }
> > 
> >  static unsigned long ctz(unsigned long x)
> > --
> > 2.30.2
> > 
> > 
> > --
> > opensbi mailing list
> > opensbi@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/opensbi
Jessica Clarke May 26, 2022, 3:28 p.m. UTC | #3
On 26 May 2022, at 16:19, Xiang W <wxjstz@126.com> wrote:
> 
> 在 2022-05-26星期四的 20:35 +0530,Anup Patel写道:
>> On Thu, May 26, 2022 at 8:12 PM Xiang W <wxjstz@126.com> wrote:
>>> 
>>> Reduce the overhead of csr read and write code by producing a small
>>> piece of code to perform csr read and write.
>>> 
>>> Signed-off-by: Xiang W <wxjstz@126.com>
>>> ---
>>> Changes in v4:
>>> - add fence.i sync instruction memory
>>> 
>>> Changes in v3:
>>> - Prevent unnecessary optimizations by the compiler
>>> 
>>> Changes in v2:
>>> - Fix thread safety related bugs as suggested by Anup
>>> 
>>> lib/sbi/riscv_asm.c | 145 ++++----------------------------------------
>>> 1 file changed, 13 insertions(+), 132 deletions(-)
>>> 
>>> diff --git a/lib/sbi/riscv_asm.c b/lib/sbi/riscv_asm.c
>>> index a09cf78..452cb19 100644
>>> --- a/lib/sbi/riscv_asm.c
>>> +++ b/lib/sbi/riscv_asm.c
>>> @@ -9,6 +9,7 @@
>>> 
>>> #include <sbi/riscv_asm.h>
>>> #include <sbi/riscv_encoding.h>
>>> +#include <sbi/riscv_barrier.h>
>>> #include <sbi/sbi_error.h>
>>> #include <sbi/sbi_platform.h>
>>> #include <sbi/sbi_console.h>
>>> @@ -93,142 +94,22 @@ void misa_string(int xlen, char *out, unsigned int out_sz)
>>> 
>>> unsigned long csr_read_num(int csr_num)
>>> {
>>> -#define switchcase_csr_read(__csr_num, __val)  \
>>> -  case __csr_num:  \
>>> -  __val = csr_read(__csr_num);  \
>>> -  break;
>>> -#define switchcase_csr_read_2(__csr_num, __val)  \
>>> -  switchcase_csr_read(__csr_num + 0, __val)  \
>>> -  switchcase_csr_read(__csr_num + 1, __val)
>>> -#define switchcase_csr_read_4(__csr_num, __val)  \
>>> -  switchcase_csr_read_2(__csr_num + 0, __val)  \
>>> -  switchcase_csr_read_2(__csr_num + 2, __val)
>>> -#define switchcase_csr_read_8(__csr_num, __val)  \
>>> -  switchcase_csr_read_4(__csr_num + 0, __val)  \
>>> -  switchcase_csr_read_4(__csr_num + 4, __val)
>>> -#define switchcase_csr_read_16(__csr_num, __val)  \
>>> -  switchcase_csr_read_8(__csr_num + 0, __val)  \
>>> -  switchcase_csr_read_8(__csr_num + 8, __val)
>>> -#define switchcase_csr_read_32(__csr_num, __val)  \
>>> -  switchcase_csr_read_16(__csr_num + 0, __val)  \
>>> -  switchcase_csr_read_16(__csr_num + 16, __val)
>>> -#define switchcase_csr_read_64(__csr_num, __val)  \
>>> -  switchcase_csr_read_32(__csr_num + 0, __val)  \
>>> -  switchcase_csr_read_32(__csr_num + 32, __val)
>>> -
>>> -  unsigned long ret = 0;
>>> -
>>> -  switch (csr_num) {
>>> -  switchcase_csr_read_16(CSR_PMPCFG0, ret)
>>> -  switchcase_csr_read_64(CSR_PMPADDR0, ret)
>>> -  switchcase_csr_read(CSR_MCYCLE, ret)
>>> -  switchcase_csr_read(CSR_MINSTRET, ret)
>>> -  switchcase_csr_read(CSR_MHPMCOUNTER3, ret)
>>> -  switchcase_csr_read_4(CSR_MHPMCOUNTER4, ret)
>>> -  switchcase_csr_read_8(CSR_MHPMCOUNTER8, ret)
>>> -  switchcase_csr_read_16(CSR_MHPMCOUNTER16, ret)
>>> -  switchcase_csr_read(CSR_MCOUNTINHIBIT, ret)
>>> -  switchcase_csr_read(CSR_MHPMEVENT3, ret)
>>> -  switchcase_csr_read_4(CSR_MHPMEVENT4, ret)
>>> -  switchcase_csr_read_8(CSR_MHPMEVENT8, ret)
>>> -  switchcase_csr_read_16(CSR_MHPMEVENT16, ret)
>>> -#if __riscv_xlen == 32
>>> -  switchcase_csr_read(CSR_MCYCLEH, ret)
>>> -  switchcase_csr_read(CSR_MINSTRETH, ret)
>>> -  switchcase_csr_read(CSR_MHPMCOUNTER3H, ret)
>>> -  switchcase_csr_read_4(CSR_MHPMCOUNTER4H, ret)
>>> -  switchcase_csr_read_8(CSR_MHPMCOUNTER8H, ret)
>>> -  switchcase_csr_read_16(CSR_MHPMCOUNTER16H, ret)
>>> -  /**
>>> -  * The CSR range MHPMEVENT[3-16]H are available only if sscofpmf
>>> -  * extension is present. The caller must ensure that.
>>> -  */
>>> -  switchcase_csr_read(CSR_MHPMEVENT3H, ret)
>>> -  switchcase_csr_read_4(CSR_MHPMEVENT4H, ret)
>>> -  switchcase_csr_read_8(CSR_MHPMEVENT8H, ret)
>>> -  switchcase_csr_read_16(CSR_MHPMEVENT16H, ret)
>>> -#endif
>>> -
>>> -  default:
>>> -  sbi_panic("%s: Unknown CSR %#x", __func__, csr_num);
>>> -  break;
>>> -  };
>>> -
>>> -  return ret;
>>> -
>>> -#undef switchcase_csr_read_64
>>> -#undef switchcase_csr_read_32
>>> -#undef switchcase_csr_read_16
>>> -#undef switchcase_csr_read_8
>>> -#undef switchcase_csr_read_4
>>> -#undef switchcase_csr_read_2
>>> -#undef switchcase_csr_read
>>> +  volatile uint32_t opcode_buff[2];
>>> +  typedef unsigned long (*read_f)(void);
>>> +  opcode_buff[0] = (csr_num << 20) | 0x00002573; /* csrr a0, csr */
>>> +  opcode_buff[1] = 0x00008067; /* ret */
>>> +  RISCV_FENCE_I;
>>> +  return ((read_f)opcode_buff)();
>>> }
>>> 
>>> void csr_write_num(int csr_num, unsigned long val)
>>> {
>>> -#define switchcase_csr_write(__csr_num, __val)  \
>>> -  case __csr_num:  \
>>> -  csr_write(__csr_num, __val);  \
>>> -  break;
>>> -#define switchcase_csr_write_2(__csr_num, __val)  \
>>> -  switchcase_csr_write(__csr_num + 0, __val)  \
>>> -  switchcase_csr_write(__csr_num + 1, __val)
>>> -#define switchcase_csr_write_4(__csr_num, __val)  \
>>> -  switchcase_csr_write_2(__csr_num + 0, __val)  \
>>> -  switchcase_csr_write_2(__csr_num + 2, __val)
>>> -#define switchcase_csr_write_8(__csr_num, __val)  \
>>> -  switchcase_csr_write_4(__csr_num + 0, __val)  \
>>> -  switchcase_csr_write_4(__csr_num + 4, __val)
>>> -#define switchcase_csr_write_16(__csr_num, __val)  \
>>> -  switchcase_csr_write_8(__csr_num + 0, __val)  \
>>> -  switchcase_csr_write_8(__csr_num + 8, __val)
>>> -#define switchcase_csr_write_32(__csr_num, __val)  \
>>> -  switchcase_csr_write_16(__csr_num + 0, __val)  \
>>> -  switchcase_csr_write_16(__csr_num + 16, __val)
>>> -#define switchcase_csr_write_64(__csr_num, __val)  \
>>> -  switchcase_csr_write_32(__csr_num + 0, __val)  \
>>> -  switchcase_csr_write_32(__csr_num + 32, __val)
>>> -
>>> -  switch (csr_num) {
>>> -  switchcase_csr_write_16(CSR_PMPCFG0, val)
>>> -  switchcase_csr_write_64(CSR_PMPADDR0, val)
>>> -  switchcase_csr_write(CSR_MCYCLE, val)
>>> -  switchcase_csr_write(CSR_MINSTRET, val)
>>> -  switchcase_csr_write(CSR_MHPMCOUNTER3, val)
>>> -  switchcase_csr_write_4(CSR_MHPMCOUNTER4, val)
>>> -  switchcase_csr_write_8(CSR_MHPMCOUNTER8, val)
>>> -  switchcase_csr_write_16(CSR_MHPMCOUNTER16, val)
>>> -#if __riscv_xlen == 32
>>> -  switchcase_csr_write(CSR_MCYCLEH, val)
>>> -  switchcase_csr_write(CSR_MINSTRETH, val)
>>> -  switchcase_csr_write(CSR_MHPMCOUNTER3H, val)
>>> -  switchcase_csr_write_4(CSR_MHPMCOUNTER4H, val)
>>> -  switchcase_csr_write_8(CSR_MHPMCOUNTER8H, val)
>>> -  switchcase_csr_write_16(CSR_MHPMCOUNTER16H, val)
>>> -  switchcase_csr_write(CSR_MHPMEVENT3H, val)
>>> -  switchcase_csr_write_4(CSR_MHPMEVENT4H, val)
>>> -  switchcase_csr_write_8(CSR_MHPMEVENT8H, val)
>>> -  switchcase_csr_write_16(CSR_MHPMEVENT16H, val)
>>> -#endif
>>> -  switchcase_csr_write(CSR_MCOUNTINHIBIT, val)
>>> -  switchcase_csr_write(CSR_MHPMEVENT3, val)
>>> -  switchcase_csr_write_4(CSR_MHPMEVENT4, val)
>>> -  switchcase_csr_write_8(CSR_MHPMEVENT8, val)
>>> -  switchcase_csr_write_16(CSR_MHPMEVENT16, val)
>>> -
>>> -  default:
>>> -  sbi_panic("%s: Unknown CSR %#x", __func__, csr_num);
>>> -  break;
>>> -  };
>>> -
>>> -#undef switchcase_csr_write_64
>>> -#undef switchcase_csr_write_32
>>> -#undef switchcase_csr_write_16
>>> -#undef switchcase_csr_write_8
>>> -#undef switchcase_csr_write_4
>>> -#undef switchcase_csr_write_2
>>> -#undef switchcase_csr_write
>>> +  volatile uint32_t opcode_buff[2];
>>> +  typedef void (*write_f)(unsigned long val);
>>> +  opcode_buff[0] = (csr_num << 20) | 0x00051073; /* csrw csr, a0 */
>>> +  opcode_buff[1] = 0x00008067; /* ret */
>>> +  RISCV_FENCE_I;
>> 
>> I agree we need FENCE.I in the path but this is
>> like a big hammer in the execution path.
>> 
>> This is now much slower compared to the existing
>> approach because every CSR access using this
>> function will execute FENCE.I
> I passed the qemu test, and it can be run normally with
> or without fence.i. But I'm not sure if the exception
> happens on other platform or we have a more lightweight
> way.

There is not. Optimising this kind of bad-practice code is not a goal
of modern architectures. After all, it totally precludes having W^X
protection in OpenSBI (e.g. via the PMP, or via CHERI); executable
stacks in particular form a key part of *the* classic buffer-overflow
shellcode attack.

Jess
Xiang W June 21, 2022, 3:53 p.m. UTC | #4
在 2022-05-26星期四的 16:28 +0100,Jessica Clarke写道:
> On 26 May 2022, at 16:19, Xiang W <wxjstz@126.com> wrote:
> > 
> > 在 2022-05-26星期四的 20:35 +0530,Anup Patel写道:
> > > On Thu, May 26, 2022 at 8:12 PM Xiang W <wxjstz@126.com> wrote:
> > > > 
> > > > Reduce the overhead of csr read and write code by producing a small
> > > > piece of code to perform csr read and write.
> > > > 
> > > > Signed-off-by: Xiang W <wxjstz@126.com>
> > > > ---
> > > > Changes in v4:
> > > > - add fence.i sync instruction memory
> > > > 
> > > > Changes in v3:
> > > > - Prevent unnecessary optimizations by the compiler
> > > > 
> > > > Changes in v2:
> > > > - Fix thread safety related bugs as suggested by Anup
> > > > 
> > > > lib/sbi/riscv_asm.c | 145 ++++----------------------------------------
> > > > 1 file changed, 13 insertions(+), 132 deletions(-)
> > > > 
> > > > diff --git a/lib/sbi/riscv_asm.c b/lib/sbi/riscv_asm.c
> > > > index a09cf78..452cb19 100644
> > > > --- a/lib/sbi/riscv_asm.c
> > > > +++ b/lib/sbi/riscv_asm.c
> > > > @@ -9,6 +9,7 @@
> > > > 
> > > > #include <sbi/riscv_asm.h>
> > > > #include <sbi/riscv_encoding.h>
> > > > +#include <sbi/riscv_barrier.h>
> > > > #include <sbi/sbi_error.h>
> > > > #include <sbi/sbi_platform.h>
> > > > #include <sbi/sbi_console.h>
> > > > @@ -93,142 +94,22 @@ void misa_string(int xlen, char *out, unsigned int out_sz)
> > > > 
> > > > unsigned long csr_read_num(int csr_num)
> > > > {
> > > > -#define switchcase_csr_read(__csr_num, __val)  \
> > > > -  case __csr_num:  \
> > > > -  __val = csr_read(__csr_num);  \
> > > > -  break;
> > > > -#define switchcase_csr_read_2(__csr_num, __val)  \
> > > > -  switchcase_csr_read(__csr_num + 0, __val)  \
> > > > -  switchcase_csr_read(__csr_num + 1, __val)
> > > > -#define switchcase_csr_read_4(__csr_num, __val)  \
> > > > -  switchcase_csr_read_2(__csr_num + 0, __val)  \
> > > > -  switchcase_csr_read_2(__csr_num + 2, __val)
> > > > -#define switchcase_csr_read_8(__csr_num, __val)  \
> > > > -  switchcase_csr_read_4(__csr_num + 0, __val)  \
> > > > -  switchcase_csr_read_4(__csr_num + 4, __val)
> > > > -#define switchcase_csr_read_16(__csr_num, __val)  \
> > > > -  switchcase_csr_read_8(__csr_num + 0, __val)  \
> > > > -  switchcase_csr_read_8(__csr_num + 8, __val)
> > > > -#define switchcase_csr_read_32(__csr_num, __val)  \
> > > > -  switchcase_csr_read_16(__csr_num + 0, __val)  \
> > > > -  switchcase_csr_read_16(__csr_num + 16, __val)
> > > > -#define switchcase_csr_read_64(__csr_num, __val)  \
> > > > -  switchcase_csr_read_32(__csr_num + 0, __val)  \
> > > > -  switchcase_csr_read_32(__csr_num + 32, __val)
> > > > -
> > > > -  unsigned long ret = 0;
> > > > -
> > > > -  switch (csr_num) {
> > > > -  switchcase_csr_read_16(CSR_PMPCFG0, ret)
> > > > -  switchcase_csr_read_64(CSR_PMPADDR0, ret)
> > > > -  switchcase_csr_read(CSR_MCYCLE, ret)
> > > > -  switchcase_csr_read(CSR_MINSTRET, ret)
> > > > -  switchcase_csr_read(CSR_MHPMCOUNTER3, ret)
> > > > -  switchcase_csr_read_4(CSR_MHPMCOUNTER4, ret)
> > > > -  switchcase_csr_read_8(CSR_MHPMCOUNTER8, ret)
> > > > -  switchcase_csr_read_16(CSR_MHPMCOUNTER16, ret)
> > > > -  switchcase_csr_read(CSR_MCOUNTINHIBIT, ret)
> > > > -  switchcase_csr_read(CSR_MHPMEVENT3, ret)
> > > > -  switchcase_csr_read_4(CSR_MHPMEVENT4, ret)
> > > > -  switchcase_csr_read_8(CSR_MHPMEVENT8, ret)
> > > > -  switchcase_csr_read_16(CSR_MHPMEVENT16, ret)
> > > > -#if __riscv_xlen == 32
> > > > -  switchcase_csr_read(CSR_MCYCLEH, ret)
> > > > -  switchcase_csr_read(CSR_MINSTRETH, ret)
> > > > -  switchcase_csr_read(CSR_MHPMCOUNTER3H, ret)
> > > > -  switchcase_csr_read_4(CSR_MHPMCOUNTER4H, ret)
> > > > -  switchcase_csr_read_8(CSR_MHPMCOUNTER8H, ret)
> > > > -  switchcase_csr_read_16(CSR_MHPMCOUNTER16H, ret)
> > > > -  /**
> > > > -  * The CSR range MHPMEVENT[3-16]H are available only if sscofpmf
> > > > -  * extension is present. The caller must ensure that.
> > > > -  */
> > > > -  switchcase_csr_read(CSR_MHPMEVENT3H, ret)
> > > > -  switchcase_csr_read_4(CSR_MHPMEVENT4H, ret)
> > > > -  switchcase_csr_read_8(CSR_MHPMEVENT8H, ret)
> > > > -  switchcase_csr_read_16(CSR_MHPMEVENT16H, ret)
> > > > -#endif
> > > > -
> > > > -  default:
> > > > -  sbi_panic("%s: Unknown CSR %#x", __func__, csr_num);
> > > > -  break;
> > > > -  };
> > > > -
> > > > -  return ret;
> > > > -
> > > > -#undef switchcase_csr_read_64
> > > > -#undef switchcase_csr_read_32
> > > > -#undef switchcase_csr_read_16
> > > > -#undef switchcase_csr_read_8
> > > > -#undef switchcase_csr_read_4
> > > > -#undef switchcase_csr_read_2
> > > > -#undef switchcase_csr_read
> > > > +  volatile uint32_t opcode_buff[2];
> > > > +  typedef unsigned long (*read_f)(void);
> > > > +  opcode_buff[0] = (csr_num << 20) | 0x00002573; /* csrr a0, csr */
> > > > +  opcode_buff[1] = 0x00008067; /* ret */
> > > > +  RISCV_FENCE_I;
> > > > +  return ((read_f)opcode_buff)();
> > > > }
> > > > 
> > > > void csr_write_num(int csr_num, unsigned long val)
> > > > {
> > > > -#define switchcase_csr_write(__csr_num, __val)  \
> > > > -  case __csr_num:  \
> > > > -  csr_write(__csr_num, __val);  \
> > > > -  break;
> > > > -#define switchcase_csr_write_2(__csr_num, __val)  \
> > > > -  switchcase_csr_write(__csr_num + 0, __val)  \
> > > > -  switchcase_csr_write(__csr_num + 1, __val)
> > > > -#define switchcase_csr_write_4(__csr_num, __val)  \
> > > > -  switchcase_csr_write_2(__csr_num + 0, __val)  \
> > > > -  switchcase_csr_write_2(__csr_num + 2, __val)
> > > > -#define switchcase_csr_write_8(__csr_num, __val)  \
> > > > -  switchcase_csr_write_4(__csr_num + 0, __val)  \
> > > > -  switchcase_csr_write_4(__csr_num + 4, __val)
> > > > -#define switchcase_csr_write_16(__csr_num, __val)  \
> > > > -  switchcase_csr_write_8(__csr_num + 0, __val)  \
> > > > -  switchcase_csr_write_8(__csr_num + 8, __val)
> > > > -#define switchcase_csr_write_32(__csr_num, __val)  \
> > > > -  switchcase_csr_write_16(__csr_num + 0, __val)  \
> > > > -  switchcase_csr_write_16(__csr_num + 16, __val)
> > > > -#define switchcase_csr_write_64(__csr_num, __val)  \
> > > > -  switchcase_csr_write_32(__csr_num + 0, __val)  \
> > > > -  switchcase_csr_write_32(__csr_num + 32, __val)
> > > > -
> > > > -  switch (csr_num) {
> > > > -  switchcase_csr_write_16(CSR_PMPCFG0, val)
> > > > -  switchcase_csr_write_64(CSR_PMPADDR0, val)
> > > > -  switchcase_csr_write(CSR_MCYCLE, val)
> > > > -  switchcase_csr_write(CSR_MINSTRET, val)
> > > > -  switchcase_csr_write(CSR_MHPMCOUNTER3, val)
> > > > -  switchcase_csr_write_4(CSR_MHPMCOUNTER4, val)
> > > > -  switchcase_csr_write_8(CSR_MHPMCOUNTER8, val)
> > > > -  switchcase_csr_write_16(CSR_MHPMCOUNTER16, val)
> > > > -#if __riscv_xlen == 32
> > > > -  switchcase_csr_write(CSR_MCYCLEH, val)
> > > > -  switchcase_csr_write(CSR_MINSTRETH, val)
> > > > -  switchcase_csr_write(CSR_MHPMCOUNTER3H, val)
> > > > -  switchcase_csr_write_4(CSR_MHPMCOUNTER4H, val)
> > > > -  switchcase_csr_write_8(CSR_MHPMCOUNTER8H, val)
> > > > -  switchcase_csr_write_16(CSR_MHPMCOUNTER16H, val)
> > > > -  switchcase_csr_write(CSR_MHPMEVENT3H, val)
> > > > -  switchcase_csr_write_4(CSR_MHPMEVENT4H, val)
> > > > -  switchcase_csr_write_8(CSR_MHPMEVENT8H, val)
> > > > -  switchcase_csr_write_16(CSR_MHPMEVENT16H, val)
> > > > -#endif
> > > > -  switchcase_csr_write(CSR_MCOUNTINHIBIT, val)
> > > > -  switchcase_csr_write(CSR_MHPMEVENT3, val)
> > > > -  switchcase_csr_write_4(CSR_MHPMEVENT4, val)
> > > > -  switchcase_csr_write_8(CSR_MHPMEVENT8, val)
> > > > -  switchcase_csr_write_16(CSR_MHPMEVENT16, val)
> > > > -
> > > > -  default:
> > > > -  sbi_panic("%s: Unknown CSR %#x", __func__, csr_num);
> > > > -  break;
> > > > -  };
> > > > -
> > > > -#undef switchcase_csr_write_64
> > > > -#undef switchcase_csr_write_32
> > > > -#undef switchcase_csr_write_16
> > > > -#undef switchcase_csr_write_8
> > > > -#undef switchcase_csr_write_4
> > > > -#undef switchcase_csr_write_2
> > > > -#undef switchcase_csr_write
> > > > +  volatile uint32_t opcode_buff[2];
> > > > +  typedef void (*write_f)(unsigned long val);
> > > > +  opcode_buff[0] = (csr_num << 20) | 0x00051073; /* csrw csr, a0 */
> > > > +  opcode_buff[1] = 0x00008067; /* ret */
> > > > +  RISCV_FENCE_I;
> > > 
> > > I agree we need FENCE.I in the path but this is
> > > like a big hammer in the execution path.
> > > 
> > > This is now much slower compared to the existing
> > > approach because every CSR access using this
> > > function will execute FENCE.I
> > I passed the qemu test, and it can be run normally with
> > or without fence.i. But I'm not sure if the exception
> > happens on other platform or we have a more lightweight
> > way.
> 
> There is not. Optimising this kind of bad practice code is not a goal
> of modern architectures. After all, it totally precludes having W^X
> protection in OpenSBI (e.g. via the PMP, or via CHERI), especially
> executable stacks form a key part of *the* classic buffer overflow
> shell code attack.
> 
> Jess

Currently opensbi does not have strict RWX permission control. We only
need to ensure that the W and X permissions never exist at the same time
to avoid security risks. If opensbi gains RWX permission control, we only
need to change the permission of opcode_buff before and after running:
the stack defaults to RW permission, so we can change the permission of
opcode_buff to RX before the call, and change it back to RW after the
call completes.

I think this work can be done after opensbi has RWX permission control.

Regards,
Xiang W
> 
>
dramforever June 22, 2022, 7:12 a.m. UTC | #5
On 6/21/22 23:53, Xiang W wrote:
> 在 2022-05-26星期四的 16:28 +0100,Jessica Clarke写道:
>> On 26 May 2022, at 16:19, Xiang W <wxjstz@126.com> wrote:
>>> 在 2022-05-26星期四的 20:35 +0530,Anup Patel写道:
>>>> On Thu, May 26, 2022 at 8:12 PM Xiang W <wxjstz@126.com> wrote:
>>>>> <patch contents snipped>
>>>> I agree we need FENCE.I in the path but this is
>>>> like a big hammer in the execution path.
>>>>
>>>> This is now much slower compared to the existing
>>>> approach because every CSR access using this
>>>> function will execute FENCE.I
>>> I passed the qemu test, and it can be run normally with
>>> or without fence.i. But I'm not sure if the exception
>>> happens on other platform or we have a more lightweight
>>> way.
>> There is not. Optimising this kind of bad practice code is not a goal
>> of modern architectures. After all, it totally precludes having W^X
>> protection in OpenSBI (e.g. via the PMP, or via CHERI), especially
>> executable stacks form a key part of *the* classic buffer overflow
>> shell code attack.
>>
>> Jess
> Currently opensbi does not have strict RWX permission control. We only 
> need W and X permissions not to exist at the same time to avoid security
> risks. If opensbi has RWX permission control, we only need to change the
> permission of opcode_buff before and after running. The stack default is
> RW permission, we can change the permission of opcode_buff to RX before
> the call, and change the permission to RW after the call is completed.
>
> I think these jobs can be put after opensbi has rwx permission control.
>
> Regards,
> Xiang W

But in that case, wouldn't this codegen approach be even slower? Is
there any particular reason to believe that this will be faster than the
current switch-case? Some benchmark on existing silicon would be nice.

The original implementation compiles down to a jump table, with several
branches and bounds checks at the beginning. It really is just a hunch,
but I find it hard to see how generating code at runtime could be faster.

Optimizing the current switch-case approach into a better jump table
seems to be the way to go here. Currently the compiler does not seem to
take advantage of the fact that every csr* instruction is exactly 4
bytes. There also seems to be a few extra instructions for dealing with
int instead of long, though I am not sure. Furthermore, having separate
functions for hpmcounter, pmpcfg, etc. might help with avoiding some
branches, and possibly also make code clearer at call site.

Regards,
dram
Xiang W June 22, 2022, 2:57 p.m. UTC | #6
在 2022-06-22星期三的 15:12 +0800,dramforever写道:
> 
> On 6/21/22 23:53, Xiang W wrote:
> > 在 2022-05-26星期四的 16:28 +0100,Jessica Clarke写道:
> > > On 26 May 2022, at 16:19, Xiang W <wxjstz@126.com> wrote:
> > > > 在 2022-05-26星期四的 20:35 +0530,Anup Patel写道:
> > > > > On Thu, May 26, 2022 at 8:12 PM Xiang W <wxjstz@126.com> wrote:
> > > > > > <patch contents snipped>
> > > > > I agree we need FENCE.I in the path but this is
> > > > > like a big hammer in the execution path.
> > > > > 
> > > > > This is now much slower compared to the existing
> > > > > approach because every CSR access using this
> > > > > function will execute FENCE.I
> > > > I passed the qemu test, and it can be run normally with
> > > > or without fence.i. But I'm not sure if the exception
> > > > happens on other platform or we have a more lightweight
> > > > way.
> > > There is not. Optimising this kind of bad practice code is not a goal
> > > of modern architectures. After all, it totally precludes having W^X
> > > protection in OpenSBI (e.g. via the PMP, or via CHERI), especially
> > > executable stacks form a key part of *the* classic buffer overflow
> > > shell code attack.
> > > 
> > > Jess
> > Currently opensbi does not have strict RWX permission control. We only 
> > need W and X permissions not to exist at the same time to avoid security
> > risks. If opensbi has RWX permission control, we only need to change the
> > permission of opcode_buff before and after running. The stack default is
> > RW permission, we can change the permission of opcode_buff to RX before
> > the call, and change the permission to RW after the call is completed.
> > 
> > I think these jobs can be put after opensbi has rwx permission control.
> > 
> > Regards,
> > Xiang W
> 
> But in that case, wouldn't this codegen approach be even slower? Is
> there any particular reason to believe that this will be faster than the
> current switch-case? Some benchmark on existing silicon would be nice.

Yes, it will definitely be slower. This change only reduces the maintenance
cost of csr_read_num/csr_write_num, since no new code has to be added when
support for reading a new CSR is needed.

Regards,
Xiang W
> 
> The original implementation compiles down a jump table, with several
> branches and bound checks at the beginning. It really is just a hunch,
> but I find it difficult to be convinced how generating code could be faster.
> 
> Optimizing the current switch-case approach into a better jump table
> seems to be the way to go here. Currently the compiler does not seem to
> take advantage of the fact that every csr* instruction is exactly 4
> bytes. There also seems to be a few extra instructions for dealing with
> int instead of long, though I am not sure. Furthermore, having separate
> functions for hpmcounter, pmpcfg, etc. might help with avoiding some
> branches, and possibly also make code clearer at call site.
> 
> Regards,
> dram
>
diff mbox series

Patch

diff --git a/lib/sbi/riscv_asm.c b/lib/sbi/riscv_asm.c
index a09cf78..452cb19 100644
--- a/lib/sbi/riscv_asm.c
+++ b/lib/sbi/riscv_asm.c
@@ -9,6 +9,7 @@ 
 
 #include <sbi/riscv_asm.h>
 #include <sbi/riscv_encoding.h>
+#include <sbi/riscv_barrier.h>
 #include <sbi/sbi_error.h>
 #include <sbi/sbi_platform.h>
 #include <sbi/sbi_console.h>
@@ -93,142 +94,22 @@  void misa_string(int xlen, char *out, unsigned int out_sz)
 
 unsigned long csr_read_num(int csr_num)
 {
-#define switchcase_csr_read(__csr_num, __val)		\
-	case __csr_num:					\
-		__val = csr_read(__csr_num);		\
-		break;
-#define switchcase_csr_read_2(__csr_num, __val)	\
-	switchcase_csr_read(__csr_num + 0, __val)	\
-	switchcase_csr_read(__csr_num + 1, __val)
-#define switchcase_csr_read_4(__csr_num, __val)	\
-	switchcase_csr_read_2(__csr_num + 0, __val)	\
-	switchcase_csr_read_2(__csr_num + 2, __val)
-#define switchcase_csr_read_8(__csr_num, __val)	\
-	switchcase_csr_read_4(__csr_num + 0, __val)	\
-	switchcase_csr_read_4(__csr_num + 4, __val)
-#define switchcase_csr_read_16(__csr_num, __val)	\
-	switchcase_csr_read_8(__csr_num + 0, __val)	\
-	switchcase_csr_read_8(__csr_num + 8, __val)
-#define switchcase_csr_read_32(__csr_num, __val)	\
-	switchcase_csr_read_16(__csr_num + 0, __val)	\
-	switchcase_csr_read_16(__csr_num + 16, __val)
-#define switchcase_csr_read_64(__csr_num, __val)	\
-	switchcase_csr_read_32(__csr_num + 0, __val)	\
-	switchcase_csr_read_32(__csr_num + 32, __val)
-
-	unsigned long ret = 0;
-
-	switch (csr_num) {
-	switchcase_csr_read_16(CSR_PMPCFG0, ret)
-	switchcase_csr_read_64(CSR_PMPADDR0, ret)
-	switchcase_csr_read(CSR_MCYCLE, ret)
-	switchcase_csr_read(CSR_MINSTRET, ret)
-	switchcase_csr_read(CSR_MHPMCOUNTER3, ret)
-	switchcase_csr_read_4(CSR_MHPMCOUNTER4, ret)
-	switchcase_csr_read_8(CSR_MHPMCOUNTER8, ret)
-	switchcase_csr_read_16(CSR_MHPMCOUNTER16, ret)
-	switchcase_csr_read(CSR_MCOUNTINHIBIT, ret)
-	switchcase_csr_read(CSR_MHPMEVENT3, ret)
-	switchcase_csr_read_4(CSR_MHPMEVENT4, ret)
-	switchcase_csr_read_8(CSR_MHPMEVENT8, ret)
-	switchcase_csr_read_16(CSR_MHPMEVENT16, ret)
-#if __riscv_xlen == 32
-	switchcase_csr_read(CSR_MCYCLEH, ret)
-	switchcase_csr_read(CSR_MINSTRETH, ret)
-	switchcase_csr_read(CSR_MHPMCOUNTER3H, ret)
-	switchcase_csr_read_4(CSR_MHPMCOUNTER4H, ret)
-	switchcase_csr_read_8(CSR_MHPMCOUNTER8H, ret)
-	switchcase_csr_read_16(CSR_MHPMCOUNTER16H, ret)
-	/**
-	 * The CSR range MHPMEVENT[3-16]H are available only if sscofpmf
-	 * extension is present. The caller must ensure that.
-	 */
-	switchcase_csr_read(CSR_MHPMEVENT3H, ret)
-	switchcase_csr_read_4(CSR_MHPMEVENT4H, ret)
-	switchcase_csr_read_8(CSR_MHPMEVENT8H, ret)
-	switchcase_csr_read_16(CSR_MHPMEVENT16H, ret)
-#endif
-
-	default:
-		sbi_panic("%s: Unknown CSR %#x", __func__, csr_num);
-		break;
-	};
-
-	return ret;
-
-#undef switchcase_csr_read_64
-#undef switchcase_csr_read_32
-#undef switchcase_csr_read_16
-#undef switchcase_csr_read_8
-#undef switchcase_csr_read_4
-#undef switchcase_csr_read_2
-#undef switchcase_csr_read
+	volatile uint32_t opcode_buff[2];
+	typedef unsigned long (*read_f)(void);
+	opcode_buff[0] = (csr_num << 20) | 0x00002573; /* csrr a0, csr */
+	opcode_buff[1] = 0x00008067; /* ret */
+	RISCV_FENCE_I;
+	return ((read_f)opcode_buff)();
 }
 
 void csr_write_num(int csr_num, unsigned long val)
 {
-#define switchcase_csr_write(__csr_num, __val)		\
-	case __csr_num:					\
-		csr_write(__csr_num, __val);		\
-		break;
-#define switchcase_csr_write_2(__csr_num, __val)	\
-	switchcase_csr_write(__csr_num + 0, __val)	\
-	switchcase_csr_write(__csr_num + 1, __val)
-#define switchcase_csr_write_4(__csr_num, __val)	\
-	switchcase_csr_write_2(__csr_num + 0, __val)	\
-	switchcase_csr_write_2(__csr_num + 2, __val)
-#define switchcase_csr_write_8(__csr_num, __val)	\
-	switchcase_csr_write_4(__csr_num + 0, __val)	\
-	switchcase_csr_write_4(__csr_num + 4, __val)
-#define switchcase_csr_write_16(__csr_num, __val)	\
-	switchcase_csr_write_8(__csr_num + 0, __val)	\
-	switchcase_csr_write_8(__csr_num + 8, __val)
-#define switchcase_csr_write_32(__csr_num, __val)	\
-	switchcase_csr_write_16(__csr_num + 0, __val)	\
-	switchcase_csr_write_16(__csr_num + 16, __val)
-#define switchcase_csr_write_64(__csr_num, __val)	\
-	switchcase_csr_write_32(__csr_num + 0, __val)	\
-	switchcase_csr_write_32(__csr_num + 32, __val)
-
-	switch (csr_num) {
-	switchcase_csr_write_16(CSR_PMPCFG0, val)
-	switchcase_csr_write_64(CSR_PMPADDR0, val)
-	switchcase_csr_write(CSR_MCYCLE, val)
-	switchcase_csr_write(CSR_MINSTRET, val)
-	switchcase_csr_write(CSR_MHPMCOUNTER3, val)
-	switchcase_csr_write_4(CSR_MHPMCOUNTER4, val)
-	switchcase_csr_write_8(CSR_MHPMCOUNTER8, val)
-	switchcase_csr_write_16(CSR_MHPMCOUNTER16, val)
-#if __riscv_xlen == 32
-	switchcase_csr_write(CSR_MCYCLEH, val)
-	switchcase_csr_write(CSR_MINSTRETH, val)
-	switchcase_csr_write(CSR_MHPMCOUNTER3H, val)
-	switchcase_csr_write_4(CSR_MHPMCOUNTER4H, val)
-	switchcase_csr_write_8(CSR_MHPMCOUNTER8H, val)
-	switchcase_csr_write_16(CSR_MHPMCOUNTER16H, val)
-	switchcase_csr_write(CSR_MHPMEVENT3H, val)
-	switchcase_csr_write_4(CSR_MHPMEVENT4H, val)
-	switchcase_csr_write_8(CSR_MHPMEVENT8H, val)
-	switchcase_csr_write_16(CSR_MHPMEVENT16H, val)
-#endif
-	switchcase_csr_write(CSR_MCOUNTINHIBIT, val)
-	switchcase_csr_write(CSR_MHPMEVENT3, val)
-	switchcase_csr_write_4(CSR_MHPMEVENT4, val)
-	switchcase_csr_write_8(CSR_MHPMEVENT8, val)
-	switchcase_csr_write_16(CSR_MHPMEVENT16, val)
-
-	default:
-		sbi_panic("%s: Unknown CSR %#x", __func__, csr_num);
-		break;
-	};
-
-#undef switchcase_csr_write_64
-#undef switchcase_csr_write_32
-#undef switchcase_csr_write_16
-#undef switchcase_csr_write_8
-#undef switchcase_csr_write_4
-#undef switchcase_csr_write_2
-#undef switchcase_csr_write
+	volatile uint32_t opcode_buff[2];
+	typedef void (*write_f)(unsigned long val);
+	opcode_buff[0] = (csr_num << 20) | 0x00051073; /* csrw csr, a0 */
+	opcode_buff[1] = 0x00008067; /* ret */
+	RISCV_FENCE_I;
+	((write_f)opcode_buff)(val);
 }
 
 static unsigned long ctz(unsigned long x)