diff mbox series

target/riscv: reduce overhead of MSTATUS_SUM change

Message ID 20230321063746.151107-1-fei2.wu@intel.com
State New
Headers show
Series target/riscv: reduce overhead of MSTATUS_SUM change | expand

Commit Message

Wu, Fei March 21, 2023, 6:37 a.m. UTC
From: Fei Wu <fei2.wu@intel.com>

Kernel needs to access user mode memory e.g. during syscalls, the window
is usually opened up for a very limited time through MSTATUS.SUM, the
overhead is too much if tlb_flush() gets called for every SUM change.
This patch saves addresses accessed when SUM=1, and flushs only these
pages when SUM changes to 0. If the buffer is not large enough to save
all the pages during SUM=1, it will fall back to tlb_flush when
necessary.

The buffer size is set to 4 since in this MSTATUS.SUM open-up window,
most of the time kernel accesses 1 or 2 pages, it's very rare to see
more than 4 pages accessed.

It's not necessary to save/restore these new added status, as
tlb_flush() is always called after restore.

Result of 'pipe 10' from unixbench boosts from 223656 to 1327407. Many
other syscalls benefit a lot from this one too.

Signed-off-by: Fei Wu <fei2.wu@intel.com>
Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
---
 target/riscv/cpu.h        |  4 ++++
 target/riscv/cpu_helper.c |  7 +++++++
 target/riscv/csr.c        | 14 +++++++++++++-
 3 files changed, 24 insertions(+), 1 deletion(-)

Comments

Weiwei Li March 21, 2023, 8:28 a.m. UTC | #1
On 2023/3/21 14:37, fei2.wu@intel.com wrote:
> From: Fei Wu <fei2.wu@intel.com>
>
> Kernel needs to access user mode memory e.g. during syscalls, the window
> is usually opened up for a very limited time through MSTATUS.SUM, the
> overhead is too much if tlb_flush() gets called for every SUM change.
> This patch saves addresses accessed when SUM=1, and flushs only these
> pages when SUM changes to 0. If the buffer is not large enough to save
> all the pages during SUM=1, it will fall back to tlb_flush when
> necessary.
>
> The buffer size is set to 4 since in this MSTATUS.SUM open-up window,
> most of the time kernel accesses 1 or 2 pages, it's very rare to see
> more than 4 pages accessed.
>
> It's not necessary to save/restore these new added status, as
> tlb_flush() is always called after restore.
>
> Result of 'pipe 10' from unixbench boosts from 223656 to 1327407. Many
> other syscalls benefit a lot from this one too.
>
> Signed-off-by: Fei Wu <fei2.wu@intel.com>
> Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
> ---
>   target/riscv/cpu.h        |  4 ++++
>   target/riscv/cpu_helper.c |  7 +++++++
>   target/riscv/csr.c        | 14 +++++++++++++-
>   3 files changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 638e47c75a..926dbce59f 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -383,6 +383,10 @@ struct CPUArchState {
>       uint64_t kvm_timer_compare;
>       uint64_t kvm_timer_state;
>       uint64_t kvm_timer_frequency;
> +
> +#define MAX_CACHED_SUM_U_ADDR_NUM 4
> +    uint64_t sum_u_count;
> +    uint64_t sum_u_addr[MAX_CACHED_SUM_U_ADDR_NUM];
>   };
>   
>   OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index f88c503cf4..5ad0418eb6 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -1068,6 +1068,13 @@ restart:
>                       (access_type == MMU_DATA_STORE || (pte & PTE_D))) {
>                   *prot |= PAGE_WRITE;
>               }
> +            if ((pte & PTE_U) && (mode & PRV_S) &&
> +                    get_field(env->mstatus, MSTATUS_SUM)) {
> +                if (env->sum_u_count < MAX_CACHED_SUM_U_ADDR_NUM) {
> +                    env->sum_u_addr[env->sum_u_count] = addr;
> +                }
> +                ++env->sum_u_count;
> +            }
>               return TRANSLATE_SUCCESS;
>           }
>       }
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index ab566639e5..74b7638c8a 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -1246,9 +1246,21 @@ static RISCVException write_mstatus(CPURISCVState *env, int csrno,
>   
>       /* flush tlb on mstatus fields that affect VM */
>       if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
> -            MSTATUS_MPRV | MSTATUS_SUM)) {
> +            MSTATUS_MPRV)) {
>           tlb_flush(env_cpu(env));
> +        env->sum_u_count = 0;
> +    } else if ((mstatus & MSTATUS_SUM) && !(val & MSTATUS_SUM)) {
> +        if (env->sum_u_count > MAX_CACHED_SUM_U_ADDR_NUM) {
> +            tlb_flush(env_cpu(env));
> +        } else {
> +            for (int i = 0; i < env->sum_u_count; ++i) {
> +                tlb_flush_page_by_mmuidx(env_cpu(env), env->sum_u_addr[i],
> +                                         1 << PRV_S | 1 << PRV_M);
> +            }
> +        }
> +        env->sum_u_count = 0;
>       }

Whether tlb should  be flushed when SUM is changed from 0 to 1?

Regards,

Weiwei Li

> +
>       mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
>           MSTATUS_SPP | MSTATUS_MPRV | MSTATUS_SUM |
>           MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR |
Wu, Fei March 21, 2023, 8:40 a.m. UTC | #2
On 3/21/2023 4:28 PM, liweiwei wrote:
> 
> On 2023/3/21 14:37, fei2.wu@intel.com wrote:
>> From: Fei Wu <fei2.wu@intel.com>
>>
>> Kernel needs to access user mode memory e.g. during syscalls, the window
>> is usually opened up for a very limited time through MSTATUS.SUM, the
>> overhead is too much if tlb_flush() gets called for every SUM change.
>> This patch saves addresses accessed when SUM=1, and flushs only these
>> pages when SUM changes to 0. If the buffer is not large enough to save
>> all the pages during SUM=1, it will fall back to tlb_flush when
>> necessary.
>>
>> The buffer size is set to 4 since in this MSTATUS.SUM open-up window,
>> most of the time kernel accesses 1 or 2 pages, it's very rare to see
>> more than 4 pages accessed.
>>
>> It's not necessary to save/restore these new added status, as
>> tlb_flush() is always called after restore.
>>
>> Result of 'pipe 10' from unixbench boosts from 223656 to 1327407. Many
>> other syscalls benefit a lot from this one too.
>>
>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>> Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
>> ---
>>   target/riscv/cpu.h        |  4 ++++
>>   target/riscv/cpu_helper.c |  7 +++++++
>>   target/riscv/csr.c        | 14 +++++++++++++-
>>   3 files changed, 24 insertions(+), 1 deletion(-)
>>
>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>> index 638e47c75a..926dbce59f 100644
>> --- a/target/riscv/cpu.h
>> +++ b/target/riscv/cpu.h
>> @@ -383,6 +383,10 @@ struct CPUArchState {
>>       uint64_t kvm_timer_compare;
>>       uint64_t kvm_timer_state;
>>       uint64_t kvm_timer_frequency;
>> +
>> +#define MAX_CACHED_SUM_U_ADDR_NUM 4
>> +    uint64_t sum_u_count;
>> +    uint64_t sum_u_addr[MAX_CACHED_SUM_U_ADDR_NUM];
>>   };
>>     OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
>> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
>> index f88c503cf4..5ad0418eb6 100644
>> --- a/target/riscv/cpu_helper.c
>> +++ b/target/riscv/cpu_helper.c
>> @@ -1068,6 +1068,13 @@ restart:
>>                       (access_type == MMU_DATA_STORE || (pte & PTE_D))) {
>>                   *prot |= PAGE_WRITE;
>>               }
>> +            if ((pte & PTE_U) && (mode & PRV_S) &&
>> +                    get_field(env->mstatus, MSTATUS_SUM)) {
>> +                if (env->sum_u_count < MAX_CACHED_SUM_U_ADDR_NUM) {
>> +                    env->sum_u_addr[env->sum_u_count] = addr;
>> +                }
>> +                ++env->sum_u_count;
>> +            }
>>               return TRANSLATE_SUCCESS;
>>           }
>>       }
>> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
>> index ab566639e5..74b7638c8a 100644
>> --- a/target/riscv/csr.c
>> +++ b/target/riscv/csr.c
>> @@ -1246,9 +1246,21 @@ static RISCVException
>> write_mstatus(CPURISCVState *env, int csrno,
>>         /* flush tlb on mstatus fields that affect VM */
>>       if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
>> -            MSTATUS_MPRV | MSTATUS_SUM)) {
>> +            MSTATUS_MPRV)) {
>>           tlb_flush(env_cpu(env));
>> +        env->sum_u_count = 0;
>> +    } else if ((mstatus & MSTATUS_SUM) && !(val & MSTATUS_SUM)) {
>> +        if (env->sum_u_count > MAX_CACHED_SUM_U_ADDR_NUM) {
>> +            tlb_flush(env_cpu(env));
>> +        } else {
>> +            for (int i = 0; i < env->sum_u_count; ++i) {
>> +                tlb_flush_page_by_mmuidx(env_cpu(env),
>> env->sum_u_addr[i],
>> +                                         1 << PRV_S | 1 << PRV_M);
>> +            }
>> +        }
>> +        env->sum_u_count = 0;
>>       }
> 
> Whether tlb should  be flushed when SUM is changed from 0 to 1?
> 
When SUM is changed from 0 to 1, all the existing tlb entries remain
valid as the permission is elevated instead of reduced, so I don't think
it's necessary to flush tlb.

Thanks,
Fei.

> Regards,
> 
> Weiwei Li
> 
>> +
>>       mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
>>           MSTATUS_SPP | MSTATUS_MPRV | MSTATUS_SUM |
>>           MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR |
>
Weiwei Li March 21, 2023, 8:50 a.m. UTC | #3
On 2023/3/21 16:40, Wu, Fei wrote:
> On 3/21/2023 4:28 PM, liweiwei wrote:
>> On 2023/3/21 14:37, fei2.wu@intel.com wrote:
>>> From: Fei Wu <fei2.wu@intel.com>
>>>
>>> Kernel needs to access user mode memory e.g. during syscalls, the window
>>> is usually opened up for a very limited time through MSTATUS.SUM, the
>>> overhead is too much if tlb_flush() gets called for every SUM change.
>>> This patch saves addresses accessed when SUM=1, and flushs only these
>>> pages when SUM changes to 0. If the buffer is not large enough to save
>>> all the pages during SUM=1, it will fall back to tlb_flush when
>>> necessary.
>>>
>>> The buffer size is set to 4 since in this MSTATUS.SUM open-up window,
>>> most of the time kernel accesses 1 or 2 pages, it's very rare to see
>>> more than 4 pages accessed.
>>>
>>> It's not necessary to save/restore these new added status, as
>>> tlb_flush() is always called after restore.
>>>
>>> Result of 'pipe 10' from unixbench boosts from 223656 to 1327407. Many
>>> other syscalls benefit a lot from this one too.
>>>
>>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>>> Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
>>> ---
>>>    target/riscv/cpu.h        |  4 ++++
>>>    target/riscv/cpu_helper.c |  7 +++++++
>>>    target/riscv/csr.c        | 14 +++++++++++++-
>>>    3 files changed, 24 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>>> index 638e47c75a..926dbce59f 100644
>>> --- a/target/riscv/cpu.h
>>> +++ b/target/riscv/cpu.h
>>> @@ -383,6 +383,10 @@ struct CPUArchState {
>>>        uint64_t kvm_timer_compare;
>>>        uint64_t kvm_timer_state;
>>>        uint64_t kvm_timer_frequency;
>>> +
>>> +#define MAX_CACHED_SUM_U_ADDR_NUM 4
>>> +    uint64_t sum_u_count;
>>> +    uint64_t sum_u_addr[MAX_CACHED_SUM_U_ADDR_NUM];
>>>    };
>>>      OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
>>> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
>>> index f88c503cf4..5ad0418eb6 100644
>>> --- a/target/riscv/cpu_helper.c
>>> +++ b/target/riscv/cpu_helper.c
>>> @@ -1068,6 +1068,13 @@ restart:
>>>                        (access_type == MMU_DATA_STORE || (pte & PTE_D))) {
>>>                    *prot |= PAGE_WRITE;
>>>                }
>>> +            if ((pte & PTE_U) && (mode & PRV_S) &&
>>> +                    get_field(env->mstatus, MSTATUS_SUM)) {
>>> +                if (env->sum_u_count < MAX_CACHED_SUM_U_ADDR_NUM) {
>>> +                    env->sum_u_addr[env->sum_u_count] = addr;
>>> +                }
>>> +                ++env->sum_u_count;
>>> +            }
>>>                return TRANSLATE_SUCCESS;
>>>            }
>>>        }
>>> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
>>> index ab566639e5..74b7638c8a 100644
>>> --- a/target/riscv/csr.c
>>> +++ b/target/riscv/csr.c
>>> @@ -1246,9 +1246,21 @@ static RISCVException
>>> write_mstatus(CPURISCVState *env, int csrno,
>>>          /* flush tlb on mstatus fields that affect VM */
>>>        if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
>>> -            MSTATUS_MPRV | MSTATUS_SUM)) {
>>> +            MSTATUS_MPRV)) {
>>>            tlb_flush(env_cpu(env));
>>> +        env->sum_u_count = 0;
>>> +    } else if ((mstatus & MSTATUS_SUM) && !(val & MSTATUS_SUM)) {
>>> +        if (env->sum_u_count > MAX_CACHED_SUM_U_ADDR_NUM) {
>>> +            tlb_flush(env_cpu(env));
>>> +        } else {
>>> +            for (int i = 0; i < env->sum_u_count; ++i) {
>>> +                tlb_flush_page_by_mmuidx(env_cpu(env),
>>> env->sum_u_addr[i],
>>> +                                         1 << PRV_S | 1 << PRV_M);
>>> +            }
>>> +        }
>>> +        env->sum_u_count = 0;
>>>        }
>> Whether tlb should  be flushed when SUM is changed from 0 to 1?
>>
> When SUM is changed from 0 to 1, all the existing tlb entries remain
> valid as the permission is elevated instead of reduced, so I don't think
> it's necessary to flush tlb.

If elevated not unchanged, I think the tlb also needs update, since new 
permitted access rights may be added to the tlb.

Regards,

Weiwei Li

>
> Thanks,
> Fei.
>
>> Regards,
>>
>> Weiwei Li
>>
>>> +
>>>        mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
>>>            MSTATUS_SPP | MSTATUS_MPRV | MSTATUS_SUM |
>>>            MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR |
Wu, Fei March 21, 2023, 9:14 a.m. UTC | #4
On 3/21/2023 4:50 PM, liweiwei wrote:
> 
> On 2023/3/21 16:40, Wu, Fei wrote:
>> On 3/21/2023 4:28 PM, liweiwei wrote:
>>> On 2023/3/21 14:37, fei2.wu@intel.com wrote:
>>>> From: Fei Wu <fei2.wu@intel.com>
>>>>
>>>> Kernel needs to access user mode memory e.g. during syscalls, the
>>>> window
>>>> is usually opened up for a very limited time through MSTATUS.SUM, the
>>>> overhead is too much if tlb_flush() gets called for every SUM change.
>>>> This patch saves addresses accessed when SUM=1, and flushs only these
>>>> pages when SUM changes to 0. If the buffer is not large enough to save
>>>> all the pages during SUM=1, it will fall back to tlb_flush when
>>>> necessary.
>>>>
>>>> The buffer size is set to 4 since in this MSTATUS.SUM open-up window,
>>>> most of the time kernel accesses 1 or 2 pages, it's very rare to see
>>>> more than 4 pages accessed.
>>>>
>>>> It's not necessary to save/restore these new added status, as
>>>> tlb_flush() is always called after restore.
>>>>
>>>> Result of 'pipe 10' from unixbench boosts from 223656 to 1327407. Many
>>>> other syscalls benefit a lot from this one too.
>>>>
>>>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>>>> Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
>>>> ---
>>>>    target/riscv/cpu.h        |  4 ++++
>>>>    target/riscv/cpu_helper.c |  7 +++++++
>>>>    target/riscv/csr.c        | 14 +++++++++++++-
>>>>    3 files changed, 24 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>>>> index 638e47c75a..926dbce59f 100644
>>>> --- a/target/riscv/cpu.h
>>>> +++ b/target/riscv/cpu.h
>>>> @@ -383,6 +383,10 @@ struct CPUArchState {
>>>>        uint64_t kvm_timer_compare;
>>>>        uint64_t kvm_timer_state;
>>>>        uint64_t kvm_timer_frequency;
>>>> +
>>>> +#define MAX_CACHED_SUM_U_ADDR_NUM 4
>>>> +    uint64_t sum_u_count;
>>>> +    uint64_t sum_u_addr[MAX_CACHED_SUM_U_ADDR_NUM];
>>>>    };
>>>>      OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
>>>> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
>>>> index f88c503cf4..5ad0418eb6 100644
>>>> --- a/target/riscv/cpu_helper.c
>>>> +++ b/target/riscv/cpu_helper.c
>>>> @@ -1068,6 +1068,13 @@ restart:
>>>>                        (access_type == MMU_DATA_STORE || (pte &
>>>> PTE_D))) {
>>>>                    *prot |= PAGE_WRITE;
>>>>                }
>>>> +            if ((pte & PTE_U) && (mode & PRV_S) &&
>>>> +                    get_field(env->mstatus, MSTATUS_SUM)) {
>>>> +                if (env->sum_u_count < MAX_CACHED_SUM_U_ADDR_NUM) {
>>>> +                    env->sum_u_addr[env->sum_u_count] = addr;
>>>> +                }
>>>> +                ++env->sum_u_count;
>>>> +            }
>>>>                return TRANSLATE_SUCCESS;
>>>>            }
>>>>        }
>>>> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
>>>> index ab566639e5..74b7638c8a 100644
>>>> --- a/target/riscv/csr.c
>>>> +++ b/target/riscv/csr.c
>>>> @@ -1246,9 +1246,21 @@ static RISCVException
>>>> write_mstatus(CPURISCVState *env, int csrno,
>>>>          /* flush tlb on mstatus fields that affect VM */
>>>>        if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
>>>> -            MSTATUS_MPRV | MSTATUS_SUM)) {
>>>> +            MSTATUS_MPRV)) {
>>>>            tlb_flush(env_cpu(env));
>>>> +        env->sum_u_count = 0;
>>>> +    } else if ((mstatus & MSTATUS_SUM) && !(val & MSTATUS_SUM)) {
>>>> +        if (env->sum_u_count > MAX_CACHED_SUM_U_ADDR_NUM) {
>>>> +            tlb_flush(env_cpu(env));
>>>> +        } else {
>>>> +            for (int i = 0; i < env->sum_u_count; ++i) {
>>>> +                tlb_flush_page_by_mmuidx(env_cpu(env),
>>>> env->sum_u_addr[i],
>>>> +                                         1 << PRV_S | 1 << PRV_M);
>>>> +            }
>>>> +        }
>>>> +        env->sum_u_count = 0;
>>>>        }
>>> Whether tlb should  be flushed when SUM is changed from 0 to 1?
>>>
>> When SUM is changed from 0 to 1, all the existing tlb entries remain
>> valid as the permission is elevated instead of reduced, so I don't think
>> it's necessary to flush tlb.
> 
> If elevated not unchanged, I think the tlb also needs update, since new
> permitted access rights may be added to the tlb.
> 
Assume the following flow, if the new rights have been added to tlb
during SUM=0, they're visible and still valid after setting SUM=1 again.
Could you please add a specific counter example in this flow?


enable uaccess (set SUM = 1)
... (access user mem from S mode)
disable uaccess (set SUM = 0)

... (update TLB_SUM_0)

    <-- flush tlb or not right before enabling uaccess?
enable uaccess (set SUM = 1)
    <-- okay to access TLB_SUM_0?
disable uaccess (set SUM = 0)


Thanks,
Fei.

> Regards,
> 
> Weiwei Li
> 
>>
>> Thanks,
>> Fei.
>>
>>> Regards,
>>>
>>> Weiwei Li
>>>
>>>> +
>>>>        mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
>>>>            MSTATUS_SPP | MSTATUS_MPRV | MSTATUS_SUM |
>>>>            MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR |
>
Weiwei Li March 21, 2023, 9:47 a.m. UTC | #5
On 2023/3/21 17:14, Wu, Fei wrote:
> On 3/21/2023 4:50 PM, liweiwei wrote:
>> On 2023/3/21 16:40, Wu, Fei wrote:
>>> On 3/21/2023 4:28 PM, liweiwei wrote:
>>>> On 2023/3/21 14:37, fei2.wu@intel.com wrote:
>>>>> From: Fei Wu <fei2.wu@intel.com>
>>>>>
>>>>> Kernel needs to access user mode memory e.g. during syscalls, the
>>>>> window
>>>>> is usually opened up for a very limited time through MSTATUS.SUM, the
>>>>> overhead is too much if tlb_flush() gets called for every SUM change.
>>>>> This patch saves addresses accessed when SUM=1, and flushs only these
>>>>> pages when SUM changes to 0. If the buffer is not large enough to save
>>>>> all the pages during SUM=1, it will fall back to tlb_flush when
>>>>> necessary.
>>>>>
>>>>> The buffer size is set to 4 since in this MSTATUS.SUM open-up window,
>>>>> most of the time kernel accesses 1 or 2 pages, it's very rare to see
>>>>> more than 4 pages accessed.
>>>>>
>>>>> It's not necessary to save/restore these new added status, as
>>>>> tlb_flush() is always called after restore.
>>>>>
>>>>> Result of 'pipe 10' from unixbench boosts from 223656 to 1327407. Many
>>>>> other syscalls benefit a lot from this one too.
>>>>>
>>>>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>>>>> Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
>>>>> ---
>>>>>     target/riscv/cpu.h        |  4 ++++
>>>>>     target/riscv/cpu_helper.c |  7 +++++++
>>>>>     target/riscv/csr.c        | 14 +++++++++++++-
>>>>>     3 files changed, 24 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>>>>> index 638e47c75a..926dbce59f 100644
>>>>> --- a/target/riscv/cpu.h
>>>>> +++ b/target/riscv/cpu.h
>>>>> @@ -383,6 +383,10 @@ struct CPUArchState {
>>>>>         uint64_t kvm_timer_compare;
>>>>>         uint64_t kvm_timer_state;
>>>>>         uint64_t kvm_timer_frequency;
>>>>> +
>>>>> +#define MAX_CACHED_SUM_U_ADDR_NUM 4
>>>>> +    uint64_t sum_u_count;
>>>>> +    uint64_t sum_u_addr[MAX_CACHED_SUM_U_ADDR_NUM];
>>>>>     };
>>>>>       OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
>>>>> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
>>>>> index f88c503cf4..5ad0418eb6 100644
>>>>> --- a/target/riscv/cpu_helper.c
>>>>> +++ b/target/riscv/cpu_helper.c
>>>>> @@ -1068,6 +1068,13 @@ restart:
>>>>>                         (access_type == MMU_DATA_STORE || (pte &
>>>>> PTE_D))) {
>>>>>                     *prot |= PAGE_WRITE;
>>>>>                 }
>>>>> +            if ((pte & PTE_U) && (mode & PRV_S) &&
>>>>> +                    get_field(env->mstatus, MSTATUS_SUM)) {
>>>>> +                if (env->sum_u_count < MAX_CACHED_SUM_U_ADDR_NUM) {
>>>>> +                    env->sum_u_addr[env->sum_u_count] = addr;
>>>>> +                }
>>>>> +                ++env->sum_u_count;
>>>>> +            }
>>>>>                 return TRANSLATE_SUCCESS;
>>>>>             }
>>>>>         }
>>>>> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
>>>>> index ab566639e5..74b7638c8a 100644
>>>>> --- a/target/riscv/csr.c
>>>>> +++ b/target/riscv/csr.c
>>>>> @@ -1246,9 +1246,21 @@ static RISCVException
>>>>> write_mstatus(CPURISCVState *env, int csrno,
>>>>>           /* flush tlb on mstatus fields that affect VM */
>>>>>         if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
>>>>> -            MSTATUS_MPRV | MSTATUS_SUM)) {
>>>>> +            MSTATUS_MPRV)) {
>>>>>             tlb_flush(env_cpu(env));
>>>>> +        env->sum_u_count = 0;
>>>>> +    } else if ((mstatus & MSTATUS_SUM) && !(val & MSTATUS_SUM)) {
>>>>> +        if (env->sum_u_count > MAX_CACHED_SUM_U_ADDR_NUM) {
>>>>> +            tlb_flush(env_cpu(env));
>>>>> +        } else {
>>>>> +            for (int i = 0; i < env->sum_u_count; ++i) {
>>>>> +                tlb_flush_page_by_mmuidx(env_cpu(env),
>>>>> env->sum_u_addr[i],
>>>>> +                                         1 << PRV_S | 1 << PRV_M);
>>>>> +            }
>>>>> +        }
>>>>> +        env->sum_u_count = 0;
>>>>>         }
>>>> Whether tlb should  be flushed when SUM is changed from 0 to 1?
>>>>
>>> When SUM is changed from 0 to 1, all the existing tlb entries remain
>>> valid as the permission is elevated instead of reduced, so I don't think
>>> it's necessary to flush tlb.
>> If elevated not unchanged, I think the tlb also needs update, since new
>> permitted access rights may be added to the tlb.
>>
> Assume the following flow, if the new rights have been added to tlb
> during SUM=0, they're visible and still valid after setting SUM=1 again.
> Could you please add a specific counter example in this flow?
>
Assuming addr0 cannot be access from S mode when SUM = 0, but can be 
accessed from S mode if SUM=1,

and there is a tlb entry for it when SUM = 0

> enable uaccess (set SUM = 1)
if we don't flush it when we change SUM to 1 in this step
> ... (access user mem from S mode)
when we access addr0 here, tlb will be hit( not updated) and the access 
will trigger fault instead of allowing the access
> disable uaccess (set SUM = 0)
>
> ... (update TLB_SUM_0)
>
>      <-- flush tlb or not right before enabling uaccess?
> enable uaccess (set SUM = 1)
>      <-- okay to access TLB_SUM_0?
> disable uaccess (set SUM = 0)

So, I think the question is whether the rights in TLB entry can be 
elevated. Or whether there is legal tlb entry for this addr0 when SUM = 0?

If not,  all my above assumption will not be right.

Regards,

Weiwei Li

>
> Thanks,
> Fei.
>



>> Regards,
>>
>> Weiwei Li
>>
>>> Thanks,
>>> Fei.
>>>
>>>> Regards,
>>>>
>>>> Weiwei Li
>>>>
>>>>> +
>>>>>         mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
>>>>>             MSTATUS_SPP | MSTATUS_MPRV | MSTATUS_SUM |
>>>>>             MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR |
Wu, Fei March 21, 2023, noon UTC | #6
On 3/21/2023 5:47 PM, liweiwei wrote:
> 
> On 2023/3/21 17:14, Wu, Fei wrote:
>> On 3/21/2023 4:50 PM, liweiwei wrote:
>>> On 2023/3/21 16:40, Wu, Fei wrote:
>>>> On 3/21/2023 4:28 PM, liweiwei wrote:
>>>>> On 2023/3/21 14:37, fei2.wu@intel.com wrote:
>>>>>> From: Fei Wu <fei2.wu@intel.com>
>>>>>>
>>>>>> Kernel needs to access user mode memory e.g. during syscalls, the
>>>>>> window
>>>>>> is usually opened up for a very limited time through MSTATUS.SUM, the
>>>>>> overhead is too much if tlb_flush() gets called for every SUM change.
>>>>>> This patch saves addresses accessed when SUM=1, and flushs only these
>>>>>> pages when SUM changes to 0. If the buffer is not large enough to
>>>>>> save
>>>>>> all the pages during SUM=1, it will fall back to tlb_flush when
>>>>>> necessary.
>>>>>>
>>>>>> The buffer size is set to 4 since in this MSTATUS.SUM open-up window,
>>>>>> most of the time kernel accesses 1 or 2 pages, it's very rare to see
>>>>>> more than 4 pages accessed.
>>>>>>
>>>>>> It's not necessary to save/restore these new added status, as
>>>>>> tlb_flush() is always called after restore.
>>>>>>
>>>>>> Result of 'pipe 10' from unixbench boosts from 223656 to 1327407.
>>>>>> Many
>>>>>> other syscalls benefit a lot from this one too.
>>>>>>
>>>>>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>>>>>> Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
>>>>>> ---
>>>>>>     target/riscv/cpu.h        |  4 ++++
>>>>>>     target/riscv/cpu_helper.c |  7 +++++++
>>>>>>     target/riscv/csr.c        | 14 +++++++++++++-
>>>>>>     3 files changed, 24 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>>>>>> index 638e47c75a..926dbce59f 100644
>>>>>> --- a/target/riscv/cpu.h
>>>>>> +++ b/target/riscv/cpu.h
>>>>>> @@ -383,6 +383,10 @@ struct CPUArchState {
>>>>>>         uint64_t kvm_timer_compare;
>>>>>>         uint64_t kvm_timer_state;
>>>>>>         uint64_t kvm_timer_frequency;
>>>>>> +
>>>>>> +#define MAX_CACHED_SUM_U_ADDR_NUM 4
>>>>>> +    uint64_t sum_u_count;
>>>>>> +    uint64_t sum_u_addr[MAX_CACHED_SUM_U_ADDR_NUM];
>>>>>>     };
>>>>>>       OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
>>>>>> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
>>>>>> index f88c503cf4..5ad0418eb6 100644
>>>>>> --- a/target/riscv/cpu_helper.c
>>>>>> +++ b/target/riscv/cpu_helper.c
>>>>>> @@ -1068,6 +1068,13 @@ restart:
>>>>>>                         (access_type == MMU_DATA_STORE || (pte &
>>>>>> PTE_D))) {
>>>>>>                     *prot |= PAGE_WRITE;
>>>>>>                 }
>>>>>> +            if ((pte & PTE_U) && (mode & PRV_S) &&
>>>>>> +                    get_field(env->mstatus, MSTATUS_SUM)) {
>>>>>> +                if (env->sum_u_count < MAX_CACHED_SUM_U_ADDR_NUM) {
>>>>>> +                    env->sum_u_addr[env->sum_u_count] = addr;
>>>>>> +                }
>>>>>> +                ++env->sum_u_count;
>>>>>> +            }
>>>>>>                 return TRANSLATE_SUCCESS;
>>>>>>             }
>>>>>>         }
>>>>>> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
>>>>>> index ab566639e5..74b7638c8a 100644
>>>>>> --- a/target/riscv/csr.c
>>>>>> +++ b/target/riscv/csr.c
>>>>>> @@ -1246,9 +1246,21 @@ static RISCVException
>>>>>> write_mstatus(CPURISCVState *env, int csrno,
>>>>>>           /* flush tlb on mstatus fields that affect VM */
>>>>>>         if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP |
>>>>>> MSTATUS_MPV |
>>>>>> -            MSTATUS_MPRV | MSTATUS_SUM)) {
>>>>>> +            MSTATUS_MPRV)) {
>>>>>>             tlb_flush(env_cpu(env));
>>>>>> +        env->sum_u_count = 0;
>>>>>> +    } else if ((mstatus & MSTATUS_SUM) && !(val & MSTATUS_SUM)) {
>>>>>> +        if (env->sum_u_count > MAX_CACHED_SUM_U_ADDR_NUM) {
>>>>>> +            tlb_flush(env_cpu(env));
>>>>>> +        } else {
>>>>>> +            for (int i = 0; i < env->sum_u_count; ++i) {
>>>>>> +                tlb_flush_page_by_mmuidx(env_cpu(env),
>>>>>> env->sum_u_addr[i],
>>>>>> +                                         1 << PRV_S | 1 << PRV_M);
>>>>>> +            }
>>>>>> +        }
>>>>>> +        env->sum_u_count = 0;
>>>>>>         }
>>>>> Whether tlb should  be flushed when SUM is changed from 0 to 1?
>>>>>
>>>> When SUM is changed from 0 to 1, all the existing tlb entries remain
>>>> valid as the permission is elevated instead of reduced, so I don't
>>>> think
>>>> it's necessary to flush tlb.
>>> If elevated not unchanged, I think the tlb also needs update, since new
>>> permitted access rights may be added to the tlb.
>>>
>> Assume the following flow, if the new rights have been added to tlb
>> during SUM=0, they're visible and still valid after setting SUM=1 again.
>> Could you please add a specific counter example in this flow?
>>
> Assuming addr0 cannot be access from S mode when SUM = 0, but can be
> accessed from S mode if SUM=1,
> 
> and there is a tlb entry for it when SUM = 0
> 
>> enable uaccess (set SUM = 1)
> if we don't flush it when we change SUM to 1 in this step
>> ... (access user mem from S mode)
> when we access addr0 here, tlb will be hit( not updated) and the access
> will trigger fault instead of allowing the access
>> disable uaccess (set SUM = 0)
>>
>> ... (update TLB_SUM_0)
>>
>>      <-- flush tlb or not right before enabling uaccess?
>> enable uaccess (set SUM = 1)
>>      <-- okay to access TLB_SUM_0?
>> disable uaccess (set SUM = 0)
> 
> So, I think the question is whether the rights in TLB entry can be
> elevated. Or whether there is legal tlb entry for this addr0 when SUM = 0?
> 
I think there is no such tlb entry:
* If it's loaded into tlb when SUM = 0. This is impossible as it's not
accessible when SUM = 0, it's a fault instead of being loaded into tlb.
* If it's loaded into tlb when SUM = 1. It will be flushed when SUM sets
to 0.

Thanks,
Fei.

> If not,  all my above assumption will not be right.
> 
> Regards,
> 
> Weiwei Li
> 
>>
>> Thanks,
>> Fei.
>>
> 
> 
> 
>>> Regards,
>>>
>>> Weiwei Li
>>>
>>>> Thanks,
>>>> Fei.
>>>>
>>>>> Regards,
>>>>>
>>>>> Weiwei Li
>>>>>
>>>>>> +
>>>>>>         mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE |
>>>>>> MSTATUS_MPIE |
>>>>>>             MSTATUS_SPP | MSTATUS_MPRV | MSTATUS_SUM |
>>>>>>             MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR |
>
Weiwei Li March 21, 2023, 12:46 p.m. UTC | #7
On 2023/3/21 20:00, Wu, Fei wrote:
> On 3/21/2023 5:47 PM, liweiwei wrote:
>> On 2023/3/21 17:14, Wu, Fei wrote:
>>> On 3/21/2023 4:50 PM, liweiwei wrote:
>>>> On 2023/3/21 16:40, Wu, Fei wrote:
>>>>> On 3/21/2023 4:28 PM, liweiwei wrote:
>>>>>> On 2023/3/21 14:37,fei2.wu@intel.com  wrote:
>>>>>>> From: Fei Wu<fei2.wu@intel.com>
>>>>>>>
>>>>>>> Kernel needs to access user mode memory e.g. during syscalls, the
>>>>>>> window
>>>>>>> is usually opened up for a very limited time through MSTATUS.SUM, the
>>>>>>> overhead is too much if tlb_flush() gets called for every SUM change.
>>>>>>> This patch saves addresses accessed when SUM=1, and flushs only these
>>>>>>> pages when SUM changes to 0. If the buffer is not large enough to
>>>>>>> save
>>>>>>> all the pages during SUM=1, it will fall back to tlb_flush when
>>>>>>> necessary.
>>>>>>>
>>>>>>> The buffer size is set to 4 since in this MSTATUS.SUM open-up window,
>>>>>>> most of the time kernel accesses 1 or 2 pages, it's very rare to see
>>>>>>> more than 4 pages accessed.
>>>>>>>
>>>>>>> It's not necessary to save/restore these new added status, as
>>>>>>> tlb_flush() is always called after restore.
>>>>>>>
>>>>>>> Result of 'pipe 10' from unixbench boosts from 223656 to 1327407.
>>>>>>> Many
>>>>>>> other syscalls benefit a lot from this one too.
>>>>>>>
>>>>>>> Signed-off-by: Fei Wu<fei2.wu@intel.com>
>>>>>>> Reviewed-by: LIU Zhiwei<zhiwei_liu@linux.alibaba.com>
>>>>>>> ---
>>>>>>>      target/riscv/cpu.h        |  4 ++++
>>>>>>>      target/riscv/cpu_helper.c |  7 +++++++
>>>>>>>      target/riscv/csr.c        | 14 +++++++++++++-
>>>>>>>      3 files changed, 24 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>>>>>>> index 638e47c75a..926dbce59f 100644
>>>>>>> --- a/target/riscv/cpu.h
>>>>>>> +++ b/target/riscv/cpu.h
>>>>>>> @@ -383,6 +383,10 @@ struct CPUArchState {
>>>>>>>          uint64_t kvm_timer_compare;
>>>>>>>          uint64_t kvm_timer_state;
>>>>>>>          uint64_t kvm_timer_frequency;
>>>>>>> +
>>>>>>> +#define MAX_CACHED_SUM_U_ADDR_NUM 4
>>>>>>> +    uint64_t sum_u_count;
>>>>>>> +    uint64_t sum_u_addr[MAX_CACHED_SUM_U_ADDR_NUM];
>>>>>>>      };
>>>>>>>        OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
>>>>>>> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
>>>>>>> index f88c503cf4..5ad0418eb6 100644
>>>>>>> --- a/target/riscv/cpu_helper.c
>>>>>>> +++ b/target/riscv/cpu_helper.c
>>>>>>> @@ -1068,6 +1068,13 @@ restart:
>>>>>>>                          (access_type == MMU_DATA_STORE || (pte &
>>>>>>> PTE_D))) {
>>>>>>>                      *prot |= PAGE_WRITE;
>>>>>>>                  }
>>>>>>> +            if ((pte & PTE_U) && (mode & PRV_S) &&
It's more readable to use "mode == PRV_S" instead of  "mode & PRV_S" here.
>>>>>>> +                    get_field(env->mstatus, MSTATUS_SUM)) {
>>>>>>> +                if (env->sum_u_count < MAX_CACHED_SUM_U_ADDR_NUM) {
>>>>>>> +                    env->sum_u_addr[env->sum_u_count] = addr;
>>>>>>> +                }
>>>>>>> +                ++env->sum_u_count;
>>>>>>> +            }
>>>>>>>                  return TRANSLATE_SUCCESS;
>>>>>>>              }
>>>>>>>          }
>>>>>>> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
>>>>>>> index ab566639e5..74b7638c8a 100644
>>>>>>> --- a/target/riscv/csr.c
>>>>>>> +++ b/target/riscv/csr.c
>>>>>>> @@ -1246,9 +1246,21 @@ static RISCVException
>>>>>>> write_mstatus(CPURISCVState *env, int csrno,
>>>>>>>            /* flush tlb on mstatus fields that affect VM */
>>>>>>>          if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP |
>>>>>>> MSTATUS_MPV |
>>>>>>> -            MSTATUS_MPRV | MSTATUS_SUM)) {
>>>>>>> +            MSTATUS_MPRV)) {
>>>>>>>              tlb_flush(env_cpu(env));
>>>>>>> +        env->sum_u_count = 0;
>>>>>>> +    } else if ((mstatus & MSTATUS_SUM) && !(val & MSTATUS_SUM)) {
>>>>>>> +        if (env->sum_u_count > MAX_CACHED_SUM_U_ADDR_NUM) {
>>>>>>> +            tlb_flush(env_cpu(env));
>>>>>>> +        } else {
>>>>>>> +            for (int i = 0; i < env->sum_u_count; ++i) {
>>>>>>> +                tlb_flush_page_by_mmuidx(env_cpu(env),
>>>>>>> env->sum_u_addr[i],
>>>>>>> +                                         1 << PRV_S | 1 << PRV_M);
>>>>>>> +            }
>>>>>>> +        }
>>>>>>> +        env->sum_u_count = 0;
>>>>>>>          }
>>>>>> Whether tlb should  be flushed when SUM is changed from 0 to 1?
>>>>>>
>>>>> When SUM is changed from 0 to 1, all the existing tlb entries remain
>>>>> valid as the permission is elevated instead of reduced, so I don't
>>>>> think
>>>>> it's necessary to flush tlb.
>>>> If elevated not unchanged, I think the tlb also needs update, since new
>>>> permitted access rights may be added to the tlb.
>>>>
>>> Assume the following flow, if the new rights have been added to tlb
>>> during SUM=0, they're visible and still valid after setting SUM=1 again.
>>> Could you please add a specific counter example in this flow?
>>>
>> Assuming addr0 cannot be access from S mode when SUM = 0, but can be
>> accessed from S mode if SUM=1,
>>
>> and there is a tlb entry for it when SUM = 0
>>
>>> enable uaccess (set SUM = 1)
>> if we don't flush it when we change SUM to 1 in this step
>>> ... (access user mem from S mode)
>> when we access addr0 here, tlb will be hit( not updated) and the access
>> will trigger fault instead of allowing the access
>>> disable uaccess (set SUM = 0)
>>>
>>> ... (update TLB_SUM_0)
>>>
>>>       <-- flush tlb or not right before enabling uaccess?
>>> enable uaccess (set SUM = 1)
>>>       <-- okay to access TLB_SUM_0?
>>> disable uaccess (set SUM = 0)
>> So, I think the question is whether the rights in TLB entry can be
>> elevated. Or whether there is legal tlb entry for this addr0 when SUM = 0?
>>
> I think there is no such tlb entry:
> * If it's loaded into tlb when SUM = 0. This is impossible as it's not
> accessible when SUM = 0, it's a fault instead of being loaded into tlb.
> * If it's loaded into tlb when SUM = 1. It will be flushed when SUM sets
> to 0.
>
> Thanks,
> Fei.
>
OK. It's acceptable to me.

Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>

Weiwei Li

>> If not,  all my above assumption will not be right.
>>
>> Regards,
>>
>> Weiwei Li
>>
>>> Thanks,
>>> Fei.
>>>
>>
>>
>>>> Regards,
>>>>
>>>> Weiwei Li
>>>>
>>>>> Thanks,
>>>>> Fei.
>>>>>
>>>>>> Regards,
>>>>>>
>>>>>> Weiwei Li
>>>>>>
>>>>>>> +
>>>>>>>          mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE |
>>>>>>> MSTATUS_MPIE |
>>>>>>>              MSTATUS_SPP | MSTATUS_MPRV | MSTATUS_SUM |
>>>>>>>              MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR |
Weiwei Li March 21, 2023, 12:58 p.m. UTC | #8
On 2023/3/21 14:37, fei2.wu@intel.com wrote:
> From: Fei Wu <fei2.wu@intel.com>
>
> Kernel needs to access user mode memory e.g. during syscalls, the window
> is usually opened up for a very limited time through MSTATUS.SUM, the
> overhead is too much if tlb_flush() gets called for every SUM change.
> This patch saves addresses accessed when SUM=1, and flushs only these
> pages when SUM changes to 0. If the buffer is not large enough to save
> all the pages during SUM=1, it will fall back to tlb_flush when
> necessary.
>
> The buffer size is set to 4 since in this MSTATUS.SUM open-up window,
> most of the time kernel accesses 1 or 2 pages, it's very rare to see
> more than 4 pages accessed.
>
> It's not necessary to save/restore these new added status, as
> tlb_flush() is always called after restore.
>
> Result of 'pipe 10' from unixbench boosts from 223656 to 1327407. Many
> other syscalls benefit a lot from this one too.
>
> Signed-off-by: Fei Wu <fei2.wu@intel.com>
> Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
> ---
>   target/riscv/cpu.h        |  4 ++++
>   target/riscv/cpu_helper.c |  7 +++++++
>   target/riscv/csr.c        | 14 +++++++++++++-
>   3 files changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 638e47c75a..926dbce59f 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -383,6 +383,10 @@ struct CPUArchState {
>       uint64_t kvm_timer_compare;
>       uint64_t kvm_timer_state;
>       uint64_t kvm_timer_frequency;
> +
> +#define MAX_CACHED_SUM_U_ADDR_NUM 4
> +    uint64_t sum_u_count;
> +    uint64_t sum_u_addr[MAX_CACHED_SUM_U_ADDR_NUM];
>   };
>   
>   OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index f88c503cf4..5ad0418eb6 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -1068,6 +1068,13 @@ restart:
>                       (access_type == MMU_DATA_STORE || (pte & PTE_D))) {
>                   *prot |= PAGE_WRITE;
>               }
> +            if ((pte & PTE_U) && (mode & PRV_S) &&
> +                    get_field(env->mstatus, MSTATUS_SUM)) {
> +                if (env->sum_u_count < MAX_CACHED_SUM_U_ADDR_NUM) {
> +                    env->sum_u_addr[env->sum_u_count] = addr;
> +                }
> +                ++env->sum_u_count;
> +            }
>               return TRANSLATE_SUCCESS;
>           }
>       }
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index ab566639e5..74b7638c8a 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -1246,9 +1246,21 @@ static RISCVException write_mstatus(CPURISCVState *env, int csrno,
>   
>       /* flush tlb on mstatus fields that affect VM */
>       if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
> -            MSTATUS_MPRV | MSTATUS_SUM)) {
> +            MSTATUS_MPRV)) {
>           tlb_flush(env_cpu(env));
> +        env->sum_u_count = 0;
> +    } else if ((mstatus & MSTATUS_SUM) && !(val & MSTATUS_SUM)) {
> +        if (env->sum_u_count > MAX_CACHED_SUM_U_ADDR_NUM) {
> +            tlb_flush(env_cpu(env));

SUM seems only affect S mode TLB. Maybe we can only flush S mode TLB here.

> +        } else {
> +            for (int i = 0; i < env->sum_u_count; ++i) {
> +                tlb_flush_page_by_mmuidx(env_cpu(env), env->sum_u_addr[i],
> +                                         1 << PRV_S | 1 << PRV_M);

Similar case here.

Regards,

Weiwei Li

> +            }
> +        }
> +        env->sum_u_count = 0;
>       }
> +
>       mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
>           MSTATUS_SPP | MSTATUS_MPRV | MSTATUS_SUM |
>           MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR |
Wu, Fei March 21, 2023, 1:22 p.m. UTC | #9
On 3/21/2023 8:58 PM, liweiwei wrote:
> 
> On 2023/3/21 14:37, fei2.wu@intel.com wrote:
>> From: Fei Wu <fei2.wu@intel.com>
>>
>> Kernel needs to access user mode memory e.g. during syscalls, the window
>> is usually opened up for a very limited time through MSTATUS.SUM, the
>> overhead is too much if tlb_flush() gets called for every SUM change.
>> This patch saves addresses accessed when SUM=1, and flushs only these
>> pages when SUM changes to 0. If the buffer is not large enough to save
>> all the pages during SUM=1, it will fall back to tlb_flush when
>> necessary.
>>
>> The buffer size is set to 4 since in this MSTATUS.SUM open-up window,
>> most of the time kernel accesses 1 or 2 pages, it's very rare to see
>> more than 4 pages accessed.
>>
>> It's not necessary to save/restore these new added status, as
>> tlb_flush() is always called after restore.
>>
>> Result of 'pipe 10' from unixbench boosts from 223656 to 1327407. Many
>> other syscalls benefit a lot from this one too.
>>
>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>> Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
>> ---
>>   target/riscv/cpu.h        |  4 ++++
>>   target/riscv/cpu_helper.c |  7 +++++++
>>   target/riscv/csr.c        | 14 +++++++++++++-
>>   3 files changed, 24 insertions(+), 1 deletion(-)
>>
>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>> index 638e47c75a..926dbce59f 100644
>> --- a/target/riscv/cpu.h
>> +++ b/target/riscv/cpu.h
>> @@ -383,6 +383,10 @@ struct CPUArchState {
>>       uint64_t kvm_timer_compare;
>>       uint64_t kvm_timer_state;
>>       uint64_t kvm_timer_frequency;
>> +
>> +#define MAX_CACHED_SUM_U_ADDR_NUM 4
>> +    uint64_t sum_u_count;
>> +    uint64_t sum_u_addr[MAX_CACHED_SUM_U_ADDR_NUM];
>>   };
>>     OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
>> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
>> index f88c503cf4..5ad0418eb6 100644
>> --- a/target/riscv/cpu_helper.c
>> +++ b/target/riscv/cpu_helper.c
>> @@ -1068,6 +1068,13 @@ restart:
>>                       (access_type == MMU_DATA_STORE || (pte & PTE_D))) {
>>                   *prot |= PAGE_WRITE;
>>               }
>> +            if ((pte & PTE_U) && (mode & PRV_S) &&
>> +                    get_field(env->mstatus, MSTATUS_SUM)) {
>> +                if (env->sum_u_count < MAX_CACHED_SUM_U_ADDR_NUM) {
>> +                    env->sum_u_addr[env->sum_u_count] = addr;
>> +                }
>> +                ++env->sum_u_count;
>> +            }
>>               return TRANSLATE_SUCCESS;
>>           }
>>       }
>> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
>> index ab566639e5..74b7638c8a 100644
>> --- a/target/riscv/csr.c
>> +++ b/target/riscv/csr.c
>> @@ -1246,9 +1246,21 @@ static RISCVException
>> write_mstatus(CPURISCVState *env, int csrno,
>>         /* flush tlb on mstatus fields that affect VM */
>>       if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
>> -            MSTATUS_MPRV | MSTATUS_SUM)) {
>> +            MSTATUS_MPRV)) {
>>           tlb_flush(env_cpu(env));
>> +        env->sum_u_count = 0;
>> +    } else if ((mstatus & MSTATUS_SUM) && !(val & MSTATUS_SUM)) {
>> +        if (env->sum_u_count > MAX_CACHED_SUM_U_ADDR_NUM) {
>> +            tlb_flush(env_cpu(env));
> 
> SUM seems only affect S mode TLB. Maybe we can only flush S mode TLB here.
> 
It's also in effect when MPRV=1 and MPP=S in M mode, we can only flush
the tlb of PRV_S and PRV_M.

Thanks,
Fei.

>> +        } else {
>> +            for (int i = 0; i < env->sum_u_count; ++i) {
>> +                tlb_flush_page_by_mmuidx(env_cpu(env),
>> env->sum_u_addr[i],
>> +                                         1 << PRV_S | 1 << PRV_M);
> 
> Similar case here.
> 
> Regards,
> 
> Weiwei Li
> 
>> +            }
>> +        }
>> +        env->sum_u_count = 0;
>>       }
>> +
>>       mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
>>           MSTATUS_SPP | MSTATUS_MPRV | MSTATUS_SUM |
>>           MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR |
>
Weiwei Li March 21, 2023, 1:27 p.m. UTC | #10
On 2023/3/21 21:22, Wu, Fei wrote:
> On 3/21/2023 8:58 PM, liweiwei wrote:
>> On 2023/3/21 14:37, fei2.wu@intel.com wrote:
>>> From: Fei Wu <fei2.wu@intel.com>
>>>
>>> Kernel needs to access user mode memory e.g. during syscalls, the window
>>> is usually opened up for a very limited time through MSTATUS.SUM, the
>>> overhead is too much if tlb_flush() gets called for every SUM change.
>>> This patch saves addresses accessed when SUM=1, and flushs only these
>>> pages when SUM changes to 0. If the buffer is not large enough to save
>>> all the pages during SUM=1, it will fall back to tlb_flush when
>>> necessary.
>>>
>>> The buffer size is set to 4 since in this MSTATUS.SUM open-up window,
>>> most of the time kernel accesses 1 or 2 pages, it's very rare to see
>>> more than 4 pages accessed.
>>>
>>> It's not necessary to save/restore these new added status, as
>>> tlb_flush() is always called after restore.
>>>
>>> Result of 'pipe 10' from unixbench boosts from 223656 to 1327407. Many
>>> other syscalls benefit a lot from this one too.
>>>
>>> Signed-off-by: Fei Wu <fei2.wu@intel.com>
>>> Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
>>> ---
>>>    target/riscv/cpu.h        |  4 ++++
>>>    target/riscv/cpu_helper.c |  7 +++++++
>>>    target/riscv/csr.c        | 14 +++++++++++++-
>>>    3 files changed, 24 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
>>> index 638e47c75a..926dbce59f 100644
>>> --- a/target/riscv/cpu.h
>>> +++ b/target/riscv/cpu.h
>>> @@ -383,6 +383,10 @@ struct CPUArchState {
>>>        uint64_t kvm_timer_compare;
>>>        uint64_t kvm_timer_state;
>>>        uint64_t kvm_timer_frequency;
>>> +
>>> +#define MAX_CACHED_SUM_U_ADDR_NUM 4
>>> +    uint64_t sum_u_count;
>>> +    uint64_t sum_u_addr[MAX_CACHED_SUM_U_ADDR_NUM];
>>>    };
>>>      OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
>>> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
>>> index f88c503cf4..5ad0418eb6 100644
>>> --- a/target/riscv/cpu_helper.c
>>> +++ b/target/riscv/cpu_helper.c
>>> @@ -1068,6 +1068,13 @@ restart:
>>>                        (access_type == MMU_DATA_STORE || (pte & PTE_D))) {
>>>                    *prot |= PAGE_WRITE;
>>>                }
>>> +            if ((pte & PTE_U) && (mode & PRV_S) &&
>>> +                    get_field(env->mstatus, MSTATUS_SUM)) {
>>> +                if (env->sum_u_count < MAX_CACHED_SUM_U_ADDR_NUM) {
>>> +                    env->sum_u_addr[env->sum_u_count] = addr;
>>> +                }
>>> +                ++env->sum_u_count;
>>> +            }
>>>                return TRANSLATE_SUCCESS;
>>>            }
>>>        }
>>> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
>>> index ab566639e5..74b7638c8a 100644
>>> --- a/target/riscv/csr.c
>>> +++ b/target/riscv/csr.c
>>> @@ -1246,9 +1246,21 @@ static RISCVException
>>> write_mstatus(CPURISCVState *env, int csrno,
>>>          /* flush tlb on mstatus fields that affect VM */
>>>        if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
>>> -            MSTATUS_MPRV | MSTATUS_SUM)) {
>>> +            MSTATUS_MPRV)) {
>>>            tlb_flush(env_cpu(env));
>>> +        env->sum_u_count = 0;
>>> +    } else if ((mstatus & MSTATUS_SUM) && !(val & MSTATUS_SUM)) {
>>> +        if (env->sum_u_count > MAX_CACHED_SUM_U_ADDR_NUM) {
>>> +            tlb_flush(env_cpu(env));
>> SUM seems only affect S mode TLB. Maybe we can only flush S mode TLB here.
>>
> It's also in effect when MPRV=1 and MPP=S in M mode, we can only flush
> the tlb of PRV_S and PRV_M.

OK. Good point.

Regards,

Weiwei Li

>
> Thanks,
> Fei.
>
>>> +        } else {
>>> +            for (int i = 0; i < env->sum_u_count; ++i) {
>>> +                tlb_flush_page_by_mmuidx(env_cpu(env),
>>> env->sum_u_addr[i],
>>> +                                         1 << PRV_S | 1 << PRV_M);
>> Similar case here.
>>
>> Regards,
>>
>> Weiwei Li
>>
>>> +            }
>>> +        }
>>> +        env->sum_u_count = 0;
>>>        }
>>> +
>>>        mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
>>>            MSTATUS_SPP | MSTATUS_MPRV | MSTATUS_SUM |
>>>            MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR |
Richard Henderson March 21, 2023, 4:10 p.m. UTC | #11
On 3/20/23 23:37, fei2.wu@intel.com wrote:
> From: Fei Wu <fei2.wu@intel.com>
> 
> Kernel needs to access user mode memory e.g. during syscalls, the window
> is usually opened up for a very limited time through MSTATUS.SUM, the
> overhead is too much if tlb_flush() gets called for every SUM change.
> This patch saves addresses accessed when SUM=1, and flushs only these
> pages when SUM changes to 0. If the buffer is not large enough to save
> all the pages during SUM=1, it will fall back to tlb_flush when
> necessary.
> 
> The buffer size is set to 4 since in this MSTATUS.SUM open-up window,
> most of the time kernel accesses 1 or 2 pages, it's very rare to see
> more than 4 pages accessed.
> 
> It's not necessary to save/restore these new added status, as
> tlb_flush() is always called after restore.
> 
> Result of 'pipe 10' from unixbench boosts from 223656 to 1327407. Many
> other syscalls benefit a lot from this one too.

This is not the correct approach.

You should be making use of different softmmu indexes, similar to how ARM uses a separate 
index for PAN (privileged access never) mode.  If I read the manual properly, PAN == !SUM.

When you do this, you need no additional flushing.


r~
LIU Zhiwei March 22, 2023, 1:58 a.m. UTC | #12
On 2023/3/22 0:10, Richard Henderson wrote:
> On 3/20/23 23:37, fei2.wu@intel.com wrote:
>> From: Fei Wu <fei2.wu@intel.com>
>>
>> Kernel needs to access user mode memory e.g. during syscalls, the window
>> is usually opened up for a very limited time through MSTATUS.SUM, the
>> overhead is too much if tlb_flush() gets called for every SUM change.
>> This patch saves addresses accessed when SUM=1, and flushs only these
>> pages when SUM changes to 0. If the buffer is not large enough to save
>> all the pages during SUM=1, it will fall back to tlb_flush when
>> necessary.
>>
>> The buffer size is set to 4 since in this MSTATUS.SUM open-up window,
>> most of the time kernel accesses 1 or 2 pages, it's very rare to see
>> more than 4 pages accessed.
>>
>> It's not necessary to save/restore these new added status, as
>> tlb_flush() is always called after restore.
>>
>> Result of 'pipe 10' from unixbench boosts from 223656 to 1327407. Many
>> other syscalls benefit a lot from this one too.
>
> This is not the correct approach.
>
> You should be making use of different softmmu indexes, similar to how 
> ARM uses a separate index for PAN (privileged access never) mode.  If 
> I read the manual properly, PAN == !SUM.
>
> When you do this, you need no additional flushing.

Hi Fei,

Let's follow Richard's advice.

Zhiwei

>
>
> r~
Wu, Fei March 22, 2023, 2:47 a.m. UTC | #13
On 3/22/2023 9:58 AM, LIU Zhiwei wrote:
> 
> On 2023/3/22 0:10, Richard Henderson wrote:
>> On 3/20/23 23:37, fei2.wu@intel.com wrote:
>>> From: Fei Wu <fei2.wu@intel.com>
>>>
>>> Kernel needs to access user mode memory e.g. during syscalls, the window
>>> is usually opened up for a very limited time through MSTATUS.SUM, the
>>> overhead is too much if tlb_flush() gets called for every SUM change.
>>> This patch saves addresses accessed when SUM=1, and flushs only these
>>> pages when SUM changes to 0. If the buffer is not large enough to save
>>> all the pages during SUM=1, it will fall back to tlb_flush when
>>> necessary.
>>>
>>> The buffer size is set to 4 since in this MSTATUS.SUM open-up window,
>>> most of the time kernel accesses 1 or 2 pages, it's very rare to see
>>> more than 4 pages accessed.
>>>
>>> It's not necessary to save/restore these new added status, as
>>> tlb_flush() is always called after restore.
>>>
>>> Result of 'pipe 10' from unixbench boosts from 223656 to 1327407. Many
>>> other syscalls benefit a lot from this one too.
>>
>> This is not the correct approach.
>>
>> You should be making use of different softmmu indexes, similar to how
>> ARM uses a separate index for PAN (privileged access never) mode.  If
>> I read the manual properly, PAN == !SUM.
>>
>> When you do this, you need no additional flushing.
> 
> Hi Fei,
> 
> Let's follow Richard's advice.
>Yes, I'm thinking about how to do it, and thank Richard for the advice.

My question is:
* If we ensure this separate index (S+SUM) has no overlapping tlb
entries with S-mode (ignore M-mode so far), during SUM=1, we have to
look into both (S+SUM) and S index for kernel address translation, that
should be not desired.

* If all the tlb operations are against (S+SUM) during SUM=1, then
(S+SUM) could contain some duplicated tlb entries of kernel address in S
index, the duplication means extra tlb lookup and fill. Also if we want
to flush tlb entry of specific addr0, we have to flush both index.

I will take a look at how arm handles this.

Thanks,
Fei.

> Zhiwei
> 
>>
>>
>> r~
LIU Zhiwei March 22, 2023, 3:16 a.m. UTC | #14
On 2023/3/22 10:47, Wu, Fei wrote:
> On 3/22/2023 9:58 AM, LIU Zhiwei wrote:
>> On 2023/3/22 0:10, Richard Henderson wrote:
>>> On 3/20/23 23:37, fei2.wu@intel.com wrote:
>>>> From: Fei Wu <fei2.wu@intel.com>
>>>>
>>>> Kernel needs to access user mode memory e.g. during syscalls, the window
>>>> is usually opened up for a very limited time through MSTATUS.SUM, the
>>>> overhead is too much if tlb_flush() gets called for every SUM change.
>>>> This patch saves addresses accessed when SUM=1, and flushs only these
>>>> pages when SUM changes to 0. If the buffer is not large enough to save
>>>> all the pages during SUM=1, it will fall back to tlb_flush when
>>>> necessary.
>>>>
>>>> The buffer size is set to 4 since in this MSTATUS.SUM open-up window,
>>>> most of the time kernel accesses 1 or 2 pages, it's very rare to see
>>>> more than 4 pages accessed.
>>>>
>>>> It's not necessary to save/restore these new added status, as
>>>> tlb_flush() is always called after restore.
>>>>
>>>> Result of 'pipe 10' from unixbench boosts from 223656 to 1327407. Many
>>>> other syscalls benefit a lot from this one too.
>>> This is not the correct approach.
>>>
>>> You should be making use of different softmmu indexes, similar to how
>>> ARM uses a separate index for PAN (privileged access never) mode.  If
>>> I read the manual properly, PAN == !SUM.
>>>
>>> When you do this, you need no additional flushing.
>> Hi Fei,
>>
>> Let's follow Richard's advice.
>> Yes, I'm thinking about how to do it, and thank Richard for the advice.
> My question is:
> * If we ensure this separate index (S+SUM) has no overlapping tlb
> entries with S-mode (ignore M-mode so far), during SUM=1,
Yes, every mmu index will have their own cache.
> we have to
> look into both (S+SUM) and S index for kernel address translation, that
> should be not desired.
No, we  have to choose one, because each access will be constrained with 
a mmu idex.
>
> * If all the tlb operations are against (S+SUM) during SUM=1, then
> (S+SUM) could contain some duplicated tlb entries of kernel address in S
> index, the duplication means extra tlb lookup and fill. Also if we want
> to flush tlb entry of specific addr0, we have to flush both index.

This is not the case.

Zhiwei

>
> I will take a look at how arm handles this.
>
> Thanks,
> Fei.
>
>> Zhiwei
>>
>>>
>>> r~
Richard Henderson March 22, 2023, 3:31 a.m. UTC | #15
On 3/21/23 19:47, Wu, Fei wrote:
>>> You should be making use of different softmmu indexes, similar to how
>>> ARM uses a separate index for PAN (privileged access never) mode.  If
>>> I read the manual properly, PAN == !SUM.
>>>
>>> When you do this, you need no additional flushing.
>>
>> Hi Fei,
>>
>> Let's follow Richard's advice.
>> Yes, I'm thinking about how to do it, and thank Richard for the advice.
> 
> My question is:
> * If we ensure this separate index (S+SUM) has no overlapping tlb
> entries with S-mode (ignore M-mode so far), during SUM=1, we have to
> look into both (S+SUM) and S index for kernel address translation, that
> should be not desired.

This is an incorrect assumption.  S+SUM may very well have overlapping tlb entries with S.
With SUM=1, you *only* look in S+SUM index; with SUM=0, you *only* look in S index.

The only difference is a check in get_physical_address is no longer against MSTATUS_SUM 
directly, but against the mmu_index.

> * If all the tlb operations are against (S+SUM) during SUM=1, then
> (S+SUM) could contain some duplicated tlb entries of kernel address in S
> index, the duplication means extra tlb lookup and fill.

Yes, if the same address is probed via S and S+SUM, there is a duplicated lookup.  But 
this is harmless.


> Also if we want
> to flush tlb entry of specific addr0, we have to flush both index.

Yes, this is also true.  But so far target/riscv is making no use of per-mmuidx flushing. 
At the moment you're *only* using tlb_flush(cpu), which flushes every mmuidx.  Nor are you 
making use of per-page flushing.

So, really, no change required at all there.


r~
Wu, Fei March 22, 2023, 3:36 a.m. UTC | #16
On 3/22/2023 11:31 AM, Richard Henderson wrote:
> On 3/21/23 19:47, Wu, Fei wrote:
>>>> You should be making use of different softmmu indexes, similar to how
>>>> ARM uses a separate index for PAN (privileged access never) mode.  If
>>>> I read the manual properly, PAN == !SUM.
>>>>
>>>> When you do this, you need no additional flushing.
>>>
>>> Hi Fei,
>>>
>>> Let's follow Richard's advice.
>>> Yes, I'm thinking about how to do it, and thank Richard for the advice.
>>
>> My question is:
>> * If we ensure this separate index (S+SUM) has no overlapping tlb
>> entries with S-mode (ignore M-mode so far), during SUM=1, we have to
>> look into both (S+SUM) and S index for kernel address translation, that
>> should be not desired.
> 
> This is an incorrect assumption.  S+SUM may very well have overlapping
> tlb entries with S.
> With SUM=1, you *only* look in S+SUM index; with SUM=0, you *only* look
> in S index.
> 
> The only difference is a check in get_physical_address is no longer
> against MSTATUS_SUM directly, but against the mmu_index.
> 
>> * If all the tlb operations are against (S+SUM) during SUM=1, then
>> (S+SUM) could contain some duplicated tlb entries of kernel address in S
>> index, the duplication means extra tlb lookup and fill.
> 
> Yes, if the same address is probed via S and S+SUM, there is a
> duplicated lookup.  But this is harmless.
> 
> 
>> Also if we want
>> to flush tlb entry of specific addr0, we have to flush both index.
> 
> Yes, this is also true.  But so far target/riscv is making no use of
> per-mmuidx flushing. At the moment you're *only* using tlb_flush(cpu),
> which flushes every mmuidx.  Nor are you making use of per-page flushing.
> 
> So, really, no change required at all there.
> 
Got it, let me try this method.

Thanks,
Fei.

> 
> r~
Wu, Fei March 22, 2023, 6:40 a.m. UTC | #17
On 3/22/2023 11:36 AM, Wu, Fei wrote:
> On 3/22/2023 11:31 AM, Richard Henderson wrote:
>> On 3/21/23 19:47, Wu, Fei wrote:
>>>>> You should be making use of different softmmu indexes, similar to how
>>>>> ARM uses a separate index for PAN (privileged access never) mode.  If
>>>>> I read the manual properly, PAN == !SUM.
>>>>>
>>>>> When you do this, you need no additional flushing.
>>>>
>>>> Hi Fei,
>>>>
>>>> Let's follow Richard's advice.
>>>> Yes, I'm thinking about how to do it, and thank Richard for the advice.
>>>
>>> My question is:
>>> * If we ensure this separate index (S+SUM) has no overlapping tlb
>>> entries with S-mode (ignore M-mode so far), during SUM=1, we have to
>>> look into both (S+SUM) and S index for kernel address translation, that
>>> should be not desired.
>>
>> This is an incorrect assumption.  S+SUM may very well have overlapping
>> tlb entries with S.
>> With SUM=1, you *only* look in S+SUM index; with SUM=0, you *only* look
>> in S index.
>>
>> The only difference is a check in get_physical_address is no longer
>> against MSTATUS_SUM directly, but against the mmu_index.
>>
>>> * If all the tlb operations are against (S+SUM) during SUM=1, then
>>> (S+SUM) could contain some duplicated tlb entries of kernel address in S
>>> index, the duplication means extra tlb lookup and fill.
>>
>> Yes, if the same address is probed via S and S+SUM, there is a
>> duplicated lookup.  But this is harmless.
>>
>>
>>> Also if we want
>>> to flush tlb entry of specific addr0, we have to flush both index.
>>
>> Yes, this is also true.  But so far target/riscv is making no use of
>> per-mmuidx flushing. At the moment you're *only* using tlb_flush(cpu),
>> which flushes every mmuidx.  Nor are you making use of per-page flushing.
>>
>> So, really, no change required at all there.
>>
> Got it, let me try this method.
> 
There seems no room in flags for this extra index, all 3 bits for
mem_idx have been used in target/riscv/cpu.h. We need some trick.

#define TB_FLAGS_PRIV_MMU_MASK                3
#define TB_FLAGS_PRIV_HYP_ACCESS_MASK   (1 << 2)

FIELD(TB_FLAGS, MEM_IDX, 0, 3)
FIELD(TB_FLAGS, LMUL, 3, 3)

Thanks,
Fei.

> Thanks,
> Fei.
> 
>>
>> r~
>
LIU Zhiwei March 22, 2023, 6:50 a.m. UTC | #18
On 2023/3/22 14:40, Wu, Fei wrote:
> On 3/22/2023 11:36 AM, Wu, Fei wrote:
>> On 3/22/2023 11:31 AM, Richard Henderson wrote:
>>> On 3/21/23 19:47, Wu, Fei wrote:
>>>>>> You should be making use of different softmmu indexes, similar to how
>>>>>> ARM uses a separate index for PAN (privileged access never) mode.  If
>>>>>> I read the manual properly, PAN == !SUM.
>>>>>>
>>>>>> When you do this, you need no additional flushing.
>>>>> Hi Fei,
>>>>>
>>>>> Let's follow Richard's advice.
>>>>> Yes, I'm thinking about how to do it, and thank Richard for the advice.
>>>> My question is:
>>>> * If we ensure this separate index (S+SUM) has no overlapping tlb
>>>> entries with S-mode (ignore M-mode so far), during SUM=1, we have to
>>>> look into both (S+SUM) and S index for kernel address translation, that
>>>> should be not desired.
>>> This is an incorrect assumption.  S+SUM may very well have overlapping
>>> tlb entries with S.
>>> With SUM=1, you *only* look in S+SUM index; with SUM=0, you *only* look
>>> in S index.
>>>
>>> The only difference is a check in get_physical_address is no longer
>>> against MSTATUS_SUM directly, but against the mmu_index.
>>>
>>>> * If all the tlb operations are against (S+SUM) during SUM=1, then
>>>> (S+SUM) could contain some duplicated tlb entries of kernel address in S
>>>> index, the duplication means extra tlb lookup and fill.
>>> Yes, if the same address is probed via S and S+SUM, there is a
>>> duplicated lookup.  But this is harmless.
>>>
>>>
>>>> Also if we want
>>>> to flush tlb entry of specific addr0, we have to flush both index.
>>> Yes, this is also true.  But so far target/riscv is making no use of
>>> per-mmuidx flushing. At the moment you're *only* using tlb_flush(cpu),
>>> which flushes every mmuidx.  Nor are you making use of per-page flushing.
>>>
>>> So, really, no change required at all there.
>>>
>> Got it, let me try this method.
>>
> There seems no room in flags for this extra index, all 3 bits for
> mem_idx have been used in target/riscv/cpu.h. We need some trick.
>
> #define TB_FLAGS_PRIV_MMU_MASK                3
> #define TB_FLAGS_PRIV_HYP_ACCESS_MASK   (1 << 2)

#define TB_FLAGS_PRIV_HYP_ACCESS_MASK   (1 << 3)

Renumber the new mmu index to 5 (Probably by extending the function riscv_cpu_mmu_index)

Zhiwei

> FIELD(TB_FLAGS, MEM_IDX, 0, 3)
> FIELD(TB_FLAGS, LMUL, 3, 3)
>
> Thanks,
> Fei.
>
>> Thanks,
>> Fei.
>>
>>> r~
diff mbox series

Patch

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 638e47c75a..926dbce59f 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -383,6 +383,10 @@  struct CPUArchState {
     uint64_t kvm_timer_compare;
     uint64_t kvm_timer_state;
     uint64_t kvm_timer_frequency;
+
+#define MAX_CACHED_SUM_U_ADDR_NUM 4
+    uint64_t sum_u_count;
+    uint64_t sum_u_addr[MAX_CACHED_SUM_U_ADDR_NUM];
 };
 
 OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index f88c503cf4..5ad0418eb6 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -1068,6 +1068,13 @@  restart:
                     (access_type == MMU_DATA_STORE || (pte & PTE_D))) {
                 *prot |= PAGE_WRITE;
             }
+            if ((pte & PTE_U) && (mode & PRV_S) &&
+                    get_field(env->mstatus, MSTATUS_SUM)) {
+                if (env->sum_u_count < MAX_CACHED_SUM_U_ADDR_NUM) {
+                    env->sum_u_addr[env->sum_u_count] = addr;
+                }
+                ++env->sum_u_count;
+            }
             return TRANSLATE_SUCCESS;
         }
     }
diff --git a/target/riscv/csr.c b/target/riscv/csr.c
index ab566639e5..74b7638c8a 100644
--- a/target/riscv/csr.c
+++ b/target/riscv/csr.c
@@ -1246,9 +1246,21 @@  static RISCVException write_mstatus(CPURISCVState *env, int csrno,
 
     /* flush tlb on mstatus fields that affect VM */
     if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
-            MSTATUS_MPRV | MSTATUS_SUM)) {
+            MSTATUS_MPRV)) {
         tlb_flush(env_cpu(env));
+        env->sum_u_count = 0;
+    } else if ((mstatus & MSTATUS_SUM) && !(val & MSTATUS_SUM)) {
+        if (env->sum_u_count > MAX_CACHED_SUM_U_ADDR_NUM) {
+            tlb_flush(env_cpu(env));
+        } else {
+            for (int i = 0; i < env->sum_u_count; ++i) {
+                tlb_flush_page_by_mmuidx(env_cpu(env), env->sum_u_addr[i],
+                                         1 << PRV_S | 1 << PRV_M);
+            }
+        }
+        env->sum_u_count = 0;
     }
+
     mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
         MSTATUS_SPP | MSTATUS_MPRV | MSTATUS_SUM |
         MSTATUS_MPP | MSTATUS_MXR | MSTATUS_TVM | MSTATUS_TSR |