
[1/2] KVM: PPC: e6500: Handle LRAT error exception

Message ID 5603F684.9020501@freescale.com
State Changes Requested

Commit Message

Tudor Laurentiu Sept. 24, 2015, 1:11 p.m. UTC
Handle the LRAT error exception, with support for
LRAT mapping and invalidation.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
[Laurentiu.Tudor@freescale.com: addressed review feedback,
 refactoring, cleanup & other fixes]
Signed-off-by: Laurentiu Tudor <Laurentiu.Tudor@freescale.com>
---
 arch/powerpc/include/asm/kvm_host.h   |   1 +
 arch/powerpc/include/asm/kvm_ppc.h    |   2 +
 arch/powerpc/include/asm/mmu-book3e.h |  12 ++++
 arch/powerpc/include/asm/reg_booke.h  |  14 +++++
 arch/powerpc/kernel/asm-offsets.c     |   1 +
 arch/powerpc/kvm/booke.c              |  41 +++++++++++++
 arch/powerpc/kvm/bookehv_interrupts.S |   9 ++-
 arch/powerpc/kvm/e500_mmu_host.c      | 106 ++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/e500mc.c             |   4 ++
 arch/powerpc/mm/fsl_booke_mmu.c       |  10 ++++
 10 files changed, 199 insertions(+), 1 deletion(-)

Comments

Scott Wood Sept. 25, 2015, 12:10 a.m. UTC | #1
On Thu, 2015-09-24 at 16:11 +0300, Laurentiu Tudor wrote:
> diff --git a/arch/powerpc/kvm/bookehv_interrupts.S 
> b/arch/powerpc/kvm/bookehv_interrupts.S
> index 81bd8a07..1e9fa2a 100644
> --- a/arch/powerpc/kvm/bookehv_interrupts.S
> +++ b/arch/powerpc/kvm/bookehv_interrupts.S
> @@ -62,6 +62,7 @@
>  #define NEED_EMU             0x00000001 /* emulation -- save nv regs */
>  #define NEED_DEAR            0x00000002 /* save faulting DEAR */
>  #define NEED_ESR             0x00000004 /* save faulting ESR */
> +#define NEED_LPER            0x00000008 /* save faulting LPER */
>  
>  /*
>   * On entry:
> @@ -159,6 +160,12 @@
>       PPC_STL r9, VCPU_FAULT_DEAR(r4)
>       .endif
>  
> +     /* Only supported on 64-bit cores for now */
> +     .if     \flags & NEED_LPER
> +     mfspr   r7, SPRN_LPER
> +     std     r7, VCPU_FAULT_LPER(r4)
> +     .endif

What's the harm in using PPC_STL anyway?


>  /*
>   * For input register values, see 
> arch/powerpc/include/asm/kvm_booke_hv_asm.h
> diff --git a/arch/powerpc/kvm/e500_mmu_host.c 
> b/arch/powerpc/kvm/e500_mmu_host.c
> index 12d5c67..99ad88a 100644
> --- a/arch/powerpc/kvm/e500_mmu_host.c
> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> @@ -96,6 +96,112 @@ static inline void __write_host_tlbe(struct 
> kvm_book3e_206_tlb_entry *stlbe,
>                                     stlbe->mas2, stlbe->mas7_3);
>  }
>  
> +#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_BOOKE_HV)
> +static int lrat_next(void)
> +{

Will anything break by removing the CONFIG_64BIT condition, even if we don't 
have a 32-bit target that uses this?

> +void kvmppc_lrat_map(struct kvm_vcpu *vcpu, gfn_t gfn)
> +{
> +     struct kvm_memory_slot *slot;
> +     unsigned long pfn;
> +     unsigned long hva;
> +     struct vm_area_struct *vma;
> +     unsigned long psize;
> +     int tsize;
> +     unsigned long tsize_pages;
> +
> +     slot = gfn_to_memslot(vcpu->kvm, gfn);
> +     if (!slot) {
> +             pr_err_ratelimited("%s: couldn't find memslot for gfn %lx!\n",
> +                                __func__, (long)gfn);
> +             return;
> +     }
> +
> +     hva = slot->userspace_addr;

What if the faulting address is somewhere in the middle of the slot?  
Shouldn't you use gfn_to_hva_memslot() like kvmppc_e500_shadow_map()?  In 
fact there's probably a lot of logic that should be shared between these two 
functions.
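
(For reference, the non-LRAT path resolves the hva of the faulting gfn rather
than the slot base, roughly:

        hva = gfn_to_hva_memslot(slot, gfn);

so with the code above the vma, and therefore the page-size, lookup is done
against the start of the memslot instead of the faulting address.)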

> +     down_read(&current->mm->mmap_sem);
> +     vma = find_vma(current->mm, hva);
> +     if (vma && (hva >= vma->vm_start)) {
> +             psize = vma_kernel_pagesize(vma);

What if it's VM_PFNMAP?

> +     } else {
> +             pr_err_ratelimited("%s: couldn't find virtual memory address for gfn 
> %lx!\n",
> +                                __func__, (long)gfn);
> +             up_read(&current->mm->mmap_sem);
> +             return;
> +     }
> +     up_read(&current->mm->mmap_sem);
> +
> +     pfn = gfn_to_pfn_memslot(slot, gfn);
> +     if (is_error_noslot_pfn(pfn)) {
> +             pr_err_ratelimited("%s: couldn't get real page for gfn %lx!\n",
> +                                __func__, (long)gfn);
> +             return;
> +     }
> +
> +     tsize = __ilog2(psize) - 10;
> +     tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);

1UL << ...

kvmppc_e500_shadow_map needs the same fix.

> +     gfn &= ~(tsize_pages - 1);
> +     pfn &= ~(tsize_pages - 1);
> +
> +     write_host_lrate(tsize, gfn, pfn, vcpu->kvm->arch.lpid, true);
> +
> +     kvm_release_pfn_clean(pfn);
> +}
> +
> +void kvmppc_lrat_invalidate(struct kvm_vcpu *vcpu)
> +{
> +     uint32_t mas0, mas1 = 0;
> +     int esel;
> +     unsigned long flags;
> +
> +     local_irq_save(flags);
> +
> +     /* LRAT does not have a dedicated instruction for invalidation */
> +     for (esel = 0; esel < get_paca()->tcd_ptr->lrat_max; esel++) {
> +             mas0 = MAS0_ATSEL | MAS0_ESEL(esel);
> +             mtspr(SPRN_MAS0, mas0);
> +             asm volatile("isync; tlbre" : : : "memory");
> +             mas1 = mfspr(SPRN_MAS1) & ~MAS1_VALID;
> +             mtspr(SPRN_MAS1, mas1);
> +             asm volatile("isync; tlbwe" : : : "memory");
> +     }
> +     /* Must clear mas8 for other host tlbwe's */
> +     mtspr(SPRN_MAS8, 0);
> +     isync();
> +
> +     local_irq_restore(flags);
> +}
> +#endif /* CONFIG_64BIT && CONFIG_KVM_BOOKE_HV */
> +
>  /*
>   * Acquire a mas0 with victim hint, as if we just took a TLB miss.
>   *
> diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
> index cda695d..5856f8f 100644
> --- a/arch/powerpc/kvm/e500mc.c
> +++ b/arch/powerpc/kvm/e500mc.c
> @@ -99,6 +99,10 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 
> *vcpu_e500)
>       asm volatile("tlbilxlpid");
>       mtspr(SPRN_MAS5, 0);
>       local_irq_restore(flags);
> +
> +#ifdef PPC64
> +     kvmppc_lrat_invalidate(&vcpu_e500->vcpu);
> +#endif

Don't you mean CONFIG_PPC64 (or CONFIG_64BIT to be consistent)?

>  }
>  
>  void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
> diff --git a/arch/powerpc/mm/fsl_booke_mmu.c 
> b/arch/powerpc/mm/fsl_booke_mmu.c
> index 9c90e66..b0da4b9 100644
> --- a/arch/powerpc/mm/fsl_booke_mmu.c
> +++ b/arch/powerpc/mm/fsl_booke_mmu.c
> @@ -194,6 +194,16 @@ static unsigned long map_mem_in_cams_addr(phys_addr_t 
> phys, unsigned long virt,
>       get_paca()->tcd.esel_next = i;
>       get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
>       get_paca()->tcd.esel_first = i;
> +
> +#ifdef CONFIG_KVM_BOOKE_HV
> +     get_paca()->tcd.lrat_next = 0;
> +     if (((mfspr(SPRN_MMUCFG) & MMUCFG_MAVN) == MMUCFG_MAVN_V2) &&
> +         (mfspr(SPRN_MMUCFG) & MMUCFG_LRAT)) {
> +             get_paca()->tcd.lrat_max = mfspr(SPRN_LRATCFG) & LRATCFG_NENTRY;
> +     } else {
> +             get_paca()->tcd.lrat_max = 0;
> +     }

Unnecessary braces

-Scott

Tudor Laurentiu Sept. 30, 2015, 10:32 a.m. UTC | #2
On 09/25/2015 03:10 AM, Scott Wood wrote:
> On Thu, 2015-09-24 at 16:11 +0300, Laurentiu Tudor wrote:
>> diff --git a/arch/powerpc/kvm/bookehv_interrupts.S 
>> b/arch/powerpc/kvm/bookehv_interrupts.S
>> index 81bd8a07..1e9fa2a 100644
>> --- a/arch/powerpc/kvm/bookehv_interrupts.S
>> +++ b/arch/powerpc/kvm/bookehv_interrupts.S
>> @@ -62,6 +62,7 @@
>>  #define NEED_EMU             0x00000001 /* emulation -- save nv regs */
>>  #define NEED_DEAR            0x00000002 /* save faulting DEAR */
>>  #define NEED_ESR             0x00000004 /* save faulting ESR */
>> +#define NEED_LPER            0x00000008 /* save faulting LPER */
>>  
>>  /*
>>   * On entry:
>> @@ -159,6 +160,12 @@
>>       PPC_STL r9, VCPU_FAULT_DEAR(r4)
>>       .endif
>>  
>> +     /* Only supported on 64-bit cores for now */
>> +     .if     \flags & NEED_LPER
>> +     mfspr   r7, SPRN_LPER
>> +     std     r7, VCPU_FAULT_LPER(r4)
>> +     .endif
> 
> What's the harm in using PPC_STL anyway?

Will do so.
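
(For reference, PPC_STL is the conditional store macro from asm/asm-compat.h,
roughly:

        #ifdef __powerpc64__
        #define PPC_STL         stringify_in_c(std)
        #else
        #define PPC_STL         stringify_in_c(stw)
        #endif

so on 64-bit builds it assembles to the same std instruction and nothing is
lost by using it here.)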
 
> 
>>  /*
>>   * For input register values, see 
>> arch/powerpc/include/asm/kvm_booke_hv_asm.h
>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c 
>> b/arch/powerpc/kvm/e500_mmu_host.c
>> index 12d5c67..99ad88a 100644
>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>> @@ -96,6 +96,112 @@ static inline void __write_host_tlbe(struct 
>> kvm_book3e_206_tlb_entry *stlbe,
>>                                     stlbe->mas2, stlbe->mas7_3);
>>  }
>>  
>> +#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_BOOKE_HV)
>> +static int lrat_next(void)
>> +{
> 
> Will anything break by removing the CONFIG_64BIT condition, even if we don't 
> have a 32-bit target that uses this?

Not completely certain, but I remember getting compile or link errors
on 32-bit e500mc or e500v2. I can recheck if you want.

>> +void kvmppc_lrat_map(struct kvm_vcpu *vcpu, gfn_t gfn)
>> +{
>> +     struct kvm_memory_slot *slot;
>> +     unsigned long pfn;
>> +     unsigned long hva;
>> +     struct vm_area_struct *vma;
>> +     unsigned long psize;
>> +     int tsize;
>> +     unsigned long tsize_pages;
>> +
>> +     slot = gfn_to_memslot(vcpu->kvm, gfn);
>> +     if (!slot) {
>> +             pr_err_ratelimited("%s: couldn't find memslot for gfn %lx!\n",
>> +                                __func__, (long)gfn);
>> +             return;
>> +     }
>> +
>> +     hva = slot->userspace_addr;
> 
> What if the faulting address is somewhere in the middle of the slot?  
> Shouldn't you use gfn_to_hva_memslot() like kvmppc_e500_shadow_map()?  In 
> fact there's probably a lot of logic that should be shared between these two 
> functions.

So, if my understanding is correct, most of the gfn -> pfn translation
logic done in kvmppc_e500_shadow_map() should also be present here.
If that's the case, maybe I should first extract that code (which includes
the VM_PFNMAP handling) into a separate function and call it from both
kvmppc_lrat_map() and kvmppc_e500_shadow_map().
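
Roughly something like this (just a sketch, the helper name is made up), which
both callers could then use:

        /*
         * Hypothetical shared helper: resolve gfn -> (pfn, tsize) within a
         * memslot.  The VM_PFNMAP handling currently in
         * kvmppc_e500_shadow_map() would move in here as well.
         */
        static int kvmppc_e500_gfn_to_pfn(struct kvm_vcpu *vcpu,
                                          struct kvm_memory_slot *slot,
                                          gfn_t gfn, unsigned long *pfnp,
                                          int *tsizep)
        {
                unsigned long hva = gfn_to_hva_memslot(slot, gfn);
                struct vm_area_struct *vma;
                unsigned long psize;
                unsigned long pfn;

                down_read(&current->mm->mmap_sem);
                vma = find_vma(current->mm, hva);
                if (!vma || hva < vma->vm_start) {
                        up_read(&current->mm->mmap_sem);
                        return -EFAULT;
                }
                psize = vma_kernel_pagesize(vma);
                up_read(&current->mm->mmap_sem);

                pfn = gfn_to_pfn_memslot(slot, gfn);
                if (is_error_noslot_pfn(pfn))
                        return -EFAULT;

                *pfnp = pfn;
                *tsizep = __ilog2(psize) - 10;
                return 0;
        }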

 
>> +     down_read(&current->mm->mmap_sem);
>> +     vma = find_vma(current->mm, hva);
>> +     if (vma && (hva >= vma->vm_start)) {
>> +             psize = vma_kernel_pagesize(vma);
> 
> What if it's VM_PFNMAP?
> 
>> +     } else {
>> +             pr_err_ratelimited("%s: couldn't find virtual memory address for gfn 
>> %lx!\n",
>> +                                __func__, (long)gfn);
>> +             up_read(&current->mm->mmap_sem);
>> +             return;
>> +     }
>> +     up_read(&current->mm->mmap_sem);
>> +
>> +     pfn = gfn_to_pfn_memslot(slot, gfn);
>> +     if (is_error_noslot_pfn(pfn)) {
>> +             pr_err_ratelimited("%s: couldn't get real page for gfn %lx!\n",
>> +                                __func__, (long)gfn);
>> +             return;
>> +     }
>> +
>> +     tsize = __ilog2(psize) - 10;
>> +     tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
> 
> 1UL << ...
> 
> kvmppc_e500_shadow_map needs the same fix.

I'll make a distinct patch with the kvmppc_e500_shadow_map() fix.
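
(The problem being that the 1 literal is an int, so the shift is done in
32-bit arithmetic even though tsize_pages is unsigned long:

        tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);   /* int shift; overflows once the count reaches 31 */
        tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT); /* done in unsigned long, 64-bit on ppc64 */

)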

>> +     gfn &= ~(tsize_pages - 1);
>> +     pfn &= ~(tsize_pages - 1);
>> +
>> +     write_host_lrate(tsize, gfn, pfn, vcpu->kvm->arch.lpid, true);
>> +
>> +     kvm_release_pfn_clean(pfn);
>> +}
>> +
>> +void kvmppc_lrat_invalidate(struct kvm_vcpu *vcpu)
>> +{
>> +     uint32_t mas0, mas1 = 0;
>> +     int esel;
>> +     unsigned long flags;
>> +
>> +     local_irq_save(flags);
>> +
>> +     /* LRAT does not have a dedicated instruction for invalidation */
>> +     for (esel = 0; esel < get_paca()->tcd_ptr->lrat_max; esel++) {
>> +             mas0 = MAS0_ATSEL | MAS0_ESEL(esel);
>> +             mtspr(SPRN_MAS0, mas0);
>> +             asm volatile("isync; tlbre" : : : "memory");
>> +             mas1 = mfspr(SPRN_MAS1) & ~MAS1_VALID;
>> +             mtspr(SPRN_MAS1, mas1);
>> +             asm volatile("isync; tlbwe" : : : "memory");
>> +     }
>> +     /* Must clear mas8 for other host tlbwe's */
>> +     mtspr(SPRN_MAS8, 0);
>> +     isync();
>> +
>> +     local_irq_restore(flags);
>> +}
>> +#endif /* CONFIG_64BIT && CONFIG_KVM_BOOKE_HV */
>> +
>>  /*
>>   * Acquire a mas0 with victim hint, as if we just took a TLB miss.
>>   *
>> diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
>> index cda695d..5856f8f 100644
>> --- a/arch/powerpc/kvm/e500mc.c
>> +++ b/arch/powerpc/kvm/e500mc.c
>> @@ -99,6 +99,10 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 
>> *vcpu_e500)
>>       asm volatile("tlbilxlpid");
>>       mtspr(SPRN_MAS5, 0);
>>       local_irq_restore(flags);
>> +
>> +#ifdef PPC64
>> +     kvmppc_lrat_invalidate(&vcpu_e500->vcpu);
>> +#endif
> 
> Don't you mean CONFIG_PPC64 (or CONFIG_64BIT to be consistent)?

Absolutely. Thanks for spotting this.

---
Best Regards, Laurentiu
Tudor Laurentiu Sept. 30, 2015, 11:27 a.m. UTC | #3
On 09/30/2015 01:32 PM, Laurentiu Tudor wrote:
> On 09/25/2015 03:10 AM, Scott Wood wrote:
>> On Thu, 2015-09-24 at 16:11 +0300, Laurentiu Tudor wrote:

[snip]

>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>> index 12d5c67..99ad88a 100644
>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>> @@ -96,6 +96,112 @@ static inline void __write_host_tlbe(struct 
>>> kvm_book3e_206_tlb_entry *stlbe,
>>>                                     stlbe->mas2, stlbe->mas7_3);
>>>  }
>>>  
>>> +#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_BOOKE_HV)
>>> +static int lrat_next(void)
>>> +{
>>
>> Will anything break by removing the CONFIG_64BIT condition, even if we don't 
>> have a 32-bit target that uses this?
> 
> Not completely certain, but I remember getting compile or link errors
> on 32-bit e500mc or e500v2. I can recheck if you want.
> 
>>> +void kvmppc_lrat_map(struct kvm_vcpu *vcpu, gfn_t gfn)
>>> +{
>>> +     struct kvm_memory_slot *slot;
>>> +     unsigned long pfn;
>>> +     unsigned long hva;
>>> +     struct vm_area_struct *vma;
>>> +     unsigned long psize;
>>> +     int tsize;
>>> +     unsigned long tsize_pages;
>>> +
>>> +     slot = gfn_to_memslot(vcpu->kvm, gfn);
>>> +     if (!slot) {
>>> +             pr_err_ratelimited("%s: couldn't find memslot for gfn %lx!\n",
>>> +                                __func__, (long)gfn);
>>> +             return;
>>> +     }
>>> +
>>> +     hva = slot->userspace_addr;
>>
>> What if the faulting address is somewhere in the middle of the slot?  
>> Shouldn't you use gfn_to_hva_memslot() like kvmppc_e500_shadow_map()?  In 
>> fact there's probably a lot of logic that should be shared between these two 
>> functions.
> 
> So, if my understanding is correct, most of the gfn -> pfn translation
> logic done in kvmppc_e500_shadow_map() should also be present here.
> If that's the case, maybe I should first extract that code (which includes
> the VM_PFNMAP handling) into a separate function and call it from both
> kvmppc_lrat_map() and kvmppc_e500_shadow_map().
> 

Off-topic, but I just noticed that kvmppc_e500_shadow_map() is marked as inline.
Was that on purpose? Is inlining such a large function worth anything?

---
Best Regards, Laurentiu
Scott Wood Sept. 30, 2015, 4:55 p.m. UTC | #4
On Wed, 2015-09-30 at 14:27 +0300, Laurentiu Tudor wrote:
> On 09/30/2015 01:32 PM, Laurentiu Tudor wrote:
> > On 09/25/2015 03:10 AM, Scott Wood wrote:
> > > On Thu, 2015-09-24 at 16:11 +0300, Laurentiu Tudor wrote:
> 
> [snip]
> 
> > > > b/arch/powerpc/kvm/e500_mmu_host.c
> > > > index 12d5c67..99ad88a 100644
> > > > --- a/arch/powerpc/kvm/e500_mmu_host.c
> > > > +++ b/arch/powerpc/kvm/e500_mmu_host.c
> > > > @@ -96,6 +96,112 @@ static inline void __write_host_tlbe(struct 
> > > > kvm_book3e_206_tlb_entry *stlbe,
> > > >                                     stlbe->mas2, stlbe->mas7_3);
> > > >  }
> > > >  
> > > > +#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_BOOKE_HV)
> > > > +static int lrat_next(void)
> > > > +{
> > > 
> > > Will anything break by removing the CONFIG_64BIT condition, even if we 
> > > don't 
> > > have a 32-bit target that uses this?
> > 
> > Not completely certain, but I remember getting compile or link errors
> > on 32-bit e500mc or e500v2. I can recheck if you want.
> > 
> > > > +void kvmppc_lrat_map(struct kvm_vcpu *vcpu, gfn_t gfn)
> > > > +{
> > > > +     struct kvm_memory_slot *slot;
> > > > +     unsigned long pfn;
> > > > +     unsigned long hva;
> > > > +     struct vm_area_struct *vma;
> > > > +     unsigned long psize;
> > > > +     int tsize;
> > > > +     unsigned long tsize_pages;
> > > > +
> > > > +     slot = gfn_to_memslot(vcpu->kvm, gfn);
> > > > +     if (!slot) {
> > > > +             pr_err_ratelimited("%s: couldn't find memslot for gfn 
> > > > %lx!\n",
> > > > +                                __func__, (long)gfn);
> > > > +             return;
> > > > +     }
> > > > +
> > > > +     hva = slot->userspace_addr;
> > > 
> > > What if the faulting address is somewhere in the middle of the slot?  
> > > Shouldn't you use gfn_to_hva_memslot() like kvmppc_e500_shadow_map()?  
> > > In 
> > > fact there's probably a lot of logic that should be shared between 
> > > these two 
> > > functions.
> > 
> > So, if my understanding is correct, most of the gfn -> pfn translation
> > logic done in kvmppc_e500_shadow_map() should also be present here.
> > If that's the case, maybe I should first extract that code (which includes
> > the VM_PFNMAP handling) into a separate function and call it from both
> > kvmppc_lrat_map() and kvmppc_e500_shadow_map().
> > 
> 
> Off-topic, but I just noticed that kvmppc_e500_shadow_map() is marked as
> inline.
> Was that on purpose? Is inlining such a large function worth anything?

I don't remember the intent.  It was probably a lot smaller back then.  That 
said, it's only used in two places, with probably pretty good temporal 
separation between the performance-intensive uses of one versus the other (so 
not a huge icache concern), and a pretty good portion of the function will be 
optimized out in the caller with tlbsel == 0.
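
(A contrived illustration of that last point: with a constant tlbsel at the
call site, inlining lets the compiler fold the other branch away entirely:

        static inline int map_entry(int tlbsel)
        {
                if (tlbsel == 1)
                        return 1;       /* TLB1-specific work */
                return 0;               /* TLB0-specific work */
        }

        static int map_tlb0(void)
        {
                return map_entry(0);    /* the TLB1 branch disappears here */
        }

)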

-Scott

Tudor Laurentiu Oct. 1, 2015, 9:12 a.m. UTC | #5
On 09/30/2015 01:32 PM, Laurentiu Tudor wrote:
> On 09/25/2015 03:10 AM, Scott Wood wrote:
>> On Thu, 2015-09-24 at 16:11 +0300, Laurentiu Tudor wrote:

[snip]

>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c 
>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>> index 12d5c67..99ad88a 100644
>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>> @@ -96,6 +96,112 @@ static inline void __write_host_tlbe(struct 
>>> kvm_book3e_206_tlb_entry *stlbe,
>>>                                     stlbe->mas2, stlbe->mas7_3);
>>>  }
>>>  
>>> +#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_BOOKE_HV)
>>> +static int lrat_next(void)
>>> +{
>>
>> Will anything break by removing the CONFIG_64BIT condition, even if we don't 
>> have a 32-bit target that uses this?
> 
> Not completely certain, but I remember getting compile or link errors
> on 32-bit e500mc or e500v2. I can recheck if you want.
>

I double-checked this and indeed it doesn't compile on 32-bit because
lrat_next() calls get_paca().
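
(From memory, get_paca() only exists for ppc64 builds; asm/paca.h declares it
roughly as:

        /* ppc64-only: the paca lives in r13 */
        register struct paca_struct *local_paca asm("r13");
        #define get_paca()      local_paca

so the CONFIG_64BIT guard around the LRAT code has to stay for now.)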

---
Best Regards, Laurentiu


Patch

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e187b6a..b207a32 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -444,6 +444,7 @@  struct kvm_vcpu_arch {
 	u32 eplc;
 	u32 epsc;
 	u32 oldpir;
+	u64 fault_lper;
 #endif
 
 #if defined(CONFIG_BOOKE)
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index c6ef05b..ac3574f 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -117,6 +117,8 @@  extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu);
 extern int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr,
 			enum xlate_instdata xlid, enum xlate_readwrite xlrw,
 			struct kvmppc_pte *pte);
+extern void kvmppc_lrat_map(struct kvm_vcpu *vcpu, gfn_t gfn);
+extern void kvmppc_lrat_invalidate(struct kvm_vcpu *vcpu);
 
 extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
                                                 unsigned int id);
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index cd4f04a..9dbad3c 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -40,6 +40,8 @@ 
 
 /* MAS registers bit definitions */
 
+#define MAS0_ATSEL		0x80000000
+#define MAS0_ATSEL_SHIFT	31
 #define MAS0_TLBSEL_MASK	0x30000000
 #define MAS0_TLBSEL_SHIFT	28
 #define MAS0_TLBSEL(x)		(((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK)
@@ -55,6 +57,7 @@ 
 #define MAS0_WQ_CLR_RSRV       	0x00002000
 
 #define MAS1_VALID		0x80000000
+#define MAS1_VALID_SHIFT	31
 #define MAS1_IPROT		0x40000000
 #define MAS1_TID(x)		(((x) << 16) & 0x3FFF0000)
 #define MAS1_IND		0x00002000
@@ -220,6 +223,12 @@ 
 #define TLBILX_T_CLASS2			6
 #define TLBILX_T_CLASS3			7
 
+/* LRATCFG bits */
+#define LRATCFG_ASSOC		0xFF000000
+#define LRATCFG_LASIZE		0x00FE0000
+#define LRATCFG_LPID		0x00002000
+#define LRATCFG_NENTRY		0x00000FFF
+
 #ifndef __ASSEMBLY__
 #include <asm/bug.h>
 
@@ -297,6 +306,9 @@  struct tlb_core_data {
 
 	/* For software way selection, as on Freescale TLB1 */
 	u8 esel_next, esel_max, esel_first;
+#ifdef CONFIG_KVM_BOOKE_HV
+	u8 lrat_next, lrat_max;
+#endif
 };
 
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 2fef74b..265f5ec 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -55,6 +55,8 @@ 
 
 /* Special Purpose Registers (SPRNs)*/
 #define SPRN_DECAR	0x036	/* Decrementer Auto Reload Register */
+#define SPRN_LPER	0x038	/* Logical Page Exception Register */
+#define SPRN_LPERU	0x039	/* Logical Page Exception Register Upper */
 #define SPRN_IVPR	0x03F	/* Interrupt Vector Prefix Register */
 #define SPRN_USPRG0	0x100	/* User Special Purpose Register General 0 */
 #define SPRN_SPRG3R	0x103	/* Special Purpose Register General 3 Read */
@@ -76,6 +78,7 @@ 
 #define SPRN_DVC2	0x13F	/* Data Value Compare Register 2 */
 #define SPRN_LPID	0x152	/* Logical Partition ID */
 #define SPRN_MAS8	0x155	/* MMU Assist Register 8 */
+#define SPRN_LRATCFG	0x156	/* LRAT Configuration Register */
 #define SPRN_TLB0PS	0x158	/* TLB 0 Page Size Register */
 #define SPRN_TLB1PS	0x159	/* TLB 1 Page Size Register */
 #define SPRN_MAS5_MAS6	0x15c	/* MMU Assist Register 5 || 6 */
@@ -371,6 +374,9 @@ 
 #define ESR_ILK		0x00100000	/* Instr. Cache Locking */
 #define ESR_PUO		0x00040000	/* Unimplemented Operation exception */
 #define ESR_BO		0x00020000	/* Byte Ordering */
+#define ESR_DATA	0x00000400	/* Page Table Data Access */
+#define ESR_TLBI	0x00000200	/* Page Table TLB Ineligible */
+#define ESR_PT		0x00000100	/* Page Table Translation */
 #define ESR_SPV		0x00000080	/* Signal Processing operation */
 
 /* Bit definitions related to the DBCR0. */
@@ -669,6 +675,14 @@ 
 #define EPC_EPID	0x00003fff
 #define EPC_EPID_SHIFT	0
 
+/* Bit definitions for LPER */
+#define LPER_ALPN		0x000FFFFFFFFFF000ULL
+#define LPER_ALPN_SHIFT		12
+#define LPER_WIMGE		0x00000F80
+#define LPER_WIMGE_SHIFT	7
+#define LPER_LPS		0x0000000F
+#define LPER_LPS_SHIFT		0
+
 /*
  * The IBM-403 is an even more odd special case, as it is much
  * older than the IBM-405 series.  We put these down here incase someone
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index de62392..b73ceb0 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -745,6 +745,7 @@  int main(void)
 	DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4));
 	DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6));
 	DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc));
+	DEFINE(VCPU_FAULT_LPER, offsetof(struct kvm_vcpu, arch.fault_lper));
 #endif
 
 #ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ae458f0..2b9e5b1 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1322,6 +1322,47 @@  int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 	}
 
+#if defined(PPC64) && defined(CONFIG_KVM_BOOKE_HV)
+	case BOOKE_INTERRUPT_LRAT_ERROR:
+	{
+		gfn_t gfn;
+
+		/*
+		 * Guest TLB management instructions (EPCR.DGTMI == 0) is not
+		 * supported for now
+		 */
+		if (!(vcpu->arch.fault_esr & ESR_PT)) {
+			WARN_ONCE(1, "%s: Guest TLB management instructions not supported!\n",
+				  __func__);
+			break;
+		}
+
+		gfn = (vcpu->arch.fault_lper & LPER_ALPN) >> LPER_ALPN_SHIFT;
+
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+			kvmppc_lrat_map(vcpu, gfn);
+			r = RESUME_GUEST;
+		} else if (vcpu->arch.fault_esr & ESR_DATA) {
+			vcpu->arch.paddr_accessed = (gfn << PAGE_SHIFT)
+				| (vcpu->arch.fault_dear & (PAGE_SIZE - 1));
+			vcpu->arch.vaddr_accessed =
+				vcpu->arch.fault_dear;
+
+			r = kvmppc_emulate_mmio(run, vcpu);
+			kvmppc_account_exit(vcpu, MMIO_EXITS);
+		} else {
+			kvmppc_booke_queue_irqprio(vcpu,
+						BOOKE_IRQPRIO_MACHINE_CHECK);
+			r = RESUME_GUEST;
+		}
+
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
+		break;
+	}
+#endif
+
 	case BOOKE_INTERRUPT_DEBUG: {
 		r = kvmppc_handle_debug(run, vcpu);
 		if (r == RESUME_HOST)
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index 81bd8a07..1e9fa2a 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -62,6 +62,7 @@ 
 #define NEED_EMU		0x00000001 /* emulation -- save nv regs */
 #define NEED_DEAR		0x00000002 /* save faulting DEAR */
 #define NEED_ESR		0x00000004 /* save faulting ESR */
+#define NEED_LPER		0x00000008 /* save faulting LPER */
 
 /*
  * On entry:
@@ -159,6 +160,12 @@ 
 	PPC_STL	r9, VCPU_FAULT_DEAR(r4)
 	.endif
 
+	/* Only supported on 64-bit cores for now */
+	.if	\flags & NEED_LPER
+	mfspr	r7, SPRN_LPER
+	std	r7, VCPU_FAULT_LPER(r4)
+	.endif
+
 	b	kvmppc_resume_host
 .endm
 
@@ -279,7 +286,7 @@  kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
 kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
 	SPRN_CSRR0, SPRN_CSRR1, 0
 kvm_handler BOOKE_INTERRUPT_LRAT_ERROR, EX_PARAMS(GEN), \
-	SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+	SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR | NEED_LPER)
 #else
 /*
  * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 12d5c67..99ad88a 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -96,6 +96,112 @@  static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
 	                              stlbe->mas2, stlbe->mas7_3);
 }
 
+#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_BOOKE_HV)
+static int lrat_next(void)
+{
+	int next, this;
+	struct tlb_core_data *tcd = get_paca()->tcd_ptr;
+
+	this = tcd->lrat_next;
+	next = this + 1;
+	if (unlikely(next >= tcd->lrat_max))
+		next = 0;
+	tcd->lrat_next = next;
+
+	return this;
+}
+
+static void write_host_lrate(int tsize, gfn_t gfn, unsigned long pfn,
+			     uint32_t lpid, bool valid)
+{
+	struct kvm_book3e_206_tlb_entry stlbe;
+	unsigned long flags;
+
+	stlbe.mas1 = (valid ? MAS1_VALID : 0) | MAS1_TSIZE(tsize);
+	stlbe.mas2 = ((u64)gfn << PAGE_SHIFT);
+	stlbe.mas7_3 = ((u64)pfn << PAGE_SHIFT);
+	stlbe.mas8 = MAS8_TGS | lpid;
+
+	local_irq_save(flags);
+
+	__write_host_tlbe(&stlbe, MAS0_ATSEL | MAS0_ESEL(lrat_next()), lpid);
+
+	local_irq_restore(flags);
+}
+
+void kvmppc_lrat_map(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+	struct kvm_memory_slot *slot;
+	unsigned long pfn;
+	unsigned long hva;
+	struct vm_area_struct *vma;
+	unsigned long psize;
+	int tsize;
+	unsigned long tsize_pages;
+
+	slot = gfn_to_memslot(vcpu->kvm, gfn);
+	if (!slot) {
+		pr_err_ratelimited("%s: couldn't find memslot for gfn %lx!\n",
+				   __func__, (long)gfn);
+		return;
+	}
+
+	hva = slot->userspace_addr;
+
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma(current->mm, hva);
+	if (vma && (hva >= vma->vm_start)) {
+		psize = vma_kernel_pagesize(vma);
+	} else {
+		pr_err_ratelimited("%s: couldn't find virtual memory address for gfn %lx!\n",
+				   __func__, (long)gfn);
+		up_read(&current->mm->mmap_sem);
+		return;
+	}
+	up_read(&current->mm->mmap_sem);
+
+	pfn = gfn_to_pfn_memslot(slot, gfn);
+	if (is_error_noslot_pfn(pfn)) {
+		pr_err_ratelimited("%s: couldn't get real page for gfn %lx!\n",
+				   __func__, (long)gfn);
+		return;
+	}
+
+	tsize = __ilog2(psize) - 10;
+	tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
+	gfn &= ~(tsize_pages - 1);
+	pfn &= ~(tsize_pages - 1);
+
+	write_host_lrate(tsize, gfn, pfn, vcpu->kvm->arch.lpid, true);
+
+	kvm_release_pfn_clean(pfn);
+}
+
+void kvmppc_lrat_invalidate(struct kvm_vcpu *vcpu)
+{
+	uint32_t mas0, mas1 = 0;
+	int esel;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	/* LRAT does not have a dedicated instruction for invalidation */
+	for (esel = 0; esel < get_paca()->tcd_ptr->lrat_max; esel++) {
+		mas0 = MAS0_ATSEL | MAS0_ESEL(esel);
+		mtspr(SPRN_MAS0, mas0);
+		asm volatile("isync; tlbre" : : : "memory");
+		mas1 = mfspr(SPRN_MAS1) & ~MAS1_VALID;
+		mtspr(SPRN_MAS1, mas1);
+		asm volatile("isync; tlbwe" : : : "memory");
+	}
+	/* Must clear mas8 for other host tlbwe's */
+	mtspr(SPRN_MAS8, 0);
+	isync();
+
+	local_irq_restore(flags);
+}
+#endif /* CONFIG_64BIT && CONFIG_KVM_BOOKE_HV */
+
 /*
  * Acquire a mas0 with victim hint, as if we just took a TLB miss.
  *
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index cda695d..5856f8f 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -99,6 +99,10 @@  void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
 	asm volatile("tlbilxlpid");
 	mtspr(SPRN_MAS5, 0);
 	local_irq_restore(flags);
+
+#ifdef PPC64
+	kvmppc_lrat_invalidate(&vcpu_e500->vcpu);
+#endif
 }
 
 void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid)
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index 9c90e66..b0da4b9 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -194,6 +194,16 @@  static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt,
 	get_paca()->tcd.esel_next = i;
 	get_paca()->tcd.esel_max = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
 	get_paca()->tcd.esel_first = i;
+
+#ifdef CONFIG_KVM_BOOKE_HV
+	get_paca()->tcd.lrat_next = 0;
+	if (((mfspr(SPRN_MMUCFG) & MMUCFG_MAVN) == MMUCFG_MAVN_V2) &&
+	    (mfspr(SPRN_MMUCFG) & MMUCFG_LRAT)) {
+		get_paca()->tcd.lrat_max = mfspr(SPRN_LRATCFG) & LRATCFG_NENTRY;
+	} else {
+		get_paca()->tcd.lrat_max = 0;
+	}
+#endif
 #endif
 
 	return amount_mapped;