[v4,4/8] KVM: PPC: Ultravisor: Use UV_WRITE_PATE ucall to register a PATE
diff mbox series

Message ID 20190628200825.31049-5-cclaudio@linux.ibm.com
State Superseded
Headers show
Series
  • kvmppc: Paravirtualize KVM to support ultravisor
Related show

Commit Message

Claudio Carvalho June 28, 2019, 8:08 p.m. UTC
From: Michael Anderson <andmike@linux.ibm.com>

When running under an ultravisor, the ultravisor controls the real
partition table and has it in secure memory where the hypervisor can't
access it, and therefore we (the HV) have to do a ucall whenever we want
to update an entry.

The HV still keeps a copy of its view of the partition table in normal
memory so that the nest MMU can access it.

Both partition tables will have PATE entries for HV and normal virtual
machines.

Suggested-by: Ryan Grimm <grimm@linux.vnet.ibm.com>
Signed-off-by: Michael Anderson <andmike@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Ram Pai <linuxram@us.ibm.com>
[ Write the pate in HV's table before doing that in UV's ]
Signed-off-by: Claudio Carvalho <cclaudio@linux.ibm.com>
---
 arch/powerpc/include/asm/ultravisor-api.h |  5 +++-
 arch/powerpc/include/asm/ultravisor.h     | 14 ++++++++++
 arch/powerpc/mm/book3s64/hash_utils.c     |  3 +-
 arch/powerpc/mm/book3s64/pgtable.c        | 34 +++++++++++++++++++++--
 arch/powerpc/mm/book3s64/radix_pgtable.c  |  9 ++++--
 5 files changed, 57 insertions(+), 8 deletions(-)

Comments

janani July 8, 2019, 5:57 p.m. UTC | #1
On 2019-06-28 15:08, Claudio Carvalho wrote:
> From: Michael Anderson <andmike@linux.ibm.com>
> 
> When running under an ultravisor, the ultravisor controls the real
> partition table and has it in secure memory where the hypervisor can't
> access it, and therefore we (the HV) have to do a ucall whenever we 
> want
> to update an entry.
> 
> The HV still keeps a copy of its view of the partition table in normal
> memory so that the nest MMU can access it.
> 
> Both partition tables will have PATE entries for HV and normal virtual
> machines.
> 
> Suggested-by: Ryan Grimm <grimm@linux.vnet.ibm.com>
> Signed-off-by: Michael Anderson <andmike@linux.ibm.com>
> Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
> Signed-off-by: Ram Pai <linuxram@us.ibm.com>
> [ Write the pate in HV's table before doing that in UV's ]
> Signed-off-by: Claudio Carvalho <cclaudio@linux.ibm.com>
  Reviewed-by: Janani Janakiraman <janani@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/ultravisor-api.h |  5 +++-
>  arch/powerpc/include/asm/ultravisor.h     | 14 ++++++++++
>  arch/powerpc/mm/book3s64/hash_utils.c     |  3 +-
>  arch/powerpc/mm/book3s64/pgtable.c        | 34 +++++++++++++++++++++--
>  arch/powerpc/mm/book3s64/radix_pgtable.c  |  9 ++++--
>  5 files changed, 57 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/ultravisor-api.h
> b/arch/powerpc/include/asm/ultravisor-api.h
> index 49e766adabc7..141940771add 100644
> --- a/arch/powerpc/include/asm/ultravisor-api.h
> +++ b/arch/powerpc/include/asm/ultravisor-api.h
> @@ -15,6 +15,9 @@
>  #define U_SUCCESS		H_SUCCESS
>  #define U_FUNCTION		H_FUNCTION
>  #define U_PARAMETER		H_PARAMETER
> +#define U_PERMISSION		H_PERMISSION
> 
> -#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
> +/* opcodes */
> +#define UV_WRITE_PATE			0xF104
> 
> +#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
> diff --git a/arch/powerpc/include/asm/ultravisor.h
> b/arch/powerpc/include/asm/ultravisor.h
> index a78a2dacfd0b..996c1efd6c6d 100644
> --- a/arch/powerpc/include/asm/ultravisor.h
> +++ b/arch/powerpc/include/asm/ultravisor.h
> @@ -12,6 +12,8 @@
> 
>  #if !defined(__ASSEMBLY__)
> 
> +#include <linux/types.h>
> +
>  /* Internal functions */
>  extern int early_init_dt_scan_ultravisor(unsigned long node, const 
> char *uname,
>  					 int depth, void *data);
> @@ -28,8 +30,20 @@ extern int early_init_dt_scan_ultravisor(unsigned
> long node, const char *uname,
>   */
>  #if defined(CONFIG_PPC_POWERNV)
>  long ucall(unsigned long opcode, unsigned long *retbuf, ...);
> +#else
> +static long ucall(unsigned long opcode, unsigned long *retbuf, ...)
> +{
> +	return U_NOT_AVAILABLE;
> +}
>  #endif
> 
> +static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1)
> +{
> +	unsigned long retbuf[UCALL_BUFSIZE];
> +
> +	return ucall(UV_WRITE_PATE, retbuf, lpid, dw0, dw1);
> +}
> +
>  #endif /* !__ASSEMBLY__ */
> 
>  #endif	/* _ASM_POWERPC_ULTRAVISOR_H */
> diff --git a/arch/powerpc/mm/book3s64/hash_utils.c
> b/arch/powerpc/mm/book3s64/hash_utils.c
> index 1ff451892d7f..220a4e133240 100644
> --- a/arch/powerpc/mm/book3s64/hash_utils.c
> +++ b/arch/powerpc/mm/book3s64/hash_utils.c
> @@ -1080,9 +1080,10 @@ void hash__early_init_mmu_secondary(void)
> 
>  		if (!cpu_has_feature(CPU_FTR_ARCH_300))
>  			mtspr(SPRN_SDR1, _SDR1);
> -		else
> +		else if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
>  			mtspr(SPRN_PTCR,
>  			      __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
> +
>  	}
>  	/* Initialize SLB */
>  	slb_initialize();
> diff --git a/arch/powerpc/mm/book3s64/pgtable.c
> b/arch/powerpc/mm/book3s64/pgtable.c
> index ad3dd977c22d..224c5c7c2e3d 100644
> --- a/arch/powerpc/mm/book3s64/pgtable.c
> +++ b/arch/powerpc/mm/book3s64/pgtable.c
> @@ -16,6 +16,8 @@
>  #include <asm/tlb.h>
>  #include <asm/trace.h>
>  #include <asm/powernv.h>
> +#include <asm/firmware.h>
> +#include <asm/ultravisor.h>
> 
>  #include <mm/mmu_decl.h>
>  #include <trace/events/thp.h>
> @@ -206,12 +208,25 @@ void __init mmu_partition_table_init(void)
>  	 * 64 K size.
>  	 */
>  	ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
> -	mtspr(SPRN_PTCR, ptcr);
> +	/*
> +	 * If ultravisor is available, it is responsible for creating and
> +	 * managing partition table
> +	 */
> +	if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
> +		mtspr(SPRN_PTCR, ptcr);
> +
> +	/*
> +	 * Since nestMMU cannot access secure memory. Create
> +	 * and manage our own partition table. This table
> +	 * contains entries for nonsecure and hypervisor
> +	 * partition.
> +	 */
>  	powernv_set_nmmu_ptcr(ptcr);
>  }
> 
> -void mmu_partition_table_set_entry(unsigned int lpid, unsigned long 
> dw0,
> -				   unsigned long dw1)
> +static void __mmu_partition_table_set_entry(unsigned int lpid,
> +					    unsigned long dw0,
> +					    unsigned long dw1)
>  {
>  	unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
> 
> @@ -238,6 +253,19 @@ void mmu_partition_table_set_entry(unsigned int
> lpid, unsigned long dw0,
>  	/* do we need fixup here ?*/
>  	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
>  }
> +
> +void mmu_partition_table_set_entry(unsigned int lpid, unsigned long 
> dw0,
> +				  unsigned long dw1)
> +{
> +	__mmu_partition_table_set_entry(lpid, dw0, dw1);
> +
> +	if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
> +		uv_register_pate(lpid, dw0, dw1);
> +		pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n",
> +			dw0, dw1);
> +	}
> +}
> +
>  EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
> 
>  static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c
> b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index 8904aa1243d8..da6a6b76a040 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -656,8 +656,10 @@ void radix__early_init_mmu_secondary(void)
>  		lpcr = mfspr(SPRN_LPCR);
>  		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
> 
> -		mtspr(SPRN_PTCR,
> -		      __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
> +		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
> +			mtspr(SPRN_PTCR, __pa(partition_tb) |
> +			      (PATB_SIZE_SHIFT - 12));
> +
>  		radix_init_amor();
>  	}
> 
> @@ -673,7 +675,8 @@ void radix__mmu_cleanup_all(void)
>  	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
>  		lpcr = mfspr(SPRN_LPCR);
>  		mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
> -		mtspr(SPRN_PTCR, 0);
> +		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
> +			mtspr(SPRN_PTCR, 0);
>  		powernv_set_nmmu_ptcr(0);
>  		radix__flush_tlb_all();
>  	}
Michael Ellerman July 11, 2019, 12:57 p.m. UTC | #2
Claudio Carvalho <cclaudio@linux.ibm.com> writes:
> From: Michael Anderson <andmike@linux.ibm.com>
>
> When running under an ultravisor, the ultravisor controls the real
> partition table and has it in secure memory where the hypervisor can't
> access it, and therefore we (the HV) have to do a ucall whenever we want
> to update an entry.
>
> The HV still keeps a copy of its view of the partition table in normal
> memory so that the nest MMU can access it.
>
> Both partition tables will have PATE entries for HV and normal virtual

Can you expand novel acronyms on their first usage please, ie. PATE.

> machines.
>
> Suggested-by: Ryan Grimm <grimm@linux.vnet.ibm.com>

"Suggested" implies this is optional, but it's not :)

I'm not sure exactly what Ryan's input was, but feel free to make up a
better tag :)

> Signed-off-by: Michael Anderson <andmike@linux.ibm.com>
> Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
> Signed-off-by: Ram Pai <linuxram@us.ibm.com>
> [ Write the pate in HV's table before doing that in UV's ]
> Signed-off-by: Claudio Carvalho <cclaudio@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/ultravisor-api.h |  5 +++-
>  arch/powerpc/include/asm/ultravisor.h     | 14 ++++++++++
>  arch/powerpc/mm/book3s64/hash_utils.c     |  3 +-
>  arch/powerpc/mm/book3s64/pgtable.c        | 34 +++++++++++++++++++++--
>  arch/powerpc/mm/book3s64/radix_pgtable.c  |  9 ++++--
>  5 files changed, 57 insertions(+), 8 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/ultravisor-api.h b/arch/powerpc/include/asm/ultravisor-api.h
> index 49e766adabc7..141940771add 100644
> --- a/arch/powerpc/include/asm/ultravisor-api.h
> +++ b/arch/powerpc/include/asm/ultravisor-api.h
> @@ -15,6 +15,9 @@
>  #define U_SUCCESS		H_SUCCESS
>  #define U_FUNCTION		H_FUNCTION
>  #define U_PARAMETER		H_PARAMETER
> +#define U_PERMISSION		H_PERMISSION
>  
> -#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */

What happened there? Just a diff artifact?

> +/* opcodes */
> +#define UV_WRITE_PATE			0xF104
>  
> +#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
> diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h
> index a78a2dacfd0b..996c1efd6c6d 100644
> --- a/arch/powerpc/include/asm/ultravisor.h
> +++ b/arch/powerpc/include/asm/ultravisor.h
> @@ -12,6 +12,8 @@
>  
>  #if !defined(__ASSEMBLY__)
>  
> +#include <linux/types.h>
> +
>  /* Internal functions */
>  extern int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
>  					 int depth, void *data);
> @@ -28,8 +30,20 @@ extern int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
>   */
>  #if defined(CONFIG_PPC_POWERNV)
>  long ucall(unsigned long opcode, unsigned long *retbuf, ...);
> +#else
> +static long ucall(unsigned long opcode, unsigned long *retbuf, ...)
          ^
          inline

> +{
> +	return U_NOT_AVAILABLE;
> +}
>  #endif

That should have been in the previous patch.

> +static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1)
> +{
> +	unsigned long retbuf[UCALL_BUFSIZE];
> +
> +	return ucall(UV_WRITE_PATE, retbuf, lpid, dw0, dw1);

You probably want a ucall_norets().

> +}
> +
>  #endif /* !__ASSEMBLY__ */
>  
>  #endif	/* _ASM_POWERPC_ULTRAVISOR_H */
> diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
> index 1ff451892d7f..220a4e133240 100644
> --- a/arch/powerpc/mm/book3s64/hash_utils.c
> +++ b/arch/powerpc/mm/book3s64/hash_utils.c
> @@ -1080,9 +1080,10 @@ void hash__early_init_mmu_secondary(void)
>  
>  		if (!cpu_has_feature(CPU_FTR_ARCH_300))
>  			mtspr(SPRN_SDR1, _SDR1);
> -		else
> +		else if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
>  			mtspr(SPRN_PTCR,
>  			      __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));

Needs a comment for the else case.

>  	}
>  	/* Initialize SLB */
>  	slb_initialize();
> diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
> index ad3dd977c22d..224c5c7c2e3d 100644
> --- a/arch/powerpc/mm/book3s64/pgtable.c
> +++ b/arch/powerpc/mm/book3s64/pgtable.c
> @@ -16,6 +16,8 @@
>  #include <asm/tlb.h>
>  #include <asm/trace.h>
>  #include <asm/powernv.h>
> +#include <asm/firmware.h>
> +#include <asm/ultravisor.h>
>  
>  #include <mm/mmu_decl.h>
>  #include <trace/events/thp.h>
> @@ -206,12 +208,25 @@ void __init mmu_partition_table_init(void)
>  	 * 64 K size.
>  	 */
>  	ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
> -	mtspr(SPRN_PTCR, ptcr);
> +	/*
> +	 * If ultravisor is available, it is responsible for creating and
> +	 * managing partition table
> +	 */

But we just created the partition table?!

This comment and the one below would probably make more sense if they
were merged and appeared further up, where we allocate the partition
table.
      
> +	if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
> +		mtspr(SPRN_PTCR, ptcr);
> +
> +	/*
> +	 * Since nestMMU cannot access secure memory. Create
> +	 * and manage our own partition table. This table

But we just said the UV was responsible for it :)

> +	 * contains entries for nonsecure and hypervisor
> +	 * partition.
> +	 */
>  	powernv_set_nmmu_ptcr(ptcr);
>  }
>  
> -void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
> -				   unsigned long dw1)
> +static void __mmu_partition_table_set_entry(unsigned int lpid,
> +					    unsigned long dw0,
> +					    unsigned long dw1)
>  {
>  	unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
>  
> @@ -238,6 +253,19 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
>  	/* do we need fixup here ?*/
>  	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
>  }
> +
> +void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
> +				  unsigned long dw1)
> +{
> +	__mmu_partition_table_set_entry(lpid, dw0, dw1);
> +
> +	if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
> +		uv_register_pate(lpid, dw0, dw1);
> +		pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n",
> +			dw0, dw1);
> +	}
> +}

I agree with Alexey that this patch and the next should be merged and
the result cleaned up a bit.

> +

No extra blank please.

>  EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);

>  
>  static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index 8904aa1243d8..da6a6b76a040 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -656,8 +656,10 @@ void radix__early_init_mmu_secondary(void)
>  		lpcr = mfspr(SPRN_LPCR);
>  		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
>  
> -		mtspr(SPRN_PTCR,
> -		      __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
> +		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
> +			mtspr(SPRN_PTCR, __pa(partition_tb) |
> +			      (PATB_SIZE_SHIFT - 12));
> +
>  		radix_init_amor();
>  	}
>  
> @@ -673,7 +675,8 @@ void radix__mmu_cleanup_all(void)
>  	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
>  		lpcr = mfspr(SPRN_LPCR);
>  		mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
> -		mtspr(SPRN_PTCR, 0);
> +		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
> +			mtspr(SPRN_PTCR, 0);
>  		powernv_set_nmmu_ptcr(0);
>  		radix__flush_tlb_all();
>  	}

There's four of these case where we skip touching the PTCR, which is
right on the borderline of warranting an accessor. I guess we can do it
as a cleanup later.

cheers
Ryan Grimm July 17, 2019, 2:59 p.m. UTC | #3
On Thu, 2019-07-11 at 22:57 +1000, Michael Ellerman wrote:
> Claudio Carvalho <cclaudio@linux.ibm.com> writes:
> > From: Michael Anderson <andmike@linux.ibm.com>
> > 
> > When running under an ultravisor, the ultravisor controls the real
> > partition table and has it in secure memory where the hypervisor
> > can't
> > access it, and therefore we (the HV) have to do a ucall whenever we
> > want
> > to update an entry.
> > 
> > The HV still keeps a copy of its view of the partition table in
> > normal
> > memory so that the nest MMU can access it.
> > 
> > Both partition tables will have PATE entries for HV and normal
> > virtual
> 
> Can you expand novel acronyms on their first usage please, ie. PATE.
> 

Agreed.  This confused me a while ago.  It is "Partition Table Entry",
correct?

> > machines.
> > 
> > Suggested-by: Ryan Grimm <grimm@linux.vnet.ibm.com>
> 

Please remove this and add 

Reviewed-by: Ryan Grimm <grimm@linux.ibm.com>


> "Suggested" implies this is optional, but it's not :)
> 
> I'm not sure exactly what Ryan's input was, but feel free to make up
> a
> better tag :)
> 
> > Signed-off-by: Michael Anderson <andmike@linux.ibm.com>
> > Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
> > Signed-off-by: Ram Pai <linuxram@us.ibm.com>
> > [ Write the pate in HV's table before doing that in UV's ]
> > Signed-off-by: Claudio Carvalho <cclaudio@linux.ibm.com>
> > ---
> >  arch/powerpc/include/asm/ultravisor-api.h |  5 +++-
> >  arch/powerpc/include/asm/ultravisor.h     | 14 ++++++++++
> >  arch/powerpc/mm/book3s64/hash_utils.c     |  3 +-
> >  arch/powerpc/mm/book3s64/pgtable.c        | 34
> > +++++++++++++++++++++--
> >  arch/powerpc/mm/book3s64/radix_pgtable.c  |  9 ++++--
> >  5 files changed, 57 insertions(+), 8 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/ultravisor-api.h
> > b/arch/powerpc/include/asm/ultravisor-api.h
> > index 49e766adabc7..141940771add 100644
> > --- a/arch/powerpc/include/asm/ultravisor-api.h
> > +++ b/arch/powerpc/include/asm/ultravisor-api.h
> > @@ -15,6 +15,9 @@
> >  #define U_SUCCESS		H_SUCCESS
> >  #define U_FUNCTION		H_FUNCTION
> >  #define U_PARAMETER		H_PARAMETER
> > +#define U_PERMISSION		H_PERMISSION
> >  
> > -#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
> 
> What happened there? Just a diff artifact?
> 
> > +/* opcodes */
> > +#define UV_WRITE_PATE			0xF104
> >  
> > +#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
> > diff --git a/arch/powerpc/include/asm/ultravisor.h
> > b/arch/powerpc/include/asm/ultravisor.h
> > index a78a2dacfd0b..996c1efd6c6d 100644
> > --- a/arch/powerpc/include/asm/ultravisor.h
> > +++ b/arch/powerpc/include/asm/ultravisor.h
> > @@ -12,6 +12,8 @@
> >  
> >  #if !defined(__ASSEMBLY__)
> >  
> > +#include <linux/types.h>
> > +
> >  /* Internal functions */
> >  extern int early_init_dt_scan_ultravisor(unsigned long node, const
> > char *uname,
> >  					 int depth, void *data);
> > @@ -28,8 +30,20 @@ extern int
> > early_init_dt_scan_ultravisor(unsigned long node, const char
> > *uname,
> >   */
> >  #if defined(CONFIG_PPC_POWERNV)
> >  long ucall(unsigned long opcode, unsigned long *retbuf, ...);
> > +#else
> > +static long ucall(unsigned long opcode, unsigned long *retbuf,
> > ...)
> 
>           ^
>           inline
> 
> > +{
> > +	return U_NOT_AVAILABLE;
> > +}
> >  #endif
> 
> That should have been in the previous patch.
> 
> > +static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1)
> > +{
> > +	unsigned long retbuf[UCALL_BUFSIZE];
> > +
> > +	return ucall(UV_WRITE_PATE, retbuf, lpid, dw0, dw1);
> 
> You probably want a ucall_norets().
> 
> > +}
> > +
> >  #endif /* !__ASSEMBLY__ */
> >  
> >  #endif	/* _ASM_POWERPC_ULTRAVISOR_H */
> > diff --git a/arch/powerpc/mm/book3s64/hash_utils.c
> > b/arch/powerpc/mm/book3s64/hash_utils.c
> > index 1ff451892d7f..220a4e133240 100644
> > --- a/arch/powerpc/mm/book3s64/hash_utils.c
> > +++ b/arch/powerpc/mm/book3s64/hash_utils.c
> > @@ -1080,9 +1080,10 @@ void hash__early_init_mmu_secondary(void)
> >  
> >  		if (!cpu_has_feature(CPU_FTR_ARCH_300))
> >  			mtspr(SPRN_SDR1, _SDR1);
> > -		else
> > +		else if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
> >  			mtspr(SPRN_PTCR,
> >  			      __pa(partition_tb) | (PATB_SIZE_SHIFT -
> > 12));
> 
> Needs a comment for the else case.
> 
> >  	}
> >  	/* Initialize SLB */
> >  	slb_initialize();
> > diff --git a/arch/powerpc/mm/book3s64/pgtable.c
> > b/arch/powerpc/mm/book3s64/pgtable.c
> > index ad3dd977c22d..224c5c7c2e3d 100644
> > --- a/arch/powerpc/mm/book3s64/pgtable.c
> > +++ b/arch/powerpc/mm/book3s64/pgtable.c
> > @@ -16,6 +16,8 @@
> >  #include <asm/tlb.h>
> >  #include <asm/trace.h>
> >  #include <asm/powernv.h>
> > +#include <asm/firmware.h>
> > +#include <asm/ultravisor.h>
> >  
> >  #include <mm/mmu_decl.h>
> >  #include <trace/events/thp.h>
> > @@ -206,12 +208,25 @@ void __init mmu_partition_table_init(void)
> >  	 * 64 K size.
> >  	 */
> >  	ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
> > -	mtspr(SPRN_PTCR, ptcr);
> > +	/*
> > +	 * If ultravisor is available, it is responsible for creating
> > and
> > +	 * managing partition table
> > +	 */
> 
> But we just created the partition table?!
> 
> This comment and the one below would probably make more sense if they
> were merged and appeared further up, where we allocate the partition
> table.
>       
> > +	if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
> > +		mtspr(SPRN_PTCR, ptcr);
> > +
> > +	/*
> > +	 * Since nestMMU cannot access secure memory. Create
> > +	 * and manage our own partition table. This table
> 
> But we just said the UV was responsible for it :)
> 
> > +	 * contains entries for nonsecure and hypervisor
> > +	 * partition.
> > +	 */
> >  	powernv_set_nmmu_ptcr(ptcr);
> >  }
> >  
> > -void mmu_partition_table_set_entry(unsigned int lpid, unsigned
> > long dw0,
> > -				   unsigned long dw1)
> > +static void __mmu_partition_table_set_entry(unsigned int lpid,
> > +					    unsigned long dw0,
> > +					    unsigned long dw1)
> >  {
> >  	unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
> >  
> > @@ -238,6 +253,19 @@ void mmu_partition_table_set_entry(unsigned
> > int lpid, unsigned long dw0,
> >  	/* do we need fixup here ?*/
> >  	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
> >  }
> > +
> > +void mmu_partition_table_set_entry(unsigned int lpid, unsigned
> > long dw0,
> > +				  unsigned long dw1)
> > +{
> > +	__mmu_partition_table_set_entry(lpid, dw0, dw1);
> > +
> > +	if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
> > +		uv_register_pate(lpid, dw0, dw1);
> > +		pr_info("PATE registered by ultravisor: dw0 = 0x%lx,
> > dw1 = 0x%lx\n",
> > +			dw0, dw1);
> > +	}
> > +}
> 
> I agree with Alexey that this patch and the next should be merged and
> the result cleaned up a bit.
> 
> > +
> 
> No extra blank please.
> 
> >  EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
> >  
> >  static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
> > diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c
> > b/arch/powerpc/mm/book3s64/radix_pgtable.c
> > index 8904aa1243d8..da6a6b76a040 100644
> > --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> > +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> > @@ -656,8 +656,10 @@ void radix__early_init_mmu_secondary(void)
> >  		lpcr = mfspr(SPRN_LPCR);
> >  		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
> >  
> > -		mtspr(SPRN_PTCR,
> > -		      __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
> > +		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
> > +			mtspr(SPRN_PTCR, __pa(partition_tb) |
> > +			      (PATB_SIZE_SHIFT - 12));
> > +
> >  		radix_init_amor();
> >  	}
> >  
> > @@ -673,7 +675,8 @@ void radix__mmu_cleanup_all(void)
> >  	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
> >  		lpcr = mfspr(SPRN_LPCR);
> >  		mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
> > -		mtspr(SPRN_PTCR, 0);
> > +		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
> > +			mtspr(SPRN_PTCR, 0);
> >  		powernv_set_nmmu_ptcr(0);
> >  		radix__flush_tlb_all();
> >  	}
> 
> There's four of these case where we skip touching the PTCR, which is
> right on the borderline of warranting an accessor. I guess we can do
> it
> as a cleanup later.
> 
> cheers
>
Claudio Carvalho July 18, 2019, 9:25 p.m. UTC | #4
On 7/11/19 9:57 AM, Michael Ellerman wrote:
>
>>  
>>  static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
>> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
>> index 8904aa1243d8..da6a6b76a040 100644
>> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
>> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
>> @@ -656,8 +656,10 @@ void radix__early_init_mmu_secondary(void)
>>  		lpcr = mfspr(SPRN_LPCR);
>>  		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
>>  
>> -		mtspr(SPRN_PTCR,
>> -		      __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
>> +		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
>> +			mtspr(SPRN_PTCR, __pa(partition_tb) |
>> +			      (PATB_SIZE_SHIFT - 12));
>> +
>>  		radix_init_amor();
>>  	}
>>  
>> @@ -673,7 +675,8 @@ void radix__mmu_cleanup_all(void)
>>  	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
>>  		lpcr = mfspr(SPRN_LPCR);
>>  		mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
>> -		mtspr(SPRN_PTCR, 0);
>> +		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
>> +			mtspr(SPRN_PTCR, 0);
>>  		powernv_set_nmmu_ptcr(0);
>>  		radix__flush_tlb_all();
>>  	}
> There's four of these case where we skip touching the PTCR, which is
> right on the borderline of warranting an accessor. I guess we can do it
> as a cleanup later.

I agree.

Since the kernel doesn't need to access a big number of ultravisor
privileged registers, maybe we can define mtspr_<reg> and mfspr_<reg>
inline functions that in ultravisor.h that skip touching the register if an
ultravisor is present and and the register is ultravisor privileged. Thus,
we don't need to replicate comments and that also would make it easier for
developers to know what are the ultravisor privileged registers.

Something like this:

--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -10,10 +10,21 @@
 
 #include <asm/ultravisor-api.h>
 #include <asm/asm-prototypes.h>
+#include <asm/reg.h>
 
 int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
                                  int depth, void *data);
 
+static inline void mtspr_ptcr(unsigned long val)
+{
+       /*
+        * If the ultravisor firmware is present, it maintains the partition
+        * table. PTCR becomes ultravisor privileged only for writing.
+        */
+       if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+               mtspr(SPRN_PTCR, val);
+}
+
 static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1)
 {
        return ucall_norets(UV_WRITE_PATE, lpid, dw0, dw1);
diff --git a/arch/powerpc/mm/book3s64/pgtable.c
b/arch/powerpc/mm/book3s64/pgtable.c
index e1bbc48e730f..25156f9dfde8 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -220,7 +220,7 @@ void __init mmu_partition_table_init(void)
         * 64 K size.
         */
        ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
-       mtspr(SPRN_PTCR, ptcr);
+       mtspr_ptcr(ptcr);
        powernv_set_nmmu_ptcr(ptcr);
 }

What do you think?
An alternative could be to change the mtspr() and mfspr() macros as we
proposed in the v1, but access to non-ultravisor privileged registers would
be performance impacted because we always would need to check if the
register is one of the few ultravisor registers that the kernel needs to
access.

Thanks,
Claudio


> cheers
>
Michael Ellerman July 19, 2019, 2:25 a.m. UTC | #5
Claudio Carvalho <cclaudio@linux.ibm.com> writes:
> On 7/11/19 9:57 AM, Michael Ellerman wrote:
>>>  static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
>>> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
>>> index 8904aa1243d8..da6a6b76a040 100644
>>> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
>>> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
>>> @@ -656,8 +656,10 @@ void radix__early_init_mmu_secondary(void)
>>>  		lpcr = mfspr(SPRN_LPCR);
>>>  		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
>>>  
>>> -		mtspr(SPRN_PTCR,
>>> -		      __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
>>> +		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
>>> +			mtspr(SPRN_PTCR, __pa(partition_tb) |
>>> +			      (PATB_SIZE_SHIFT - 12));
>>> +
>>>  		radix_init_amor();
>>>  	}
>>>  
>>> @@ -673,7 +675,8 @@ void radix__mmu_cleanup_all(void)
>>>  	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
>>>  		lpcr = mfspr(SPRN_LPCR);
>>>  		mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
>>> -		mtspr(SPRN_PTCR, 0);
>>> +		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
>>> +			mtspr(SPRN_PTCR, 0);
>>>  		powernv_set_nmmu_ptcr(0);
>>>  		radix__flush_tlb_all();
>>>  	}
>> There's four of these case where we skip touching the PTCR, which is
>> right on the borderline of warranting an accessor. I guess we can do it
>> as a cleanup later.
>
> I agree.
>
> Since the kernel doesn't need to access a big number of ultravisor
> privileged registers, maybe we can define mtspr_<reg> and mfspr_<reg>
> inline functions that in ultravisor.h that skip touching the register if an
> ultravisor is present and and the register is ultravisor privileged. Thus,
> we don't need to replicate comments and that also would make it easier for
> developers to know what are the ultravisor privileged registers.
>
> Something like this:
>
> --- a/arch/powerpc/include/asm/ultravisor.h
> +++ b/arch/powerpc/include/asm/ultravisor.h
> @@ -10,10 +10,21 @@
>  
>  #include <asm/ultravisor-api.h>
>  #include <asm/asm-prototypes.h>
> +#include <asm/reg.h>
>  
>  int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
>                                   int depth, void *data);
>  
> +static inline void mtspr_ptcr(unsigned long val)
> +{
> +       /*
> +        * If the ultravisor firmware is present, it maintains the partition
> +        * table. PTCR becomes ultravisor privileged only for writing.
> +        */
> +       if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
> +               mtspr(SPRN_PTCR, val);
> +}
+
>  static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1)
>  {
>         return ucall_norets(UV_WRITE_PATE, lpid, dw0, dw1);
> diff --git a/arch/powerpc/mm/book3s64/pgtable.c
> b/arch/powerpc/mm/book3s64/pgtable.c
> index e1bbc48e730f..25156f9dfde8 100644
> --- a/arch/powerpc/mm/book3s64/pgtable.c
> +++ b/arch/powerpc/mm/book3s64/pgtable.c
> @@ -220,7 +220,7 @@ void __init mmu_partition_table_init(void)
>          * 64 K size.
>          */
>         ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
> -       mtspr(SPRN_PTCR, ptcr);
> +       mtspr_ptcr(ptcr);
>         powernv_set_nmmu_ptcr(ptcr);
>  }
>
> What do you think?

I don't think that's actually clearer.

If the logic was always:

  if (ultravisor)
     do_ucall()
  else
     mtspr()

Then a wrapper called eg. set_ptcr() would make sense.

But because in some cases you do a ucall and some you don't, I don't
think it helps to hide that in an accessor like above.

That is confusing to a reader who sees all this code to setup a value
and then the write to PTCR does nothing.

And in fact you didn't explain why it's OK for those cases to not do the
write at all.

> An alternative could be to change the mtspr() and mfspr() macros as we
> proposed in the v1, but access to non-ultravisor privileged registers would
> be performance impacted because we always would need to check if the
> register is one of the few ultravisor registers that the kernel needs to
> access.

Yeah that and it would be very confusing to a reader who sees:

    ptcr = ...;
    mtspr(SPRN_PTCR, ptcr);
    ...

And then they discover the mtspr does *nothing* when the Ultravisor is
enabled.

cheers

Patch
diff mbox series

diff --git a/arch/powerpc/include/asm/ultravisor-api.h b/arch/powerpc/include/asm/ultravisor-api.h
index 49e766adabc7..141940771add 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -15,6 +15,9 @@ 
 #define U_SUCCESS		H_SUCCESS
 #define U_FUNCTION		H_FUNCTION
 #define U_PARAMETER		H_PARAMETER
+#define U_PERMISSION		H_PERMISSION
 
-#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
+/* opcodes */
+#define UV_WRITE_PATE			0xF104
 
+#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h
index a78a2dacfd0b..996c1efd6c6d 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -12,6 +12,8 @@ 
 
 #if !defined(__ASSEMBLY__)
 
+#include <linux/types.h>
+
 /* Internal functions */
 extern int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
 					 int depth, void *data);
@@ -28,8 +30,20 @@  extern int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
  */
 #if defined(CONFIG_PPC_POWERNV)
 long ucall(unsigned long opcode, unsigned long *retbuf, ...);
+#else
+static long ucall(unsigned long opcode, unsigned long *retbuf, ...)
+{
+	return U_NOT_AVAILABLE;
+}
 #endif
 
+static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1)
+{
+	unsigned long retbuf[UCALL_BUFSIZE];
+
+	return ucall(UV_WRITE_PATE, retbuf, lpid, dw0, dw1);
+}
+
 #endif /* !__ASSEMBLY__ */
 
 #endif	/* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 1ff451892d7f..220a4e133240 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1080,9 +1080,10 @@  void hash__early_init_mmu_secondary(void)
 
 		if (!cpu_has_feature(CPU_FTR_ARCH_300))
 			mtspr(SPRN_SDR1, _SDR1);
-		else
+		else if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
 			mtspr(SPRN_PTCR,
 			      __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+
 	}
 	/* Initialize SLB */
 	slb_initialize();
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index ad3dd977c22d..224c5c7c2e3d 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -16,6 +16,8 @@ 
 #include <asm/tlb.h>
 #include <asm/trace.h>
 #include <asm/powernv.h>
+#include <asm/firmware.h>
+#include <asm/ultravisor.h>
 
 #include <mm/mmu_decl.h>
 #include <trace/events/thp.h>
@@ -206,12 +208,25 @@  void __init mmu_partition_table_init(void)
 	 * 64 K size.
 	 */
 	ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
-	mtspr(SPRN_PTCR, ptcr);
+	/*
+	 * If ultravisor is available, it is responsible for creating and
+	 * managing partition table
+	 */
+	if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+		mtspr(SPRN_PTCR, ptcr);
+
+	/*
+	 * Since nestMMU cannot access secure memory. Create
+	 * and manage our own partition table. This table
+	 * contains entries for nonsecure and hypervisor
+	 * partition.
+	 */
 	powernv_set_nmmu_ptcr(ptcr);
 }
 
-void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
-				   unsigned long dw1)
+static void __mmu_partition_table_set_entry(unsigned int lpid,
+					    unsigned long dw0,
+					    unsigned long dw1)
 {
 	unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
 
@@ -238,6 +253,19 @@  void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
 	/* do we need fixup here ?*/
 	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
 }
+
+void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
+				  unsigned long dw1)
+{
+	__mmu_partition_table_set_entry(lpid, dw0, dw1);
+
+	if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
+		uv_register_pate(lpid, dw0, dw1);
+		pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n",
+			dw0, dw1);
+	}
+}
+
 EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
 
 static pmd_t *get_pmd_from_cache(struct mm_struct *mm)
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 8904aa1243d8..da6a6b76a040 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -656,8 +656,10 @@  void radix__early_init_mmu_secondary(void)
 		lpcr = mfspr(SPRN_LPCR);
 		mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
 
-		mtspr(SPRN_PTCR,
-		      __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+			mtspr(SPRN_PTCR, __pa(partition_tb) |
+			      (PATB_SIZE_SHIFT - 12));
+
 		radix_init_amor();
 	}
 
@@ -673,7 +675,8 @@  void radix__mmu_cleanup_all(void)
 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
 		lpcr = mfspr(SPRN_LPCR);
 		mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
-		mtspr(SPRN_PTCR, 0);
+		if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+			mtspr(SPRN_PTCR, 0);
 		powernv_set_nmmu_ptcr(0);
 		radix__flush_tlb_all();
 	}