diff mbox series

[v8,5/8] KVM: Register/unregister the guest private memory regions

Message ID 20220915142913.2213336-6-chao.p.peng@linux.intel.com
State New
Headers show
Series KVM: mm: fd-based approach for supporting KVM | expand

Commit Message

Chao Peng Sept. 15, 2022, 2:29 p.m. UTC
If CONFIG_HAVE_KVM_PRIVATE_MEM=y, userspace can register/unregister the
guest private memory regions through KVM_MEMORY_ENCRYPT_{UN,}REG_REGION
ioctls. The patch reuses existing SEV ioctl number but differs that the
address in the region for KVM_PRIVATE_MEM case is gpa while for SEV case
it's hva. Which usages should the ioctls go is determined by the newly
added kvm_arch_has_private_mem(). Architecture which supports
KVM_PRIVATE_MEM should override this function.

The current implementation defaults all memory to private. The shared
memory regions are stored in a xarray variable for memory efficiency and
zapping existing memory mappings is also a side effect of these two
ioctls when defined.

Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
---
 Documentation/virt/kvm/api.rst  | 17 ++++++--
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu.h              |  2 -
 include/linux/kvm_host.h        | 13 ++++++
 virt/kvm/kvm_main.c             | 73 +++++++++++++++++++++++++++++++++
 5 files changed, 100 insertions(+), 6 deletions(-)

Comments

Fuad Tabba Sept. 26, 2022, 10:36 a.m. UTC | #1
Hi Chao,

On Thu, Sep 15, 2022 at 3:38 PM Chao Peng <chao.p.peng@linux.intel.com> wrote:
>
> If CONFIG_HAVE_KVM_PRIVATE_MEM=y, userspace can register/unregister the
> guest private memory regions through KVM_MEMORY_ENCRYPT_{UN,}REG_REGION
> ioctls. The patch reuses existing SEV ioctl number but differs that the
> address in the region for KVM_PRIVATE_MEM case is gpa while for SEV case
> it's hva. Which usages should the ioctls go is determined by the newly
> added kvm_arch_has_private_mem(). Architecture which supports
> KVM_PRIVATE_MEM should override this function.
>
> The current implementation defaults all memory to private. The shared
> memory regions are stored in a xarray variable for memory efficiency and
> zapping existing memory mappings is also a side effect of these two
> ioctls when defined.
>
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> ---
>  Documentation/virt/kvm/api.rst  | 17 ++++++--
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/mmu.h              |  2 -
>  include/linux/kvm_host.h        | 13 ++++++
>  virt/kvm/kvm_main.c             | 73 +++++++++++++++++++++++++++++++++
>  5 files changed, 100 insertions(+), 6 deletions(-)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 1a6c003b2a0b..c0f800d04ffc 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -4715,10 +4715,19 @@ Documentation/virt/kvm/x86/amd-memory-encryption.rst.
>  This ioctl can be used to register a guest memory region which may
>  contain encrypted data (e.g. guest RAM, SMRAM etc).
>
> -It is used in the SEV-enabled guest. When encryption is enabled, a guest
> -memory region may contain encrypted data. The SEV memory encryption
> -engine uses a tweak such that two identical plaintext pages, each at
> -different locations will have differing ciphertexts. So swapping or
> +Currently this ioctl supports registering memory regions for two usages:
> +private memory and SEV-encrypted memory.
> +
> +When private memory is enabled, this ioctl is used to register guest private
> +memory region and the addr/size of kvm_enc_region represents guest physical
> +address (GPA). In this usage, this ioctl zaps the existing guest memory
> +mappings in KVM that fallen into the region.
> +
> +When SEV-encrypted memory is enabled, this ioctl is used to register guest
> +memory region which may contain encrypted data for a SEV-enabled guest. The
> +addr/size of kvm_enc_region represents userspace address (HVA). The SEV
> +memory encryption engine uses a tweak such that two identical plaintext pages,
> +each at different locations will have differing ciphertexts. So swapping or
>  moving ciphertext of those pages will not result in plaintext being
>  swapped. So relocating (or migrating) physical backing pages for the SEV
>  guest will require some additional steps.
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 2c96c43c313a..cfad6ba1a70a 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -37,6 +37,7 @@
>  #include <asm/hyperv-tlfs.h>
>
>  #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
> +#define __KVM_HAVE_ZAP_GFN_RANGE
>
>  #define KVM_MAX_VCPUS 1024
>
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 6bdaacb6faa0..c94b620bf94b 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -211,8 +211,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>         return -(u32)fault & errcode;
>  }
>
> -void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
> -
>  int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
>
>  int kvm_mmu_post_init_vm(struct kvm *kvm);
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 2125b50f6345..d65690cae80b 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -260,6 +260,15 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
>  bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
>  #endif
>
> +#ifdef __KVM_HAVE_ZAP_GFN_RANGE
> +void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
> +#else
> +static inline void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start
> +                                                     gfn_t gfn_end)

Missing a comma after gfn_start.

Cheers,
/fuad



> +{
> +}
> +#endif
> +
>  enum {
>         OUTSIDE_GUEST_MODE,
>         IN_GUEST_MODE,
> @@ -795,6 +804,9 @@ struct kvm {
>         struct notifier_block pm_notifier;
>  #endif
>         char stats_id[KVM_STATS_NAME_SIZE];
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +       struct xarray mem_attr_array;
> +#endif
>  };
>
>  #define kvm_err(fmt, ...) \
> @@ -1454,6 +1466,7 @@ bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
>  int kvm_arch_post_init_vm(struct kvm *kvm);
>  void kvm_arch_pre_destroy_vm(struct kvm *kvm);
>  int kvm_arch_create_vm_debugfs(struct kvm *kvm);
> +bool kvm_arch_has_private_mem(struct kvm *kvm);
>
>  #ifndef __KVM_HAVE_ARCH_VM_ALLOC
>  /*
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index fa9dd2d2c001..de5cce8c82c7 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -937,6 +937,47 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
>
>  #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
>
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +#define KVM_MEM_ATTR_SHARED    0x0001
> +static int kvm_vm_ioctl_set_mem_attr(struct kvm *kvm, gpa_t gpa, gpa_t size,
> +                                    bool is_private)
> +{
> +       gfn_t start, end;
> +       unsigned long index;
> +       void *entry;
> +       int r;
> +
> +       if (size == 0 || gpa + size < gpa)
> +               return -EINVAL;
> +       if (gpa & (PAGE_SIZE - 1) || size & (PAGE_SIZE - 1))
> +               return -EINVAL;
> +
> +       start = gpa >> PAGE_SHIFT;
> +       end = (gpa + size - 1 + PAGE_SIZE) >> PAGE_SHIFT;
> +
> +       /*
> +        * Guest memory defaults to private, kvm->mem_attr_array only stores
> +        * shared memory.
> +        */
> +       entry = is_private ? NULL : xa_mk_value(KVM_MEM_ATTR_SHARED);
> +
> +       for (index = start; index < end; index++) {
> +               r = xa_err(xa_store(&kvm->mem_attr_array, index, entry,
> +                                   GFP_KERNEL_ACCOUNT));
> +               if (r)
> +                       goto err;
> +       }
> +
> +       kvm_zap_gfn_range(kvm, start, end);
> +
> +       return r;
> +err:
> +       for (; index > start; index--)
> +               xa_erase(&kvm->mem_attr_array, index);
> +       return r;
> +}
> +#endif /* CONFIG_HAVE_KVM_PRIVATE_MEM */
> +
>  #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
>  static int kvm_pm_notifier_call(struct notifier_block *bl,
>                                 unsigned long state,
> @@ -1165,6 +1206,9 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
>         spin_lock_init(&kvm->mn_invalidate_lock);
>         rcuwait_init(&kvm->mn_memslots_update_rcuwait);
>         xa_init(&kvm->vcpu_array);
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +       xa_init(&kvm->mem_attr_array);
> +#endif
>
>         INIT_LIST_HEAD(&kvm->gpc_list);
>         spin_lock_init(&kvm->gpc_lock);
> @@ -1338,6 +1382,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
>                 kvm_free_memslots(kvm, &kvm->__memslots[i][0]);
>                 kvm_free_memslots(kvm, &kvm->__memslots[i][1]);
>         }
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +       xa_destroy(&kvm->mem_attr_array);
> +#endif
>         cleanup_srcu_struct(&kvm->irq_srcu);
>         cleanup_srcu_struct(&kvm->srcu);
>         kvm_arch_free_vm(kvm);
> @@ -1541,6 +1588,11 @@ static void kvm_replace_memslot(struct kvm *kvm,
>         }
>  }
>
> +bool __weak kvm_arch_has_private_mem(struct kvm *kvm)
> +{
> +       return false;
> +}
> +
>  static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
>  {
>         u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
> @@ -4703,6 +4755,24 @@ static long kvm_vm_ioctl(struct file *filp,
>                 r = kvm_vm_ioctl_set_memory_region(kvm, &mem);
>                 break;
>         }
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +       case KVM_MEMORY_ENCRYPT_REG_REGION:
> +       case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
> +               struct kvm_enc_region region;
> +               bool set = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
> +
> +               if (!kvm_arch_has_private_mem(kvm))
> +                       goto arch_vm_ioctl;
> +
> +               r = -EFAULT;
> +               if (copy_from_user(&region, argp, sizeof(region)))
> +                       goto out;
> +
> +               r = kvm_vm_ioctl_set_mem_attr(kvm, region.addr,
> +                                             region.size, set);
> +               break;
> +       }
> +#endif
>         case KVM_GET_DIRTY_LOG: {
>                 struct kvm_dirty_log log;
>
> @@ -4856,6 +4926,9 @@ static long kvm_vm_ioctl(struct file *filp,
>                 r = kvm_vm_ioctl_get_stats_fd(kvm);
>                 break;
>         default:
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +arch_vm_ioctl:
> +#endif
>                 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
>         }
>  out:
> --
> 2.25.1
>
Chao Peng Sept. 26, 2022, 2:07 p.m. UTC | #2
On Mon, Sep 26, 2022 at 11:36:34AM +0100, Fuad Tabba wrote:
...

> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 2125b50f6345..d65690cae80b 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -260,6 +260,15 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
> >  bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
> >  #endif
> >
> > +#ifdef __KVM_HAVE_ZAP_GFN_RANGE
> > +void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
> > +#else
> > +static inline void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start
> > +                                                     gfn_t gfn_end)
> 
> Missing a comma after gfn_start.

Good catch, thanks!
Chao
> 
> Cheers,
> /fuad
Fuad Tabba Oct. 11, 2022, 9:48 a.m. UTC | #3
Hi,

On Thu, Sep 15, 2022 at 3:38 PM Chao Peng <chao.p.peng@linux.intel.com> wrote:
>
> If CONFIG_HAVE_KVM_PRIVATE_MEM=y, userspace can register/unregister the
> guest private memory regions through KVM_MEMORY_ENCRYPT_{UN,}REG_REGION
> ioctls. The patch reuses existing SEV ioctl number but differs that the
> address in the region for KVM_PRIVATE_MEM case is gpa while for SEV case
> it's hva. Which usages should the ioctls go is determined by the newly
> added kvm_arch_has_private_mem(). Architecture which supports
> KVM_PRIVATE_MEM should override this function.
>
> The current implementation defaults all memory to private. The shared
> memory regions are stored in a xarray variable for memory efficiency and
> zapping existing memory mappings is also a side effect of these two
> ioctls when defined.
>
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> ---
>  Documentation/virt/kvm/api.rst  | 17 ++++++--
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/mmu.h              |  2 -
>  include/linux/kvm_host.h        | 13 ++++++
>  virt/kvm/kvm_main.c             | 73 +++++++++++++++++++++++++++++++++
>  5 files changed, 100 insertions(+), 6 deletions(-)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 1a6c003b2a0b..c0f800d04ffc 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -4715,10 +4715,19 @@ Documentation/virt/kvm/x86/amd-memory-encryption.rst.
>  This ioctl can be used to register a guest memory region which may
>  contain encrypted data (e.g. guest RAM, SMRAM etc).
>
> -It is used in the SEV-enabled guest. When encryption is enabled, a guest
> -memory region may contain encrypted data. The SEV memory encryption
> -engine uses a tweak such that two identical plaintext pages, each at
> -different locations will have differing ciphertexts. So swapping or
> +Currently this ioctl supports registering memory regions for two usages:
> +private memory and SEV-encrypted memory.
> +
> +When private memory is enabled, this ioctl is used to register guest private
> +memory region and the addr/size of kvm_enc_region represents guest physical
> +address (GPA). In this usage, this ioctl zaps the existing guest memory
> +mappings in KVM that fallen into the region.
> +
> +When SEV-encrypted memory is enabled, this ioctl is used to register guest
> +memory region which may contain encrypted data for a SEV-enabled guest. The
> +addr/size of kvm_enc_region represents userspace address (HVA). The SEV
> +memory encryption engine uses a tweak such that two identical plaintext pages,
> +each at different locations will have differing ciphertexts. So swapping or
>  moving ciphertext of those pages will not result in plaintext being
>  swapped. So relocating (or migrating) physical backing pages for the SEV
>  guest will require some additional steps.
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 2c96c43c313a..cfad6ba1a70a 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -37,6 +37,7 @@
>  #include <asm/hyperv-tlfs.h>
>
>  #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
> +#define __KVM_HAVE_ZAP_GFN_RANGE
>
>  #define KVM_MAX_VCPUS 1024
>
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 6bdaacb6faa0..c94b620bf94b 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -211,8 +211,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>         return -(u32)fault & errcode;
>  }
>
> -void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
> -
>  int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
>
>  int kvm_mmu_post_init_vm(struct kvm *kvm);
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 2125b50f6345..d65690cae80b 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -260,6 +260,15 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
>  bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
>  #endif
>
> +#ifdef __KVM_HAVE_ZAP_GFN_RANGE
> +void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
> +#else
> +static inline void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start
> +                                                     gfn_t gfn_end)
> +{
> +}
> +#endif
> +
>  enum {
>         OUTSIDE_GUEST_MODE,
>         IN_GUEST_MODE,
> @@ -795,6 +804,9 @@ struct kvm {
>         struct notifier_block pm_notifier;
>  #endif
>         char stats_id[KVM_STATS_NAME_SIZE];
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +       struct xarray mem_attr_array;
> +#endif
>  };
>
>  #define kvm_err(fmt, ...) \
> @@ -1454,6 +1466,7 @@ bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
>  int kvm_arch_post_init_vm(struct kvm *kvm);
>  void kvm_arch_pre_destroy_vm(struct kvm *kvm);
>  int kvm_arch_create_vm_debugfs(struct kvm *kvm);
> +bool kvm_arch_has_private_mem(struct kvm *kvm);
>
>  #ifndef __KVM_HAVE_ARCH_VM_ALLOC
>  /*
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index fa9dd2d2c001..de5cce8c82c7 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -937,6 +937,47 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
>
>  #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
>
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +#define KVM_MEM_ATTR_SHARED    0x0001
> +static int kvm_vm_ioctl_set_mem_attr(struct kvm *kvm, gpa_t gpa, gpa_t size,
> +                                    bool is_private)
> +{

I wonder if this ioctl should be implemented as an arch-specific
ioctl. In this patch it performs some actions that pKVM might not need
or might want to do differently.

pKVM tracks the sharing status in the stage-2 page table's software
bits, so it can avoid the overhead of using mem_attr_array.

Also, this ioctl calls kvm_zap_gfn_range(), as does the invalidation
notifier (introduced in patch 8). For pKVM, the kind of zapping (or
the information conveyed to the hypervisor) might need to be different
depending on the cause; whether it's invalidation or change of sharing
status.

Thanks,
/fuad


> +       gfn_t start, end;
> +       unsigned long index;
> +       void *entry;
> +       int r;
> +
> +       if (size == 0 || gpa + size < gpa)
> +               return -EINVAL;
> +       if (gpa & (PAGE_SIZE - 1) || size & (PAGE_SIZE - 1))
> +               return -EINVAL;
> +
> +       start = gpa >> PAGE_SHIFT;
> +       end = (gpa + size - 1 + PAGE_SIZE) >> PAGE_SHIFT;
> +
> +       /*
> +        * Guest memory defaults to private, kvm->mem_attr_array only stores
> +        * shared memory.
> +        */
> +       entry = is_private ? NULL : xa_mk_value(KVM_MEM_ATTR_SHARED);
> +
> +       for (index = start; index < end; index++) {
> +               r = xa_err(xa_store(&kvm->mem_attr_array, index, entry,
> +                                   GFP_KERNEL_ACCOUNT));
> +               if (r)
> +                       goto err;
> +       }
> +
> +       kvm_zap_gfn_range(kvm, start, end);
> +
> +       return r;
> +err:
> +       for (; index > start; index--)
> +               xa_erase(&kvm->mem_attr_array, index);
> +       return r;
> +}
> +#endif /* CONFIG_HAVE_KVM_PRIVATE_MEM */
> +
>  #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
>  static int kvm_pm_notifier_call(struct notifier_block *bl,
>                                 unsigned long state,
> @@ -1165,6 +1206,9 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
>         spin_lock_init(&kvm->mn_invalidate_lock);
>         rcuwait_init(&kvm->mn_memslots_update_rcuwait);
>         xa_init(&kvm->vcpu_array);
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +       xa_init(&kvm->mem_attr_array);
> +#endif
>
>         INIT_LIST_HEAD(&kvm->gpc_list);
>         spin_lock_init(&kvm->gpc_lock);
> @@ -1338,6 +1382,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
>                 kvm_free_memslots(kvm, &kvm->__memslots[i][0]);
>                 kvm_free_memslots(kvm, &kvm->__memslots[i][1]);
>         }
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +       xa_destroy(&kvm->mem_attr_array);
> +#endif
>         cleanup_srcu_struct(&kvm->irq_srcu);
>         cleanup_srcu_struct(&kvm->srcu);
>         kvm_arch_free_vm(kvm);
> @@ -1541,6 +1588,11 @@ static void kvm_replace_memslot(struct kvm *kvm,
>         }
>  }
>
> +bool __weak kvm_arch_has_private_mem(struct kvm *kvm)
> +{
> +       return false;
> +}
> +
>  static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
>  {
>         u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
> @@ -4703,6 +4755,24 @@ static long kvm_vm_ioctl(struct file *filp,
>                 r = kvm_vm_ioctl_set_memory_region(kvm, &mem);
>                 break;
>         }
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +       case KVM_MEMORY_ENCRYPT_REG_REGION:
> +       case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
> +               struct kvm_enc_region region;
> +               bool set = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
> +
> +               if (!kvm_arch_has_private_mem(kvm))
> +                       goto arch_vm_ioctl;
> +
> +               r = -EFAULT;
> +               if (copy_from_user(&region, argp, sizeof(region)))
> +                       goto out;
> +
> +               r = kvm_vm_ioctl_set_mem_attr(kvm, region.addr,
> +                                             region.size, set);
> +               break;
> +       }
> +#endif
>         case KVM_GET_DIRTY_LOG: {
>                 struct kvm_dirty_log log;
>
> @@ -4856,6 +4926,9 @@ static long kvm_vm_ioctl(struct file *filp,
>                 r = kvm_vm_ioctl_get_stats_fd(kvm);
>                 break;
>         default:
> +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> +arch_vm_ioctl:
> +#endif
>                 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
>         }
>  out:
> --
> 2.25.1
>
Chao Peng Oct. 12, 2022, 2:35 a.m. UTC | #4
On Tue, Oct 11, 2022 at 10:48:58AM +0100, Fuad Tabba wrote:
> Hi,
> 
> On Thu, Sep 15, 2022 at 3:38 PM Chao Peng <chao.p.peng@linux.intel.com> wrote:
> >
> > If CONFIG_HAVE_KVM_PRIVATE_MEM=y, userspace can register/unregister the
> > guest private memory regions through KVM_MEMORY_ENCRYPT_{UN,}REG_REGION
> > ioctls. The patch reuses existing SEV ioctl number but differs that the
> > address in the region for KVM_PRIVATE_MEM case is gpa while for SEV case
> > it's hva. Which usages should the ioctls go is determined by the newly
> > added kvm_arch_has_private_mem(). Architecture which supports
> > KVM_PRIVATE_MEM should override this function.
> >
> > The current implementation defaults all memory to private. The shared
> > memory regions are stored in a xarray variable for memory efficiency and
> > zapping existing memory mappings is also a side effect of these two
> > ioctls when defined.
> >
> > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> > ---
> >  Documentation/virt/kvm/api.rst  | 17 ++++++--
> >  arch/x86/include/asm/kvm_host.h |  1 +
> >  arch/x86/kvm/mmu.h              |  2 -
> >  include/linux/kvm_host.h        | 13 ++++++
> >  virt/kvm/kvm_main.c             | 73 +++++++++++++++++++++++++++++++++
> >  5 files changed, 100 insertions(+), 6 deletions(-)
> >
> > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > index 1a6c003b2a0b..c0f800d04ffc 100644
> > --- a/Documentation/virt/kvm/api.rst
> > +++ b/Documentation/virt/kvm/api.rst
> > @@ -4715,10 +4715,19 @@ Documentation/virt/kvm/x86/amd-memory-encryption.rst.
> >  This ioctl can be used to register a guest memory region which may
> >  contain encrypted data (e.g. guest RAM, SMRAM etc).
> >
> > -It is used in the SEV-enabled guest. When encryption is enabled, a guest
> > -memory region may contain encrypted data. The SEV memory encryption
> > -engine uses a tweak such that two identical plaintext pages, each at
> > -different locations will have differing ciphertexts. So swapping or
> > +Currently this ioctl supports registering memory regions for two usages:
> > +private memory and SEV-encrypted memory.
> > +
> > +When private memory is enabled, this ioctl is used to register guest private
> > +memory region and the addr/size of kvm_enc_region represents guest physical
> > +address (GPA). In this usage, this ioctl zaps the existing guest memory
> > +mappings in KVM that fallen into the region.
> > +
> > +When SEV-encrypted memory is enabled, this ioctl is used to register guest
> > +memory region which may contain encrypted data for a SEV-enabled guest. The
> > +addr/size of kvm_enc_region represents userspace address (HVA). The SEV
> > +memory encryption engine uses a tweak such that two identical plaintext pages,
> > +each at different locations will have differing ciphertexts. So swapping or
> >  moving ciphertext of those pages will not result in plaintext being
> >  swapped. So relocating (or migrating) physical backing pages for the SEV
> >  guest will require some additional steps.
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 2c96c43c313a..cfad6ba1a70a 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -37,6 +37,7 @@
> >  #include <asm/hyperv-tlfs.h>
> >
> >  #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
> > +#define __KVM_HAVE_ZAP_GFN_RANGE
> >
> >  #define KVM_MAX_VCPUS 1024
> >
> > diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> > index 6bdaacb6faa0..c94b620bf94b 100644
> > --- a/arch/x86/kvm/mmu.h
> > +++ b/arch/x86/kvm/mmu.h
> > @@ -211,8 +211,6 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> >         return -(u32)fault & errcode;
> >  }
> >
> > -void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
> > -
> >  int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
> >
> >  int kvm_mmu_post_init_vm(struct kvm *kvm);
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 2125b50f6345..d65690cae80b 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -260,6 +260,15 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
> >  bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
> >  #endif
> >
> > +#ifdef __KVM_HAVE_ZAP_GFN_RANGE
> > +void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
> > +#else
> > +static inline void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start
> > +                                                     gfn_t gfn_end)
> > +{
> > +}
> > +#endif
> > +
> >  enum {
> >         OUTSIDE_GUEST_MODE,
> >         IN_GUEST_MODE,
> > @@ -795,6 +804,9 @@ struct kvm {
> >         struct notifier_block pm_notifier;
> >  #endif
> >         char stats_id[KVM_STATS_NAME_SIZE];
> > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > +       struct xarray mem_attr_array;
> > +#endif
> >  };
> >
> >  #define kvm_err(fmt, ...) \
> > @@ -1454,6 +1466,7 @@ bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
> >  int kvm_arch_post_init_vm(struct kvm *kvm);
> >  void kvm_arch_pre_destroy_vm(struct kvm *kvm);
> >  int kvm_arch_create_vm_debugfs(struct kvm *kvm);
> > +bool kvm_arch_has_private_mem(struct kvm *kvm);
> >
> >  #ifndef __KVM_HAVE_ARCH_VM_ALLOC
> >  /*
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index fa9dd2d2c001..de5cce8c82c7 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -937,6 +937,47 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
> >
> >  #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
> >
> > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > +#define KVM_MEM_ATTR_SHARED    0x0001
> > +static int kvm_vm_ioctl_set_mem_attr(struct kvm *kvm, gpa_t gpa, gpa_t size,
> > +                                    bool is_private)
> > +{
> 
> I wonder if this ioctl should be implemented as an arch-specific
> ioctl. In this patch it performs some actions that pKVM might not need
> or might want to do differently.

I think it's doable. We can provide the mem_attr_array kind thing in
common code and let arch code decide to use it or not. Currently
mem_attr_array is defined in the struct kvm, if those bytes are
unnecessary for pKVM it can even be moved to arch definition, but that
also loses the potential code sharing for confidential usages in other
non-architectures, e.g. if ARM also supports such usage. Or it can be
provided through a different CONFIG_ instead of
CONFIG_HAVE_KVM_PRIVATE_MEM.

Thanks,
Chao
> 
> pKVM tracks the sharing status in the stage-2 page table's software
> bits, so it can avoid the overhead of using mem_attr_array.
> 
> Also, this ioctl calls kvm_zap_gfn_range(), as does the invalidation
> notifier (introduced in patch 8). For pKVM, the kind of zapping (or
> the information conveyed to the hypervisor) might need to be different
> depending on the cause; whether it's invalidation or change of sharing
> status.

> 
> Thanks,
> /fuad
> 
> 
> > +       gfn_t start, end;
> > +       unsigned long index;
> > +       void *entry;
> > +       int r;
> > +
> > +       if (size == 0 || gpa + size < gpa)
> > +               return -EINVAL;
> > +       if (gpa & (PAGE_SIZE - 1) || size & (PAGE_SIZE - 1))
> > +               return -EINVAL;
> > +
> > +       start = gpa >> PAGE_SHIFT;
> > +       end = (gpa + size - 1 + PAGE_SIZE) >> PAGE_SHIFT;
> > +
> > +       /*
> > +        * Guest memory defaults to private, kvm->mem_attr_array only stores
> > +        * shared memory.
> > +        */
> > +       entry = is_private ? NULL : xa_mk_value(KVM_MEM_ATTR_SHARED);
> > +
> > +       for (index = start; index < end; index++) {
> > +               r = xa_err(xa_store(&kvm->mem_attr_array, index, entry,
> > +                                   GFP_KERNEL_ACCOUNT));
> > +               if (r)
> > +                       goto err;
> > +       }
> > +
> > +       kvm_zap_gfn_range(kvm, start, end);
> > +
> > +       return r;
> > +err:
> > +       for (; index > start; index--)
> > +               xa_erase(&kvm->mem_attr_array, index);
> > +       return r;
> > +}
> > +#endif /* CONFIG_HAVE_KVM_PRIVATE_MEM */
> > +
> >  #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
> >  static int kvm_pm_notifier_call(struct notifier_block *bl,
> >                                 unsigned long state,
> > @@ -1165,6 +1206,9 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
> >         spin_lock_init(&kvm->mn_invalidate_lock);
> >         rcuwait_init(&kvm->mn_memslots_update_rcuwait);
> >         xa_init(&kvm->vcpu_array);
> > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > +       xa_init(&kvm->mem_attr_array);
> > +#endif
> >
> >         INIT_LIST_HEAD(&kvm->gpc_list);
> >         spin_lock_init(&kvm->gpc_lock);
> > @@ -1338,6 +1382,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
> >                 kvm_free_memslots(kvm, &kvm->__memslots[i][0]);
> >                 kvm_free_memslots(kvm, &kvm->__memslots[i][1]);
> >         }
> > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > +       xa_destroy(&kvm->mem_attr_array);
> > +#endif
> >         cleanup_srcu_struct(&kvm->irq_srcu);
> >         cleanup_srcu_struct(&kvm->srcu);
> >         kvm_arch_free_vm(kvm);
> > @@ -1541,6 +1588,11 @@ static void kvm_replace_memslot(struct kvm *kvm,
> >         }
> >  }
> >
> > +bool __weak kvm_arch_has_private_mem(struct kvm *kvm)
> > +{
> > +       return false;
> > +}
> > +
> >  static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
> >  {
> >         u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
> > @@ -4703,6 +4755,24 @@ static long kvm_vm_ioctl(struct file *filp,
> >                 r = kvm_vm_ioctl_set_memory_region(kvm, &mem);
> >                 break;
> >         }
> > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > +       case KVM_MEMORY_ENCRYPT_REG_REGION:
> > +       case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
> > +               struct kvm_enc_region region;
> > +               bool set = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
> > +
> > +               if (!kvm_arch_has_private_mem(kvm))
> > +                       goto arch_vm_ioctl;
> > +
> > +               r = -EFAULT;
> > +               if (copy_from_user(&region, argp, sizeof(region)))
> > +                       goto out;
> > +
> > +               r = kvm_vm_ioctl_set_mem_attr(kvm, region.addr,
> > +                                             region.size, set);
> > +               break;
> > +       }
> > +#endif
> >         case KVM_GET_DIRTY_LOG: {
> >                 struct kvm_dirty_log log;
> >
> > @@ -4856,6 +4926,9 @@ static long kvm_vm_ioctl(struct file *filp,
> >                 r = kvm_vm_ioctl_get_stats_fd(kvm);
> >                 break;
> >         default:
> > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > +arch_vm_ioctl:
> > +#endif
> >                 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
> >         }
> >  out:
> > --
> > 2.25.1
> >
Fuad Tabba Oct. 17, 2022, 10:15 a.m. UTC | #5
Hi,

> > > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > > +#define KVM_MEM_ATTR_SHARED    0x0001
> > > +static int kvm_vm_ioctl_set_mem_attr(struct kvm *kvm, gpa_t gpa, gpa_t size,
> > > +                                    bool is_private)
> > > +{
> >
> > I wonder if this ioctl should be implemented as an arch-specific
> > ioctl. In this patch it performs some actions that pKVM might not need
> > or might want to do differently.
>
> I think it's doable. We can provide the mem_attr_array kind thing in
> common code and let arch code decide to use it or not. Currently
> mem_attr_array is defined in the struct kvm, if those bytes are
> unnecessary for pKVM it can even be moved to arch definition, but that
> also loses the potential code sharing for confidential usages in other
> non-architectures, e.g. if ARM also supports such usage. Or it can be
> provided through a different CONFIG_ instead of
> CONFIG_HAVE_KVM_PRIVATE_MEM.

This sounds good. Thank you.


/fuad

> Thanks,
> Chao
> >
> > pKVM tracks the sharing status in the stage-2 page table's software
> > bits, so it can avoid the overhead of using mem_attr_array.
> >
> > Also, this ioctl calls kvm_zap_gfn_range(), as does the invalidation
> > notifier (introduced in patch 8). For pKVM, the kind of zapping (or
> > the information conveyed to the hypervisor) might need to be different
> > depending on the cause; whether it's invalidation or change of sharing
> > status.
>
> >
> > Thanks,
> > /fuad
> >
> >
> > > +       gfn_t start, end;
> > > +       unsigned long index;
> > > +       void *entry;
> > > +       int r;
> > > +
> > > +       if (size == 0 || gpa + size < gpa)
> > > +               return -EINVAL;
> > > +       if (gpa & (PAGE_SIZE - 1) || size & (PAGE_SIZE - 1))
> > > +               return -EINVAL;
> > > +
> > > +       start = gpa >> PAGE_SHIFT;
> > > +       end = (gpa + size - 1 + PAGE_SIZE) >> PAGE_SHIFT;
> > > +
> > > +       /*
> > > +        * Guest memory defaults to private, kvm->mem_attr_array only stores
> > > +        * shared memory.
> > > +        */
> > > +       entry = is_private ? NULL : xa_mk_value(KVM_MEM_ATTR_SHARED);
> > > +
> > > +       for (index = start; index < end; index++) {
> > > +               r = xa_err(xa_store(&kvm->mem_attr_array, index, entry,
> > > +                                   GFP_KERNEL_ACCOUNT));
> > > +               if (r)
> > > +                       goto err;
> > > +       }
> > > +
> > > +       kvm_zap_gfn_range(kvm, start, end);
> > > +
> > > +       return r;
> > > +err:
> > > +       for (; index > start; index--)
> > > +               xa_erase(&kvm->mem_attr_array, index);
> > > +       return r;
> > > +}
> > > +#endif /* CONFIG_HAVE_KVM_PRIVATE_MEM */
> > > +
> > >  #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
> > >  static int kvm_pm_notifier_call(struct notifier_block *bl,
> > >                                 unsigned long state,
> > > @@ -1165,6 +1206,9 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
> > >         spin_lock_init(&kvm->mn_invalidate_lock);
> > >         rcuwait_init(&kvm->mn_memslots_update_rcuwait);
> > >         xa_init(&kvm->vcpu_array);
> > > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > > +       xa_init(&kvm->mem_attr_array);
> > > +#endif
> > >
> > >         INIT_LIST_HEAD(&kvm->gpc_list);
> > >         spin_lock_init(&kvm->gpc_lock);
> > > @@ -1338,6 +1382,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
> > >                 kvm_free_memslots(kvm, &kvm->__memslots[i][0]);
> > >                 kvm_free_memslots(kvm, &kvm->__memslots[i][1]);
> > >         }
> > > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > > +       xa_destroy(&kvm->mem_attr_array);
> > > +#endif
> > >         cleanup_srcu_struct(&kvm->irq_srcu);
> > >         cleanup_srcu_struct(&kvm->srcu);
> > >         kvm_arch_free_vm(kvm);
> > > @@ -1541,6 +1588,11 @@ static void kvm_replace_memslot(struct kvm *kvm,
> > >         }
> > >  }
> > >
> > > +bool __weak kvm_arch_has_private_mem(struct kvm *kvm)
> > > +{
> > > +       return false;
> > > +}
> > > +
> > >  static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
> > >  {
> > >         u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
> > > @@ -4703,6 +4755,24 @@ static long kvm_vm_ioctl(struct file *filp,
> > >                 r = kvm_vm_ioctl_set_memory_region(kvm, &mem);
> > >                 break;
> > >         }
> > > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > > +       case KVM_MEMORY_ENCRYPT_REG_REGION:
> > > +       case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
> > > +               struct kvm_enc_region region;
> > > +               bool set = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
> > > +
> > > +               if (!kvm_arch_has_private_mem(kvm))
> > > +                       goto arch_vm_ioctl;
> > > +
> > > +               r = -EFAULT;
> > > +               if (copy_from_user(&region, argp, sizeof(region)))
> > > +                       goto out;
> > > +
> > > +               r = kvm_vm_ioctl_set_mem_attr(kvm, region.addr,
> > > +                                             region.size, set);
> > > +               break;
> > > +       }
> > > +#endif
> > >         case KVM_GET_DIRTY_LOG: {
> > >                 struct kvm_dirty_log log;
> > >
> > > @@ -4856,6 +4926,9 @@ static long kvm_vm_ioctl(struct file *filp,
> > >                 r = kvm_vm_ioctl_get_stats_fd(kvm);
> > >                 break;
> > >         default:
> > > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > > +arch_vm_ioctl:
> > > +#endif
> > >                 r = kvm_arch_vm_ioctl(filp, ioctl, arg);
> > >         }
> > >  out:
> > > --
> > > 2.25.1
> > >
Sean Christopherson Oct. 17, 2022, 10:17 p.m. UTC | #6
On Mon, Oct 17, 2022, Fuad Tabba wrote:
> Hi,
> 
> > > > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > > > +#define KVM_MEM_ATTR_SHARED    0x0001
> > > > +static int kvm_vm_ioctl_set_mem_attr(struct kvm *kvm, gpa_t gpa, gpa_t size,
> > > > +                                    bool is_private)
> > > > +{
> > >
> > > I wonder if this ioctl should be implemented as an arch-specific
> > > ioctl. In this patch it performs some actions that pKVM might not need
> > > or might want to do differently.
> >
> > I think it's doable. We can provide the mem_attr_array kind thing in
> > common code and let arch code decide to use it or not. Currently
> > mem_attr_array is defined in the struct kvm, if those bytes are
> > unnecessary for pKVM it can even be moved to arch definition, but that
> > also loses the potential code sharing for confidential usages in other
> > non-architectures, e.g. if ARM also supports such usage. Or it can be
> > provided through a different CONFIG_ instead of
> > CONFIG_HAVE_KVM_PRIVATE_MEM.
> 
> This sounds good. Thank you.

I like the idea of a separate Kconfig, e.g. CONFIG_KVM_GENERIC_PRIVATE_MEM or
something.  I highly doubt there will be any non-x86 users for multiple years,
if ever, but it would allow testing the private memory stuff on ARM (and any other
non-x86 arch) without needing full pKVM support and with only minor KVM
modifications, e.g. the x86 support[*] to test UPM without TDX is shaping up to be
trivial.

[*] https://lore.kernel.org/all/Y0mu1FKugNQG5T8K@google.com
Chao Peng Oct. 19, 2022, 1:23 p.m. UTC | #7
On Mon, Oct 17, 2022 at 10:17:45PM +0000, Sean Christopherson wrote:
> On Mon, Oct 17, 2022, Fuad Tabba wrote:
> > Hi,
> > 
> > > > > +#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
> > > > > +#define KVM_MEM_ATTR_SHARED    0x0001
> > > > > +static int kvm_vm_ioctl_set_mem_attr(struct kvm *kvm, gpa_t gpa, gpa_t size,
> > > > > +                                    bool is_private)
> > > > > +{
> > > >
> > > > I wonder if this ioctl should be implemented as an arch-specific
> > > > ioctl. In this patch it performs some actions that pKVM might not need
> > > > or might want to do differently.
> > >
> > > I think it's doable. We can provide the mem_attr_array kind thing in
> > > common code and let arch code decide to use it or not. Currently
> > > mem_attr_array is defined in the struct kvm, if those bytes are
> > > unnecessary for pKVM it can even be moved to arch definition, but that
> > > also loses the potential code sharing for confidential usages in other
> > > non-architectures, e.g. if ARM also supports such usage. Or it can be
> > > provided through a different CONFIG_ instead of
> > > CONFIG_HAVE_KVM_PRIVATE_MEM.
> > 
> > This sounds good. Thank you.
> 
> I like the idea of a separate Kconfig, e.g. CONFIG_KVM_GENERIC_PRIVATE_MEM or
> something.  I highly doubt there will be any non-x86 users for multiple years,
> if ever, but it would allow testing the private memory stuff on ARM (and any other
> non-x86 arch) without needing full pKVM support and with only minor KVM
> modifications, e.g. the x86 support[*] to test UPM without TDX is shaping up to be
> trivial.

CONFIG_KVM_GENERIC_PRIVATE_MEM looks good to me.

Thanks,
Chao
> 
> [*] https://lore.kernel.org/all/Y0mu1FKugNQG5T8K@google.com
Fuad Tabba Oct. 19, 2022, 3:02 p.m. UTC | #8
> > > This sounds good. Thank you.
> >
> > I like the idea of a separate Kconfig, e.g. CONFIG_KVM_GENERIC_PRIVATE_MEM or
> > something.  I highly doubt there will be any non-x86 users for multiple years,
> > if ever, but it would allow testing the private memory stuff on ARM (and any other
> > non-x86 arch) without needing full pKVM support and with only minor KVM
> > modifications, e.g. the x86 support[*] to test UPM without TDX is shaping up to be
> > trivial.
>
> CONFIG_KVM_GENERIC_PRIVATE_MEM looks good to me.

That sounds good to me, and just keeping the xarray isn't really an
issue for pKVM. We could end up using it instead of some of the other
structures we use for tracking.

Cheers,
/fuad

> Thanks,
> Chao
> >
> > [*] https://lore.kernel.org/all/Y0mu1FKugNQG5T8K@google.com
Sean Christopherson Oct. 19, 2022, 4:09 p.m. UTC | #9
On Wed, Oct 19, 2022, Fuad Tabba wrote:
> > > > This sounds good. Thank you.
> > >
> > > I like the idea of a separate Kconfig, e.g. CONFIG_KVM_GENERIC_PRIVATE_MEM or
> > > something.  I highly doubt there will be any non-x86 users for multiple years,
> > > if ever, but it would allow testing the private memory stuff on ARM (and any other
> > > non-x86 arch) without needing full pKVM support and with only minor KVM
> > > modifications, e.g. the x86 support[*] to test UPM without TDX is shaping up to be
> > > trivial.
> >
> > CONFIG_KVM_GENERIC_PRIVATE_MEM looks good to me.
> 
> That sounds good to me, and just keeping the xarray isn't really an
> issue for pKVM.

The xarray won't exist for pKVM if the #ifdefs in this patch are changed from
CONFIG_HAVE_KVM_PRIVATE_MEM => CONFIG_KVM_GENERIC_PRIVATE_MEM.

> We could end up using it instead of some of the other
> structures we use for tracking.

I don't think pKVM should hijack the xarray for other purposes.  At best, it will
be confusing, at worst we'll end up with a mess if ARM ever supports the "generic"
implementation.
Fuad Tabba Oct. 19, 2022, 6:32 p.m. UTC | #10
On Wed, Oct 19, 2022 at 5:09 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Wed, Oct 19, 2022, Fuad Tabba wrote:
> > > > > This sounds good. Thank you.
> > > >
> > > > I like the idea of a separate Kconfig, e.g. CONFIG_KVM_GENERIC_PRIVATE_MEM or
> > > > something.  I highly doubt there will be any non-x86 users for multiple years,
> > > > if ever, but it would allow testing the private memory stuff on ARM (and any other
> > > > non-x86 arch) without needing full pKVM support and with only minor KVM
> > > > modifications, e.g. the x86 support[*] to test UPM without TDX is shaping up to be
> > > > trivial.
> > >
> > > CONFIG_KVM_GENERIC_PRIVATE_MEM looks good to me.
> >
> > That sounds good to me, and just keeping the xarray isn't really an
> > issue for pKVM.
>
> The xarray won't exist for pKVM if the #ifdefs in this patch are changed from
> CONFIG_HAVE_KVM_PRIVATE_MEM => CONFIG_KVM_GENERIC_PRIVATE_MEM.
>
> > We could end up using it instead of some of the other
> > structures we use for tracking.
>
> I don't think pKVM should hijack the xarray for other purposes.  At best, it will
> be confusing, at worst we'll end up with a mess if ARM ever supports the "generic"
> implementation.

Definitely wasn't referring to hijacking it for other purposes, which
is the main reason I wanted to clarify the documentation and the
naming of private_fd. Anyway, I'm glad to see that we're in agreement.
Once I've tightened the screws, I'll share the pKVM port as an RFC as
well.

Cheers,
/fuad
diff mbox series

Patch

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 1a6c003b2a0b..c0f800d04ffc 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4715,10 +4715,19 @@  Documentation/virt/kvm/x86/amd-memory-encryption.rst.
 This ioctl can be used to register a guest memory region which may
 contain encrypted data (e.g. guest RAM, SMRAM etc).
 
-It is used in the SEV-enabled guest. When encryption is enabled, a guest
-memory region may contain encrypted data. The SEV memory encryption
-engine uses a tweak such that two identical plaintext pages, each at
-different locations will have differing ciphertexts. So swapping or
+Currently this ioctl supports registering memory regions for two usages:
+private memory and SEV-encrypted memory.
+
+When private memory is enabled, this ioctl is used to register guest private
+memory region and the addr/size of kvm_enc_region represents guest physical
+address (GPA). In this usage, this ioctl zaps the existing guest memory
+mappings in KVM that fallen into the region.
+
+When SEV-encrypted memory is enabled, this ioctl is used to register guest
+memory region which may contain encrypted data for a SEV-enabled guest. The
+addr/size of kvm_enc_region represents userspace address (HVA). The SEV
+memory encryption engine uses a tweak such that two identical plaintext pages,
+each at different locations will have differing ciphertexts. So swapping or
 moving ciphertext of those pages will not result in plaintext being
 swapped. So relocating (or migrating) physical backing pages for the SEV
 guest will require some additional steps.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2c96c43c313a..cfad6ba1a70a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -37,6 +37,7 @@ 
 #include <asm/hyperv-tlfs.h>
 
 #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
+#define __KVM_HAVE_ZAP_GFN_RANGE
 
 #define KVM_MAX_VCPUS 1024
 
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 6bdaacb6faa0..c94b620bf94b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -211,8 +211,6 @@  static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	return -(u32)fault & errcode;
 }
 
-void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
-
 int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_post_init_vm(struct kvm *kvm);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2125b50f6345..d65690cae80b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -260,6 +260,15 @@  bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
 #endif
 
+#ifdef __KVM_HAVE_ZAP_GFN_RANGE
+void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
+#else
+static inline void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start
+						      gfn_t gfn_end)
+{
+}
+#endif
+
 enum {
 	OUTSIDE_GUEST_MODE,
 	IN_GUEST_MODE,
@@ -795,6 +804,9 @@  struct kvm {
 	struct notifier_block pm_notifier;
 #endif
 	char stats_id[KVM_STATS_NAME_SIZE];
+#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
+	struct xarray mem_attr_array;
+#endif
 };
 
 #define kvm_err(fmt, ...) \
@@ -1454,6 +1466,7 @@  bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_post_init_vm(struct kvm *kvm);
 void kvm_arch_pre_destroy_vm(struct kvm *kvm);
 int kvm_arch_create_vm_debugfs(struct kvm *kvm);
+bool kvm_arch_has_private_mem(struct kvm *kvm);
 
 #ifndef __KVM_HAVE_ARCH_VM_ALLOC
 /*
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fa9dd2d2c001..de5cce8c82c7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -937,6 +937,47 @@  static int kvm_init_mmu_notifier(struct kvm *kvm)
 
 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
 
+#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
+#define KVM_MEM_ATTR_SHARED	0x0001
+static int kvm_vm_ioctl_set_mem_attr(struct kvm *kvm, gpa_t gpa, gpa_t size,
+				     bool is_private)
+{
+	gfn_t start, end;
+	unsigned long index;
+	void *entry;
+	int r;
+
+	if (size == 0 || gpa + size < gpa)
+		return -EINVAL;
+	if (gpa & (PAGE_SIZE - 1) || size & (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	start = gpa >> PAGE_SHIFT;
+	end = (gpa + size - 1 + PAGE_SIZE) >> PAGE_SHIFT;
+
+	/*
+	 * Guest memory defaults to private, kvm->mem_attr_array only stores
+	 * shared memory.
+	 */
+	entry = is_private ? NULL : xa_mk_value(KVM_MEM_ATTR_SHARED);
+
+	for (index = start; index < end; index++) {
+		r = xa_err(xa_store(&kvm->mem_attr_array, index, entry,
+				    GFP_KERNEL_ACCOUNT));
+		if (r)
+			goto err;
+	}
+
+	kvm_zap_gfn_range(kvm, start, end);
+
+	return r;
+err:
+	for (; index > start; index--)
+		xa_erase(&kvm->mem_attr_array, index);
+	return r;
+}
+#endif /* CONFIG_HAVE_KVM_PRIVATE_MEM */
+
 #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
 static int kvm_pm_notifier_call(struct notifier_block *bl,
 				unsigned long state,
@@ -1165,6 +1206,9 @@  static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
 	spin_lock_init(&kvm->mn_invalidate_lock);
 	rcuwait_init(&kvm->mn_memslots_update_rcuwait);
 	xa_init(&kvm->vcpu_array);
+#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
+	xa_init(&kvm->mem_attr_array);
+#endif
 
 	INIT_LIST_HEAD(&kvm->gpc_list);
 	spin_lock_init(&kvm->gpc_lock);
@@ -1338,6 +1382,9 @@  static void kvm_destroy_vm(struct kvm *kvm)
 		kvm_free_memslots(kvm, &kvm->__memslots[i][0]);
 		kvm_free_memslots(kvm, &kvm->__memslots[i][1]);
 	}
+#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
+	xa_destroy(&kvm->mem_attr_array);
+#endif
 	cleanup_srcu_struct(&kvm->irq_srcu);
 	cleanup_srcu_struct(&kvm->srcu);
 	kvm_arch_free_vm(kvm);
@@ -1541,6 +1588,11 @@  static void kvm_replace_memslot(struct kvm *kvm,
 	}
 }
 
+bool __weak kvm_arch_has_private_mem(struct kvm *kvm)
+{
+	return false;
+}
+
 static int check_memory_region_flags(const struct kvm_user_mem_region *mem)
 {
 	u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
@@ -4703,6 +4755,24 @@  static long kvm_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_set_memory_region(kvm, &mem);
 		break;
 	}
+#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
+	case KVM_MEMORY_ENCRYPT_REG_REGION:
+	case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
+		struct kvm_enc_region region;
+		bool set = ioctl == KVM_MEMORY_ENCRYPT_REG_REGION;
+
+		if (!kvm_arch_has_private_mem(kvm))
+			goto arch_vm_ioctl;
+
+		r = -EFAULT;
+		if (copy_from_user(&region, argp, sizeof(region)))
+			goto out;
+
+		r = kvm_vm_ioctl_set_mem_attr(kvm, region.addr,
+					      region.size, set);
+		break;
+	}
+#endif
 	case KVM_GET_DIRTY_LOG: {
 		struct kvm_dirty_log log;
 
@@ -4856,6 +4926,9 @@  static long kvm_vm_ioctl(struct file *filp,
 		r = kvm_vm_ioctl_get_stats_fd(kvm);
 		break;
 	default:
+#ifdef CONFIG_HAVE_KVM_PRIVATE_MEM
+arch_vm_ioctl:
+#endif
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
 out: