[SRU,Impish] KVM: x86/mmu: do compare-and-exchange of gPTE via the user address

Message ID 20220505193939.3754679-1-cascardo@canonical.com
State New
Series [SRU,Impish] KVM: x86/mmu: do compare-and-exchange of gPTE via the user address

Commit Message

Thadeu Lima de Souza Cascardo May 5, 2022, 7:39 p.m. UTC
From: Paolo Bonzini <pbonzini@redhat.com>

commit 2a8859f373b0a86f0ece8ec8312607eacf12485d upstream.

FNAME(cmpxchg_gpte) is an inefficient mess.  It is at least decent if it
can go through get_user_pages_fast(), but if it cannot then it tries to
use memremap(); that is not just terribly slow, it is also wrong because
it assumes that the VM_PFNMAP VMA is contiguous.
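
For illustration, here is a minimal sketch of the assumption baked into that
fallback (the helper name naive_pfnmap_paddr() is hypothetical; the real
arithmetic is visible in the removed hunk below): the physical address is
derived purely from vm_start and vm_pgoff, which is only valid when the whole
VM_PFNMAP VMA was populated by a single linear remap_pfn_range() call.

#include <linux/mm.h>

/*
 * Sketch only, not part of the patch: the linear pfn arithmetic the old
 * memremap() fallback relies on.  It yields the wrong physical address
 * whenever the VM_PFNMAP VMA is pieced together from more than one
 * pfn range.
 */
static unsigned long naive_pfnmap_paddr(struct vm_area_struct *vma,
					unsigned long vaddr)
{
	unsigned long pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;

	return pfn << PAGE_SHIFT;
}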

The right way to do it would be to do the same thing as
hva_to_pfn_remapped() does since commit add6a0cd1c5b ("KVM: MMU: try to
fix up page faults before giving up", 2016-07-05), using follow_pte()
and fixup_user_fault() to determine the correct address to use for
memremap().  To do this, one could for example extract hva_to_pfn()
for use outside virt/kvm/kvm_main.c.  But really there is no reason to
do that either, because there is already a perfectly valid address to
do the cmpxchg() on, only it is a userspace address.  That means doing
user_access_begin()/user_access_end() and writing the code in assembly
to handle exceptions correctly.  Worse, the guest PTE can be 8 bytes
even on i686, so there is the extra complication of using cmpxchg8b to
account for.  But at least it is an efficient mess.

(Thanks to Linus for suggesting improvement on the inline assembly).
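
As a rough sketch of the user-access window this relies on, here is a
hypothetical read_gpte_user() helper that uses the stock unsafe_get_user()
accessor instead of the patch's hand-written cmpxchg assembly; any fault
inside the window is routed through the exception table to the fault label
instead of oopsing.

#include <linux/types.h>
#include <linux/uaccess.h>

/*
 * Hypothetical illustration, not the patch itself: open a user access
 * window, do one exception-handled access, and close the window on both
 * the success path and the fault path.
 */
static int read_gpte_user(u64 __user *uptr, u64 *val)
{
	if (!user_access_begin(uptr, sizeof(*uptr)))
		return -EFAULT;
	unsafe_get_user(*val, uptr, fault);
	user_access_end();
	return 0;
fault:
	user_access_end();
	return -EFAULT;
}

The patch below follows the same pattern, except that the access itself is a
LOCK CMPXCHG (CMPXCHG8B for the 8-byte case on 32-bit) emitted as inline
assembly, with an _ASM_EXTABLE_UA() entry that lands on the local label 2 on
a fault and leaves r at -EFAULT.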

Reported-by: Qiuhao Li <qiuhao@sysec.org>
Reported-by: Gaoning Pan <pgn@zju.edu.cn>
Reported-by: Yongkang Jia <kangel@zju.edu.cn>
Reported-by: syzbot+6cde2282daa792c49ab8@syzkaller.appspotmail.com
Debugged-by: Tadeusz Struk <tadeusz.struk@linaro.org>
Tested-by: Maxim Levitsky <mlevitsk@redhat.com>
Cc: stable@vger.kernel.org
Fixes: bd53cb35a3e9 ("X86/KVM: Handle PFNs outside of kernel reach when touching GPTEs")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 8771d9673e0bdb7148299f3c074667124bde6dff linux-5.15.y)
CVE-2022-1158
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
---
 arch/x86/kvm/mmu/paging_tmpl.h | 77 ++++++++++++++++------------------
 1 file changed, 37 insertions(+), 40 deletions(-)

Comments

Luke Nowakowski-Krijger May 5, 2022, 9:14 p.m. UTC | #1
Acked-by: Luke Nowakowski-Krijger <luke.nowakowskikrijger@canonical.com>

Stefan Bader May 6, 2022, 7:38 a.m. UTC | #2
Acked-by: Stefan Bader <stefan.bader@canonical.com>

Stefan Bader May 6, 2022, 7:50 a.m. UTC | #3

Applied to impish:linux/master-next. Thanks.

-Stefan

Patch

diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 45160fe80098..20a269bce359 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -34,9 +34,8 @@ 
 	#define PT_HAVE_ACCESSED_DIRTY(mmu) true
 	#ifdef CONFIG_X86_64
 	#define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
-	#define CMPXCHG cmpxchg
+	#define CMPXCHG "cmpxchgq"
 	#else
-	#define CMPXCHG cmpxchg64
 	#define PT_MAX_FULL_LEVELS 2
 	#endif
 #elif PTTYPE == 32
@@ -52,7 +51,7 @@ 
 	#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
 	#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
 	#define PT_HAVE_ACCESSED_DIRTY(mmu) true
-	#define CMPXCHG cmpxchg
+	#define CMPXCHG "cmpxchgl"
 #elif PTTYPE == PTTYPE_EPT
 	#define pt_element_t u64
 	#define guest_walker guest_walkerEPT
@@ -65,7 +64,9 @@ 
 	#define PT_GUEST_DIRTY_SHIFT 9
 	#define PT_GUEST_ACCESSED_SHIFT 8
 	#define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
-	#define CMPXCHG cmpxchg64
+	#ifdef CONFIG_X86_64
+	#define CMPXCHG "cmpxchgq"
+	#endif
 	#define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
 #else
 	#error Invalid PTTYPE value
@@ -147,43 +148,39 @@  static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			       pt_element_t __user *ptep_user, unsigned index,
 			       pt_element_t orig_pte, pt_element_t new_pte)
 {
-	int npages;
-	pt_element_t ret;
-	pt_element_t *table;
-	struct page *page;
-
-	npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page);
-	if (likely(npages == 1)) {
-		table = kmap_atomic(page);
-		ret = CMPXCHG(&table[index], orig_pte, new_pte);
-		kunmap_atomic(table);
-
-		kvm_release_page_dirty(page);
-	} else {
-		struct vm_area_struct *vma;
-		unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK;
-		unsigned long pfn;
-		unsigned long paddr;
-
-		mmap_read_lock(current->mm);
-		vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE);
-		if (!vma || !(vma->vm_flags & VM_PFNMAP)) {
-			mmap_read_unlock(current->mm);
-			return -EFAULT;
-		}
-		pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
-		paddr = pfn << PAGE_SHIFT;
-		table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB);
-		if (!table) {
-			mmap_read_unlock(current->mm);
-			return -EFAULT;
-		}
-		ret = CMPXCHG(&table[index], orig_pte, new_pte);
-		memunmap(table);
-		mmap_read_unlock(current->mm);
-	}
+	int r = -EFAULT;
+
+	if (!user_access_begin(ptep_user, sizeof(pt_element_t)))
+		return -EFAULT;
+
+#ifdef CMPXCHG
+	asm volatile("1:" LOCK_PREFIX CMPXCHG " %[new], %[ptr]\n"
+		     "mov $0, %[r]\n"
+		     "setnz %b[r]\n"
+		     "2:"
+		     _ASM_EXTABLE_UA(1b, 2b)
+		     : [ptr] "+m" (*ptep_user),
+		       [old] "+a" (orig_pte),
+		       [r] "+q" (r)
+		     : [new] "r" (new_pte)
+		     : "memory");
+#else
+	asm volatile("1:" LOCK_PREFIX "cmpxchg8b %[ptr]\n"
+		     "movl $0, %[r]\n"
+		     "jz 2f\n"
+		     "incl %[r]\n"
+		     "2:"
+		     _ASM_EXTABLE_UA(1b, 2b)
+		     : [ptr] "+m" (*ptep_user),
+		       [old] "+A" (orig_pte),
+		       [r] "+rm" (r)
+		     : [new_lo] "b" ((u32)new_pte),
+		       [new_hi] "c" ((u32)(new_pte >> 32))
+		     : "memory");
+#endif
 
-	return (ret != orig_pte);
+	user_access_end();
+	return r;
 }
 
 static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,