
[1/1] mremap: properly flush TLB before releasing the page

Message ID 1544134470-20053-2-git-send-email-tyhicks@canonical.com
State New
Series CVE-2018-18281 - stale TLB entries via mremap()

Commit Message

Tyler Hicks Dec. 6, 2018, 10:14 p.m. UTC
From: Linus Torvalds <torvalds@linux-foundation.org>

Jann Horn points out that our TLB flushing was subtly wrong for the
mremap() case.  What makes mremap() special is that we don't follow the
usual "add page to list of pages to be freed, then flush tlb, and then
free pages".  No, mremap() obviously just _moves_ the page from one page
table location to another.

That matters, because mremap() thus doesn't directly control the
lifetime of the moved page with a freelist: instead, the lifetime of the
page is controlled by the page table locking, that serializes access to
the entry.

As a result, we need to flush the TLB not just before releasing the lock
for the source location (to avoid any concurrent accesses to the entry),
but also before we release the destination page table lock (to avoid the
TLB being flushed after somebody else has already done something to that
page).

This also makes the whole "need_flush" logic unnecessary, since we now
always end up flushing the TLB for every valid entry.

Reported-and-tested-by: Jann Horn <jannh@google.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Tested-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

CVE-2018-18281

(cherry picked from commit eb66ae030829605d61fbef1909ce310e29f78821)
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
---
 include/linux/huge_mm.h |  2 +-
 mm/huge_memory.c        | 10 ++++------
 mm/mremap.c             | 30 +++++++++++++-----------------
 3 files changed, 18 insertions(+), 24 deletions(-)
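For readers unfamiliar with the operation under discussion: the kernel path this patch corrects sits behind a userspace mremap() call that relocates an existing mapping. The short program below is an illustrative sketch only (it is not part of the patch and not the original reproducer); it merely drives the move_page_tables() path by asking the kernel to move a populated anonymous mapping to a new address.

#define _GNU_SOURCE          /* for mremap(), MREMAP_MAYMOVE, MREMAP_FIXED */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 2 * 1024 * 1024;

	/* Create an anonymous mapping and touch it so its PTEs are populated. */
	char *old = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (old == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	memset(old, 0xaa, len);

	/* Reserve a destination range, then release it so mremap() can claim it. */
	char *dst = mmap(NULL, len, PROT_NONE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (dst == MAP_FAILED) {
		perror("mmap dst");
		return 1;
	}
	munmap(dst, len);

	/*
	 * Move the mapping to dst.  The kernel relocates the existing page
	 * table entries (move_ptes()/move_huge_pmd()) instead of freeing the
	 * pages and re-faulting them, which is why the TLB flush ordering
	 * described above matters.
	 */
	char *newp = mremap(old, len, len, MREMAP_MAYMOVE | MREMAP_FIXED, dst);
	if (newp == MAP_FAILED) {
		perror("mremap");
		return 1;
	}

	printf("mapping moved from %p to %p\n", (void *)old, (void *)newp);
	munmap(newp, len);
	return 0;
}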

Comments

Stefan Bader Jan. 7, 2019, 3:41 p.m. UTC | #1
On 06.12.18 23:14, Tyler Hicks wrote:
> [...]
> CVE-2018-18281
> 
> (cherry picked from commit eb66ae030829605d61fbef1909ce310e29f78821)
> Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Acked-by: Stefan Bader <stefan.bader@canonical.com>
Kleber Sacilotto de Souza Jan. 8, 2019, 3:07 p.m. UTC | #2
On 12/6/18 11:14 PM, Tyler Hicks wrote:
> [...]
> CVE-2018-18281
>
> (cherry picked from commit eb66ae030829605d61fbef1909ce310e29f78821)
> Signed-off-by: Tyler Hicks <tyhicks@canonical.com>


Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>


Patch

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a8a126259bc4..0bec79ae4c2d 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -42,7 +42,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned char *vec);
 extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 			 unsigned long new_addr, unsigned long old_end,
-			 pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush);
+			 pmd_t *old_pmd, pmd_t *new_pmd);
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, pgprot_t newprot,
 			int prot_numa);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d8b3b7693818..768a4687e9eb 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1782,7 +1782,7 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd)
 
 bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, unsigned long old_end,
-		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+		  pmd_t *old_pmd, pmd_t *new_pmd)
 {
 	spinlock_t *old_ptl, *new_ptl;
 	pmd_t pmd;
@@ -1813,7 +1813,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		if (new_ptl != old_ptl)
 			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 		pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
-		if (pmd_present(pmd) && pmd_dirty(pmd))
+		if (pmd_present(pmd))
 			force_flush = true;
 		VM_BUG_ON(!pmd_none(*new_pmd));
 
@@ -1824,12 +1824,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		}
 		pmd = move_soft_dirty_pmd(pmd);
 		set_pmd_at(mm, new_addr, new_pmd, pmd);
-		if (new_ptl != old_ptl)
-			spin_unlock(new_ptl);
 		if (force_flush)
 			flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
-		else
-			*need_flush = true;
+		if (new_ptl != old_ptl)
+			spin_unlock(new_ptl);
 		spin_unlock(old_ptl);
 		return true;
 	}
diff --git a/mm/mremap.c b/mm/mremap.c
index 049470aa1e3e..88ceeb4ef817 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -115,7 +115,7 @@ static pte_t move_soft_dirty_pte(pte_t pte)
 static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		unsigned long old_addr, unsigned long old_end,
 		struct vm_area_struct *new_vma, pmd_t *new_pmd,
-		unsigned long new_addr, bool need_rmap_locks, bool *need_flush)
+		unsigned long new_addr, bool need_rmap_locks)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *old_pte, *new_pte, pte;
@@ -163,15 +163,17 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 
 		pte = ptep_get_and_clear(mm, old_addr, old_pte);
 		/*
-		 * If we are remapping a dirty PTE, make sure
+		 * If we are remapping a valid PTE, make sure
 		 * to flush TLB before we drop the PTL for the
-		 * old PTE or we may race with page_mkclean().
+		 * PTE.
 		 *
-		 * This check has to be done after we removed the
-		 * old PTE from page tables or another thread may
-		 * dirty it after the check and before the removal.
+		 * NOTE! Both old and new PTL matter: the old one
+		 * for racing with page_mkclean(), the new one to
+		 * make sure the physical page stays valid until
+		 * the TLB entry for the old mapping has been
+		 * flushed.
 		 */
-		if (pte_present(pte) && pte_dirty(pte))
+		if (pte_present(pte))
 			force_flush = true;
 		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
 		pte = move_soft_dirty_pte(pte);
@@ -179,13 +181,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	}
 
 	arch_leave_lazy_mmu_mode();
+	if (force_flush)
+		flush_tlb_range(vma, old_end - len, old_end);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	pte_unmap(new_pte - 1);
-	if (force_flush)
-		flush_tlb_range(vma, old_end - len, old_end);
-	else
-		*need_flush = true;
 	pte_unmap_unlock(old_pte - 1, old_ptl);
 	if (need_rmap_locks)
 		drop_rmap_locks(vma);
@@ -200,7 +200,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 {
 	unsigned long extent, next, old_end;
 	pmd_t *old_pmd, *new_pmd;
-	bool need_flush = false;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 
@@ -231,8 +230,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 				if (need_rmap_locks)
 					take_rmap_locks(vma);
 				moved = move_huge_pmd(vma, old_addr, new_addr,
-						    old_end, old_pmd, new_pmd,
-						    &need_flush);
+						    old_end, old_pmd, new_pmd);
 				if (need_rmap_locks)
 					drop_rmap_locks(vma);
 				if (moved)
@@ -250,10 +248,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 		if (extent > LATENCY_LIMIT)
 			extent = LATENCY_LIMIT;
 		move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
-			  new_pmd, new_addr, need_rmap_locks, &need_flush);
+			  new_pmd, new_addr, need_rmap_locks);
 	}
-	if (need_flush)
-		flush_tlb_range(vma, old_end-len, old_addr);
 
 	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);