| Message ID | 1544134538-20198-2-git-send-email-tyhicks@canonical.com |
|---|---|
| State | New |
| Series | CVE-2018-18281 - stale TLB entries via mremap() |
On 06.12.18 23:15, Tyler Hicks wrote:
> From: Linus Torvalds <torvalds@linux-foundation.org>
>
> Commit eb66ae030829605d61fbef1909ce310e29f78821 upstream.
>
> This is a backport to stable 4.4.y.
>
> Jann Horn points out that our TLB flushing was subtly wrong for the
> mremap() case. What makes mremap() special is that we don't follow the
> usual "add page to list of pages to be freed, then flush tlb, and then
> free pages". No, mremap() obviously just _moves_ the page from one page
> table location to another.
>
> That matters, because mremap() thus doesn't directly control the
> lifetime of the moved page with a freelist: instead, the lifetime of the
> page is controlled by the page table locking, that serializes access to
> the entry.
>
> As a result, we need to flush the TLB not just before releasing the lock
> for the source location (to avoid any concurrent accesses to the entry),
> but also before we release the destination page table lock (to avoid the
> TLB being flushed after somebody else has already done something to that
> page).
>
> This also makes the whole "need_flush" logic unnecessary, since we now
> always end up flushing the TLB for every valid entry.
>
> Reported-and-tested-by: Jann Horn <jannh@google.com>
> Acked-by: Will Deacon <will.deacon@arm.com>
> Tested-by: Ingo Molnar <mingo@kernel.org>
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
> [will: backport to 4.4 stable]
> Signed-off-by: Will Deacon <will.deacon@arm.com>
> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
>
> CVE-2018-18281
>
> (backported from commit 2e3ae534fb98c7a6a5cf3e80a190181154328f80 linux-stable)
> [tyhicks: Context change; pmdp_huge_get_and_clear() -> pmdp_get_and_clear()]
> [tyhicks: Context change; i_mmap_rwsem -> i_mmap_mutex in a comment]
> Signed-off-by: Tyler Hicks <tyhicks@canonical.com>

Acked-by: Stefan Bader <stefan.bader@canonical.com>

> ---

Looks about the same as the 4.4.y cherry-pick, and tested.
>  mm/huge_memory.c | 6 +++++-
>  mm/mremap.c      | 21 ++++++++++++++++-----
>  2 files changed, 21 insertions(+), 6 deletions(-)
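Editorial note: the commit message above hinges on mremap() *moving* a live PTE rather than freeing its page. For readers who want to see that operation from userspace, here is a minimal sketch that forces a relocation; the guard-page trick, the 4 KiB page-size assumption, and every identifier are illustrative choices of this note, not part of the patch or the thread.

```c
#define _GNU_SOURCE
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	const size_t pg = 4096;	/* assumes 4 KiB pages */

	/* Map two adjacent pages; the second becomes a guard that
	 * blocks in-place growth of the first. */
	char *old = mmap(NULL, 2 * pg, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(old != MAP_FAILED);
	mprotect(old + pg, pg, PROT_NONE);
	strcpy(old, "moved, not copied");

	/* Growing from one page to two cannot happen in place (the
	 * guard is in the way), so the kernel relocates the mapping:
	 * the PTE is moved rather than freed, which is the move_ptes()
	 * path whose TLB flush ordering this patch fixes. */
	char *new = mremap(old, pg, 2 * pg, MREMAP_MAYMOVE);
	assert(new != MAP_FAILED && new != old);

	printf("%p -> %p: %s\n", (void *)old, (void *)new, new);
	return munmap(new, 2 * pg);
}
```

Build with `cc demo.c` and run; the changed address in the output confirms the mapping was relocated, with its contents intact, rather than copied and freed.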
On 12/6/18 11:15 PM, Tyler Hicks wrote:
> From: Linus Torvalds <torvalds@linux-foundation.org>
>
> Commit eb66ae030829605d61fbef1909ce310e29f78821 upstream.
>
> This is a backport to stable 4.4.y.

Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
```diff
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0ac4174cc690..fa27a34c82f0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1480,7 +1480,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 	spinlock_t *old_ptl, *new_ptl;
 	int ret = 0;
 	pmd_t pmd;
-
+	bool force_flush = false;
 	struct mm_struct *mm = vma->vm_mm;
 
 	if ((old_addr & ~HPAGE_PMD_MASK) ||
@@ -1508,6 +1508,8 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 		if (new_ptl != old_ptl)
 			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 		pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
+		if (pmd_present(pmd))
+			force_flush = true;
 		VM_BUG_ON(!pmd_none(*new_pmd));
 
 		if (pmd_move_must_withdraw(new_ptl, old_ptl)) {
@@ -1516,6 +1518,8 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 			pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
 		}
 		set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
+		if (force_flush)
+			flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
 		if (new_ptl != old_ptl)
 			spin_unlock(new_ptl);
 		spin_unlock(old_ptl);
diff --git a/mm/mremap.c b/mm/mremap.c
index 05f1180e9f21..6b915164d136 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -97,6 +97,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *old_pte, *new_pte, pte;
 	spinlock_t *old_ptl, *new_ptl;
+	bool force_flush = false;
+	unsigned long len = old_end - old_addr;
 
 	/*
 	 * When need_rmap_locks is true, we take the i_mmap_mutex and anon_vma
@@ -143,12 +145,26 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		if (pte_none(*old_pte))
 			continue;
 		pte = ptep_get_and_clear(mm, old_addr, old_pte);
+		/*
+		 * If we are remapping a valid PTE, make sure
+		 * to flush TLB before we drop the PTL for the PTE.
+		 *
+		 * NOTE! Both old and new PTL matter: the old one
+		 * for racing with page_mkclean(), the new one to
+		 * make sure the physical page stays valid until
+		 * the TLB entry for the old mapping has been
+		 * flushed.
+		 */
+		if (pte_present(pte))
+			force_flush = true;
 		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
 		pte = move_soft_dirty_pte(pte);
 		set_pte_at(mm, new_addr, new_pte, pte);
 	}
 
 	arch_leave_lazy_mmu_mode();
+	if (force_flush)
+		flush_tlb_range(vma, old_end - len, old_end);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	pte_unmap(new_pte - 1);
@@ -168,7 +184,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 {
 	unsigned long extent, next, old_end;
 	pmd_t *old_pmd, *new_pmd;
-	bool need_flush = false;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;	/* For mmu_notifiers */
 
@@ -206,7 +221,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			anon_vma_unlock_write(vma->anon_vma);
 		}
 		if (err > 0) {
-			need_flush = true;
 			continue;
 		} else if (!err) {
 			split_huge_page_pmd(vma, old_addr, old_pmd);
@@ -223,10 +237,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 			extent = LATENCY_LIMIT;
 		move_ptes(vma, old_pmd, old_addr, old_addr + extent,
 			  new_vma, new_pmd, new_addr, need_rmap_locks);
-		need_flush = true;
 	}
-	if (likely(need_flush))
-		flush_tlb_range(vma, old_end-len, old_addr);
 
 	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 
```
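Editorial note: the in-diff comment says both the old and the new PTL matter, and the commit message argues the flush must happen before either lock is dropped. The toy model below sketches only that ordering argument in plain, runnable C: the "TLB" is a single cached pointer, the race window is simulated by one lookup at the unlock point, and `struct toy`, `racing_lookup()`, and `move_page()` are all invented names. It is a sketch of the reasoning, not of the kernel's implementation.

```c
#include <stdbool.h>
#include <stdio.h>

/* One page's worth of toy state: "physical" slots, a page table
 * entry, and a single cached translation standing in for the TLB. */
struct toy {
	int  old_slot, new_slot;
	int *pte;	/* current translation */
	int *tlb;	/* cached translation; stale once the page moves */
};

/* What a racing CPU does between lock drop and flush: it happily
 * uses the cached translation if one still exists. */
static int *racing_lookup(struct toy *t)
{
	return t->tlb ? t->tlb : t->pte;
}

/* Move the "page" and flush the cache either before or after the
 * point where the page table locks are dropped; return what a racing
 * lookup sees exactly in that window. */
static int *move_page(struct toy *t, bool flush_before_unlock)
{
	int *seen;

	/* --- old and new PTL held --- */
	t->new_slot = t->old_slot;	/* mremap(): move, don't free */
	t->pte = &t->new_slot;
	if (flush_before_unlock)
		t->tlb = NULL;		/* patched ordering */
	/* --- locks dropped: the race window --- */
	seen = racing_lookup(t);
	if (!flush_before_unlock)
		t->tlb = NULL;		/* old ordering: flush arrives late */
	return seen;
}

int main(void)
{
	struct toy a = { .old_slot = 42 }, b = { .old_slot = 42 };

	a.pte = a.tlb = &a.old_slot;
	b.pte = b.tlb = &b.old_slot;

	printf("flush after unlock:  race sees %s translation\n",
	       move_page(&a, false) == &a.old_slot ? "the STALE" : "the new");
	printf("flush before unlock: race sees %s translation\n",
	       move_page(&b, true) == &b.old_slot ? "the STALE" : "the new");
	return 0;
}
```

The only load-bearing detail is that the invalidation happens while the locks are still conceptually held, exactly what `force_flush` enforces in the patch; everything else in the model is scaffolding.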