[Zesty,2/2] s390/mm: fix race on mm->context.flush_mm

Message ID 1505209542-17445-6-git-send-email-stefan.bader@canonical.com
State New
Headers show
Series
  • Untitled series #2645
Related show

Commit Message

Stefan Bader Sept. 12, 2017, 9:45 a.m.
From: Martin Schwidefsky <schwidefsky@de.ibm.com>

BugLink: http://bugs.launchpad.net/bugs/1708399

The order in __tlb_flush_mm_lazy is to flush TLB first and then clear
the mm->context.flush_mm bit. This can lead to missed flushes as the
bit can be set anytime, the order needs to be the other way around.

But this leads to a different race, __tlb_flush_mm_lazy may be called
on two CPUs concurrently. If mm->context.flush_mm is cleared first then
another CPU can bypass __tlb_flush_mm_lazy although the first CPU has
not done the flush yet. In a virtualized environment the time until the
flush is finally completed can be arbitrarily long.

Add a spinlock to serialize __tlb_flush_mm_lazy and use the function
in finish_arch_post_lock_switch as well.

Cc: <stable@vger.kernel.org>
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
(cherry-picked from commit 60f07c8ec5fae06c23e9fd7bab67dabce92b3414 linux-next)
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
---
 arch/s390/include/asm/mmu.h         | 2 ++
 arch/s390/include/asm/mmu_context.h | 4 ++--
 arch/s390/include/asm/tlbflush.h    | 4 +++-
 3 files changed, 7 insertions(+), 3 deletions(-)

Comments

Colin Ian King Sept. 12, 2017, 12:02 p.m. | #1
On 12/09/17 10:45, Stefan Bader wrote:
> From: Martin Schwidefsky <schwidefsky@de.ibm.com>
> 
> BugLink: http://bugs.launchpad.net/bugs/1708399
> 
> The order in __tlb_flush_mm_lazy is to flush TLB first and then clear
> the mm->context.flush_mm bit. This can lead to missed flushes as the
> bit can be set anytime, the order needs to be the other way around.
> 
> But this leads to a different race, __tlb_flush_mm_lazy may be called
> on two CPUs concurrently. If mm->context.flush_mm is cleared first then
> another CPU can bypass __tlb_flush_mm_lazy although the first CPU has
> not done the flush yet. In a virtualized environment the time until the
> flush is finally completed can be arbitrarily long.
> 
> Add a spinlock to serialize __tlb_flush_mm_lazy and use the function
> in finish_arch_post_lock_switch as well.
> 
> Cc: <stable@vger.kernel.org>
> Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
> (cherry-picked from commit 60f07c8ec5fae06c23e9fd7bab67dabce92b3414 linux-next)
> Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
> ---
>  arch/s390/include/asm/mmu.h         | 2 ++
>  arch/s390/include/asm/mmu_context.h | 4 ++--
>  arch/s390/include/asm/tlbflush.h    | 4 +++-
>  3 files changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
> index bd6f303..3525fe6 100644
> --- a/arch/s390/include/asm/mmu.h
> +++ b/arch/s390/include/asm/mmu.h
> @@ -5,6 +5,7 @@
>  #include <linux/errno.h>
>  
>  typedef struct {
> +	spinlock_t lock;
>  	cpumask_t cpu_attach_mask;
>  	atomic_t flush_count;
>  	unsigned int flush_mm;
> @@ -27,6 +28,7 @@ typedef struct {
>  } mm_context_t;
>  
>  #define INIT_MM_CONTEXT(name)						   \
> +	.context.lock =	__SPIN_LOCK_UNLOCKED(name.context.lock),	   \
>  	.context.pgtable_lock =						   \
>  			__SPIN_LOCK_UNLOCKED(name.context.pgtable_lock),   \
>  	.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
> diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
> index 8823e35..484efe8 100644
> --- a/arch/s390/include/asm/mmu_context.h
> +++ b/arch/s390/include/asm/mmu_context.h
> @@ -17,6 +17,7 @@
>  static inline int init_new_context(struct task_struct *tsk,
>  				   struct mm_struct *mm)
>  {
> +	spin_lock_init(&mm->context.lock);
>  	spin_lock_init(&mm->context.pgtable_lock);
>  	INIT_LIST_HEAD(&mm->context.pgtable_list);
>  	spin_lock_init(&mm->context.gmap_lock);
> @@ -121,8 +122,7 @@ static inline void finish_arch_post_lock_switch(void)
>  		while (atomic_read(&mm->context.flush_count))
>  			cpu_relax();
>  		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
> -		if (mm->context.flush_mm)
> -			__tlb_flush_mm(mm);
> +		__tlb_flush_mm_lazy(mm);
>  		preempt_enable();
>  	}
>  	set_fs(current->thread.mm_segment);
> diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
> index 16fe2a3..b08d5bc 100644
> --- a/arch/s390/include/asm/tlbflush.h
> +++ b/arch/s390/include/asm/tlbflush.h
> @@ -101,10 +101,12 @@ static inline void __tlb_flush_kernel(void)
>  
>  static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
>  {
> +	spin_lock(&mm->context.lock);
>  	if (mm->context.flush_mm) {
> -		__tlb_flush_mm(mm);
>  		mm->context.flush_mm = 0;
> +		__tlb_flush_mm(mm);
>  	}
> +	spin_unlock(&mm->context.lock);
>  }
>  
>  /*
> 
Acked-by: Colin Ian King <colin.king@canonical.com>

Patch

diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index bd6f303..3525fe6 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -5,6 +5,7 @@ 
 #include <linux/errno.h>
 
 typedef struct {
+	spinlock_t lock;
 	cpumask_t cpu_attach_mask;
 	atomic_t flush_count;
 	unsigned int flush_mm;
@@ -27,6 +28,7 @@  typedef struct {
 } mm_context_t;
 
 #define INIT_MM_CONTEXT(name)						   \
+	.context.lock =	__SPIN_LOCK_UNLOCKED(name.context.lock),	   \
 	.context.pgtable_lock =						   \
 			__SPIN_LOCK_UNLOCKED(name.context.pgtable_lock),   \
 	.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 8823e35..484efe8 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -17,6 +17,7 @@ 
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)
 {
+	spin_lock_init(&mm->context.lock);
 	spin_lock_init(&mm->context.pgtable_lock);
 	INIT_LIST_HEAD(&mm->context.pgtable_list);
 	spin_lock_init(&mm->context.gmap_lock);
@@ -121,8 +122,7 @@  static inline void finish_arch_post_lock_switch(void)
 		while (atomic_read(&mm->context.flush_count))
 			cpu_relax();
 		cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-		if (mm->context.flush_mm)
-			__tlb_flush_mm(mm);
+		__tlb_flush_mm_lazy(mm);
 		preempt_enable();
 	}
 	set_fs(current->thread.mm_segment);
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 16fe2a3..b08d5bc 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -101,10 +101,12 @@  static inline void __tlb_flush_kernel(void)
 
 static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
 {
+	spin_lock(&mm->context.lock);
 	if (mm->context.flush_mm) {
-		__tlb_flush_mm(mm);
 		mm->context.flush_mm = 0;
+		__tlb_flush_mm(mm);
 	}
+	spin_unlock(&mm->context.lock);
 }
 
 /*