diff mbox

[3.19.y-ckt,stable] Patch "x86/mce: Reenable CMCI banks when swiching back to interrupt mode" has been added to staging queue

Message ID 1445294352-14605-1-git-send-email-kamal@canonical.com
State New
Headers show

Commit Message

Kamal Mostafa Oct. 19, 2015, 10:39 p.m. UTC
This is a note to let you know that I have just added a patch titled

    x86/mce: Reenable CMCI banks when swiching back to interrupt mode

to the linux-3.19.y-queue branch of the 3.19.y-ckt extended stable tree 
which can be found at:

    http://kernel.ubuntu.com/git/ubuntu/linux.git/log/?h=linux-3.19.y-queue

This patch is scheduled to be released in version 3.19.8-ckt8.

If you, or anyone else, feels it should not be added to this tree, please 
reply to this email.

For more information about the 3.19.y-ckt tree, see
https://wiki.ubuntu.com/Kernel/Dev/ExtendedStable

Thanks.
-Kamal

------

From 6bb35d219656f1eb52dabe71544eeb178aadbb68 Mon Sep 17 00:00:00 2001
From: Xie XiuQi <xiexiuqi@huawei.com>
Date: Wed, 12 Aug 2015 18:29:41 +0200
Subject: x86/mce: Reenable CMCI banks when swiching back to interrupt mode

commit 1b48465500611a2dc5e75800c61ac352e22d41c3 upstream.

Zhang Liguang reported the following issue:

1) System detects a CMCI storm on the current CPU.

2) Kernel disables the CMCI interrupt on banks owned by the
   current CPU and switches to poll mode

3) After the CMCI storm subsides, kernel switches back to
   interrupt mode

4) We expect the system to reenable the CMCI interrupt on banks
   owned by the current CPU

   mce_intel_adjust_timer
   |-> cmci_reenable
       |-> cmci_discover     # owned banks are ignored here

  static void cmci_discover(int banks)
	...
	for (i = 0; i < banks; i++) {
		...
		if (test_bit(i, owned))	# ownd banks is ignore here
			continue;

So convert cmci_storm_disable_banks() to
cmci_toggle_interrupt_mode() which controls whether to enable or
disable CMCI interrupts with its argument.

NB: We cannot clear the owned bit because the banks won't be
polled, otherwise. See:

  27f6c573e0f7 ("x86, CMCI: Add proper detection of end of CMCI storms")

for more info.

Reported-by: Zhang Liguang <zhangliguang@huawei.com>
Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: huawei.libin@huawei.com
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: rui.xiang@huawei.com
Link: http://lkml.kernel.org/r/1439396985-12812-10-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
---
 arch/x86/kernel/cpu/mcheck/mce_intel.c | 41 +++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 18 deletions(-)

--
1.9.1
diff mbox

Patch

diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index b3c97ba..c5b971f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -97,6 +97,27 @@  void mce_intel_hcpu_update(unsigned long cpu)
 	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
 }

+static void cmci_toggle_interrupt_mode(bool on)
+{
+	unsigned long flags, *owned;
+	int bank;
+	u64 val;
+
+	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+	owned = this_cpu_ptr(mce_banks_owned);
+	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
+		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+
+		if (on)
+			val |= MCI_CTL2_CMCI_EN;
+		else
+			val &= ~MCI_CTL2_CMCI_EN;
+
+		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
+	}
+	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+}
+
 unsigned long mce_intel_adjust_timer(unsigned long interval)
 {
 	int r;
@@ -125,7 +146,7 @@  unsigned long mce_intel_adjust_timer(unsigned long interval)
 		 */
 		if (!atomic_read(&cmci_storm_on_cpus)) {
 			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
-			cmci_reenable();
+			cmci_toggle_interrupt_mode(true);
 			cmci_recheck();
 		}
 		return CMCI_POLL_INTERVAL;
@@ -138,22 +159,6 @@  unsigned long mce_intel_adjust_timer(unsigned long interval)
 	}
 }

-static void cmci_storm_disable_banks(void)
-{
-	unsigned long flags, *owned;
-	int bank;
-	u64 val;
-
-	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
-	owned = this_cpu_ptr(mce_banks_owned);
-	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
-		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
-		val &= ~MCI_CTL2_CMCI_EN;
-		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
-	}
-	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
-}
-
 static bool cmci_storm_detect(void)
 {
 	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
@@ -175,7 +180,7 @@  static bool cmci_storm_detect(void)
 	if (cnt <= CMCI_STORM_THRESHOLD)
 		return false;

-	cmci_storm_disable_banks();
+	cmci_toggle_interrupt_mode(false);
 	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
 	r = atomic_add_return(1, &cmci_storm_on_cpus);
 	mce_timer_kick(CMCI_POLL_INTERVAL);