diff mbox

[Lucid-ec2] SRU: xen: events: do not unmask event channels on resume

Message ID 1295443222-11942-1-git-send-email-stefan.bader@canonical.com
State Accepted
Delegated to: Brad Figg
Headers show

Commit Message

Stefan Bader Jan. 19, 2011, 1:20 p.m. UTC
SRU Justification:

Impact: With the current ec2 kernels the kernel oops described below is
experienced as a result of enabling interrupts on the pv spinlock event
channel.

Fix: The following patch is taken from upstream and is included in 2.6.37.
It has been reported to successfully prevent the oops.

Testcase: Migration of a guest (using suspend)

---

From 6903591f314b8947d0e362bda7715e90eb9df75e Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Mon, 1 Nov 2010 16:30:09 +0000
Subject: [PATCH] xen: events: do not unmask event channels on resume

The IRQ core code will take care of disabling and reenabling
interrupts over suspend resume automatically, therefore we do not need
to do this in the Xen event channel code.

The only exception is those event channels marked IRQF_NO_SUSPEND
which the IRQ core ignores. We must unmask these ourselves, taking
care to obey the current IRQ_DISABLED status. Failure check for
IRQ_DISABLED leads to enabling polled only event channels, such as
that associated with the pv spinlocks, which must never be enabled:

[   21.970432] ------------[ cut here ]------------
[   21.970432] kernel BUG at arch/x86/xen/spinlock.c:343!
[   21.970432] invalid opcode: 0000 [#1] SMP
[   21.970432] last sysfs file: /sys/devices/virtual/net/lo/operstate
[   21.970432] Modules linked in:
[   21.970432]
[   21.970432] Pid: 0, comm: swapper Not tainted (2.6.32.24-x86_32p-xen-01034-g787c727 #34)
[   21.970432] EIP: 0061:[<c102e209>] EFLAGS: 00010046 CPU: 3
[   21.970432] EIP is at dummy_handler+0x3/0x7
[   21.970432] EAX: 0000021c EBX: dfc16880 ECX: 0000001a EDX: 00000000
[   21.970432] ESI: dfc02c00 EDI: 00000001 EBP: dfc47e10 ESP: dfc47e10
[   21.970432]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069
[   21.970432] Process swapper (pid: 0, ti=dfc46000 task=dfc39440 task.ti=dfc46000)
[   21.970432] Stack:
[   21.970432]  dfc47e30 c10a39f0 0000021c 00000000 00000000 dfc16880 0000021c 00000001
[   21.970432] <0> dfc47e40 c10a4f08 0000021c 00000000 dfc47e78 c12240a7 c1839284 c1839284
[   21.970432] <0> 00000200 00000000 00000000 f5720000 c1f3d028 c1f3d02c 00000180 dfc47e90
[   21.970432] Call Trace:
[   21.970432]  [<c10a39f0>] ? handle_IRQ_event+0x5f/0x122
[   21.970432]  [<c10a4f08>] ? handle_percpu_irq+0x2f/0x55
[   21.970432]  [<c12240a7>] ? __xen_evtchn_do_upcall+0xdb/0x15f
[   21.970432]  [<c122481e>] ? xen_evtchn_do_upcall+0x20/0x30
[   21.970432]  [<c1030d47>] ? xen_do_upcall+0x7/0xc
[   21.970432]  [<c102007b>] ? apic_reg_read+0xd3/0x22d
[   21.970432]  [<c1002227>] ? hypercall_page+0x227/0x1005
[   21.970432]  [<c102d30b>] ? xen_force_evtchn_callback+0xf/0x14
[   21.970432]  [<c102da7c>] ? check_events+0x8/0xc
[   21.970432]  [<c102da3b>] ? xen_irq_enable_direct_end+0x0/0x1
[   21.970432]  [<c105e485>] ? finish_task_switch+0x62/0xba
[   21.970432]  [<c14e3f84>] ? schedule+0x808/0x89d
[   21.970432]  [<c1084dc5>] ? hrtimer_start_expires+0x1a/0x22
[   21.970432]  [<c1085154>] ? tick_nohz_restart_sched_tick+0x15a/0x162
[   21.970432]  [<c102f43a>] ? cpu_idle+0x6d/0x6f
[   21.970432]  [<c14db29e>] ? cpu_bringup_and_idle+0xd/0xf
[   21.970432] Code: 5d 0f 95 c0 0f b6 c0 c3 55 66 83 78 02 00 89 e5 5d 0f 95 \
c0 0f b6 c0 c3 55 b2 01 86 10 31 c0 84 d2 89 e5 0f 94 c0 5d c3 55 89 e5 <0f> 0b \
eb fe 55 80 3d 4c ce 84 c1 00 89 e5 57 56 89 c6 53 74 15
[   21.970432] EIP: [<c102e209>] dummy_handler+0x3/0x7 SS:ESP 0069:dfc47e10
[   21.970432] ---[ end trace c0b71f7e12cf3011 ]---

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

BugLink: http://bugs.launchpad.net/bugs/681083

(cherry-picked from commit 6903591f314b8947d0e362bda7715e90eb9df75e upstream)
Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
---
 drivers/xen/events.c |   25 ++++++++++++++++++-------
 1 files changed, 18 insertions(+), 7 deletions(-)

Comments

Tim Gardner Jan. 19, 2011, 1:51 p.m. UTC | #1
On 01/19/2011 06:20 AM, Stefan Bader wrote:
> SRU Justification:
>
> Impact: With the current ec2 kernels the kernel oops described below is
> experienced as a result of enabling interrupts on the pv spinlock event
> channel.
>
> Fix: The following patch is taken from upstream and is included in 2.6.37.
> It has been reported to successfully prevent the oops.
>
> Testcase: Migration of a guest (using suspend)
>
> ---
>
>  From 6903591f314b8947d0e362bda7715e90eb9df75e Mon Sep 17 00:00:00 2001
> From: Ian Campbell<ian.campbell@citrix.com>
> Date: Mon, 1 Nov 2010 16:30:09 +0000
> Subject: [PATCH] xen: events: do not unmask event channels on resume
>
> The IRQ core code will take care of disabling and reenabling
> interrupts over suspend resume automatically, therefore we do not need
> to do this in the Xen event channel code.
>
> The only exception is those event channels marked IRQF_NO_SUSPEND
> which the IRQ core ignores. We must unmask these ourselves, taking
> care to obey the current IRQ_DISABLED status. Failure check for
> IRQ_DISABLED leads to enabling polled only event channels, such as
> that associated with the pv spinlocks, which must never be enabled:
>
> [   21.970432] ------------[ cut here ]------------
> [   21.970432] kernel BUG at arch/x86/xen/spinlock.c:343!
> [   21.970432] invalid opcode: 0000 [#1] SMP
> [   21.970432] last sysfs file: /sys/devices/virtual/net/lo/operstate
> [   21.970432] Modules linked in:
> [   21.970432]
> [   21.970432] Pid: 0, comm: swapper Not tainted (2.6.32.24-x86_32p-xen-01034-g787c727 #34)
> [   21.970432] EIP: 0061:[<c102e209>] EFLAGS: 00010046 CPU: 3
> [   21.970432] EIP is at dummy_handler+0x3/0x7
> [   21.970432] EAX: 0000021c EBX: dfc16880 ECX: 0000001a EDX: 00000000
> [   21.970432] ESI: dfc02c00 EDI: 00000001 EBP: dfc47e10 ESP: dfc47e10
> [   21.970432]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069
> [   21.970432] Process swapper (pid: 0, ti=dfc46000 task=dfc39440 task.ti=dfc46000)
> [   21.970432] Stack:
> [   21.970432]  dfc47e30 c10a39f0 0000021c 00000000 00000000 dfc16880 0000021c 00000001
> [   21.970432]<0>  dfc47e40 c10a4f08 0000021c 00000000 dfc47e78 c12240a7 c1839284 c1839284
> [   21.970432]<0>  00000200 00000000 00000000 f5720000 c1f3d028 c1f3d02c 00000180 dfc47e90
> [   21.970432] Call Trace:
> [   21.970432]  [<c10a39f0>] ? handle_IRQ_event+0x5f/0x122
> [   21.970432]  [<c10a4f08>] ? handle_percpu_irq+0x2f/0x55
> [   21.970432]  [<c12240a7>] ? __xen_evtchn_do_upcall+0xdb/0x15f
> [   21.970432]  [<c122481e>] ? xen_evtchn_do_upcall+0x20/0x30
> [   21.970432]  [<c1030d47>] ? xen_do_upcall+0x7/0xc
> [   21.970432]  [<c102007b>] ? apic_reg_read+0xd3/0x22d
> [   21.970432]  [<c1002227>] ? hypercall_page+0x227/0x1005
> [   21.970432]  [<c102d30b>] ? xen_force_evtchn_callback+0xf/0x14
> [   21.970432]  [<c102da7c>] ? check_events+0x8/0xc
> [   21.970432]  [<c102da3b>] ? xen_irq_enable_direct_end+0x0/0x1
> [   21.970432]  [<c105e485>] ? finish_task_switch+0x62/0xba
> [   21.970432]  [<c14e3f84>] ? schedule+0x808/0x89d
> [   21.970432]  [<c1084dc5>] ? hrtimer_start_expires+0x1a/0x22
> [   21.970432]  [<c1085154>] ? tick_nohz_restart_sched_tick+0x15a/0x162
> [   21.970432]  [<c102f43a>] ? cpu_idle+0x6d/0x6f
> [   21.970432]  [<c14db29e>] ? cpu_bringup_and_idle+0xd/0xf
> [   21.970432] Code: 5d 0f 95 c0 0f b6 c0 c3 55 66 83 78 02 00 89 e5 5d 0f 95 \
> c0 0f b6 c0 c3 55 b2 01 86 10 31 c0 84 d2 89 e5 0f 94 c0 5d c3 55 89 e5<0f>  0b \
> eb fe 55 80 3d 4c ce 84 c1 00 89 e5 57 56 89 c6 53 74 15
> [   21.970432] EIP: [<c102e209>] dummy_handler+0x3/0x7 SS:ESP 0069:dfc47e10
> [   21.970432] ---[ end trace c0b71f7e12cf3011 ]---
>
> Signed-off-by: Ian Campbell<ian.campbell@citrix.com>
> Signed-off-by: Jeremy Fitzhardinge<jeremy.fitzhardinge@citrix.com>
>
> BugLink: http://bugs.launchpad.net/bugs/681083
>
> (cherry-picked from commit 6903591f314b8947d0e362bda7715e90eb9df75e upstream)
> Signed-off-by: Stefan Bader<stefan.bader@canonical.com>
> ---
>   drivers/xen/events.c |   25 ++++++++++++++++++-------
>   1 files changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/xen/events.c b/drivers/xen/events.c
> index 97612f5..321a0c8 100644
> --- a/drivers/xen/events.c
> +++ b/drivers/xen/events.c
> @@ -1299,9 +1299,6 @@ static void restore_cpu_virqs(unsigned int cpu)
>   		evtchn_to_irq[evtchn] = irq;
>   		irq_info[irq] = mk_virq_info(evtchn, virq);
>   		bind_evtchn_to_cpu(evtchn, cpu);
> -
> -		/* Ready for use. */
> -		unmask_evtchn(evtchn);
>   	}
>   }
>
> @@ -1327,10 +1324,6 @@ static void restore_cpu_ipis(unsigned int cpu)
>   		evtchn_to_irq[evtchn] = irq;
>   		irq_info[irq] = mk_ipi_info(evtchn, ipi);
>   		bind_evtchn_to_cpu(evtchn, cpu);
> -
> -		/* Ready for use. */
> -		unmask_evtchn(evtchn);
> -
>   	}
>   }
>
> @@ -1390,6 +1383,7 @@ void xen_poll_irq(int irq)
>   void xen_irq_resume(void)
>   {
>   	unsigned int cpu, irq, evtchn;
> +	struct irq_desc *desc;
>
>   	init_evtchn_cpu_bindings();
>
> @@ -1408,6 +1402,23 @@ void xen_irq_resume(void)
>   		restore_cpu_virqs(cpu);
>   		restore_cpu_ipis(cpu);
>   	}
> +
> +	/*
> +	 * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
> +	 * are not handled by the IRQ core.
> +	 */
> +	for_each_irq_desc(irq, desc) {
> +		if (!desc->action || !(desc->action->flags&  IRQF_NO_SUSPEND))
> +			continue;
> +		if (desc->status&  IRQ_DISABLED)
> +			continue;
> +
> +		evtchn = evtchn_from_irq(irq);
> +		if (evtchn == -1)
> +			continue;
> +
> +		unmask_evtchn(evtchn);
> +	}
>   }
>
>   static struct irq_chip xen_dynamic_chip __read_mostly = {

Isolated and easily tested.

Acked-by: Tim Gardner <tim.gardner@canonical.com>
Andy Whitcroft Jan. 19, 2011, 2:05 p.m. UTC | #2
On Wed, Jan 19, 2011 at 02:20:22PM +0100, Stefan Bader wrote:
> SRU Justification:
> 
> Impact: With the current ec2 kernels the kernel oops described below is
> experienced as a result of enabling interrupts on the pv spinlock event
> channel.
> 
> Fix: The following patch is taken from upstream and is included in 2.6.37.
> It has been reported to successfully prevent the oops.
> 
> Testcase: Migration of a guest (using suspend)
> 
> ---
> 
> From 6903591f314b8947d0e362bda7715e90eb9df75e Mon Sep 17 00:00:00 2001
> From: Ian Campbell <ian.campbell@citrix.com>
> Date: Mon, 1 Nov 2010 16:30:09 +0000
> Subject: [PATCH] xen: events: do not unmask event channels on resume
> 
> The IRQ core code will take care of disabling and reenabling
> interrupts over suspend resume automatically, therefore we do not need
> to do this in the Xen event channel code.
> 
> The only exception is those event channels marked IRQF_NO_SUSPEND
> which the IRQ core ignores. We must unmask these ourselves, taking
> care to obey the current IRQ_DISABLED status. Failure check for
> IRQ_DISABLED leads to enabling polled only event channels, such as
> that associated with the pv spinlocks, which must never be enabled:
> 
> [   21.970432] ------------[ cut here ]------------
> [   21.970432] kernel BUG at arch/x86/xen/spinlock.c:343!
> [   21.970432] invalid opcode: 0000 [#1] SMP
> [   21.970432] last sysfs file: /sys/devices/virtual/net/lo/operstate
> [   21.970432] Modules linked in:
> [   21.970432]
> [   21.970432] Pid: 0, comm: swapper Not tainted (2.6.32.24-x86_32p-xen-01034-g787c727 #34)
> [   21.970432] EIP: 0061:[<c102e209>] EFLAGS: 00010046 CPU: 3
> [   21.970432] EIP is at dummy_handler+0x3/0x7
> [   21.970432] EAX: 0000021c EBX: dfc16880 ECX: 0000001a EDX: 00000000
> [   21.970432] ESI: dfc02c00 EDI: 00000001 EBP: dfc47e10 ESP: dfc47e10
> [   21.970432]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069
> [   21.970432] Process swapper (pid: 0, ti=dfc46000 task=dfc39440 task.ti=dfc46000)
> [   21.970432] Stack:
> [   21.970432]  dfc47e30 c10a39f0 0000021c 00000000 00000000 dfc16880 0000021c 00000001
> [   21.970432] <0> dfc47e40 c10a4f08 0000021c 00000000 dfc47e78 c12240a7 c1839284 c1839284
> [   21.970432] <0> 00000200 00000000 00000000 f5720000 c1f3d028 c1f3d02c 00000180 dfc47e90
> [   21.970432] Call Trace:
> [   21.970432]  [<c10a39f0>] ? handle_IRQ_event+0x5f/0x122
> [   21.970432]  [<c10a4f08>] ? handle_percpu_irq+0x2f/0x55
> [   21.970432]  [<c12240a7>] ? __xen_evtchn_do_upcall+0xdb/0x15f
> [   21.970432]  [<c122481e>] ? xen_evtchn_do_upcall+0x20/0x30
> [   21.970432]  [<c1030d47>] ? xen_do_upcall+0x7/0xc
> [   21.970432]  [<c102007b>] ? apic_reg_read+0xd3/0x22d
> [   21.970432]  [<c1002227>] ? hypercall_page+0x227/0x1005
> [   21.970432]  [<c102d30b>] ? xen_force_evtchn_callback+0xf/0x14
> [   21.970432]  [<c102da7c>] ? check_events+0x8/0xc
> [   21.970432]  [<c102da3b>] ? xen_irq_enable_direct_end+0x0/0x1
> [   21.970432]  [<c105e485>] ? finish_task_switch+0x62/0xba
> [   21.970432]  [<c14e3f84>] ? schedule+0x808/0x89d
> [   21.970432]  [<c1084dc5>] ? hrtimer_start_expires+0x1a/0x22
> [   21.970432]  [<c1085154>] ? tick_nohz_restart_sched_tick+0x15a/0x162
> [   21.970432]  [<c102f43a>] ? cpu_idle+0x6d/0x6f
> [   21.970432]  [<c14db29e>] ? cpu_bringup_and_idle+0xd/0xf
> [   21.970432] Code: 5d 0f 95 c0 0f b6 c0 c3 55 66 83 78 02 00 89 e5 5d 0f 95 \
> c0 0f b6 c0 c3 55 b2 01 86 10 31 c0 84 d2 89 e5 0f 94 c0 5d c3 55 89 e5 <0f> 0b \
> eb fe 55 80 3d 4c ce 84 c1 00 89 e5 57 56 89 c6 53 74 15
> [   21.970432] EIP: [<c102e209>] dummy_handler+0x3/0x7 SS:ESP 0069:dfc47e10
> [   21.970432] ---[ end trace c0b71f7e12cf3011 ]---
> 
> Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
> 
> BugLink: http://bugs.launchpad.net/bugs/681083
> 
> (cherry-picked from commit 6903591f314b8947d0e362bda7715e90eb9df75e upstream)
> Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
> ---
>  drivers/xen/events.c |   25 ++++++++++++++++++-------
>  1 files changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/xen/events.c b/drivers/xen/events.c
> index 97612f5..321a0c8 100644
> --- a/drivers/xen/events.c
> +++ b/drivers/xen/events.c
> @@ -1299,9 +1299,6 @@ static void restore_cpu_virqs(unsigned int cpu)
>  		evtchn_to_irq[evtchn] = irq;
>  		irq_info[irq] = mk_virq_info(evtchn, virq);
>  		bind_evtchn_to_cpu(evtchn, cpu);
> -
> -		/* Ready for use. */
> -		unmask_evtchn(evtchn);
>  	}
>  }
>  
> @@ -1327,10 +1324,6 @@ static void restore_cpu_ipis(unsigned int cpu)
>  		evtchn_to_irq[evtchn] = irq;
>  		irq_info[irq] = mk_ipi_info(evtchn, ipi);
>  		bind_evtchn_to_cpu(evtchn, cpu);
> -
> -		/* Ready for use. */
> -		unmask_evtchn(evtchn);
> -
>  	}
>  }
>  
> @@ -1390,6 +1383,7 @@ void xen_poll_irq(int irq)
>  void xen_irq_resume(void)
>  {
>  	unsigned int cpu, irq, evtchn;
> +	struct irq_desc *desc;
>  
>  	init_evtchn_cpu_bindings();
>  
> @@ -1408,6 +1402,23 @@ void xen_irq_resume(void)
>  		restore_cpu_virqs(cpu);
>  		restore_cpu_ipis(cpu);
>  	}
> +
> +	/*
> +	 * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
> +	 * are not handled by the IRQ core.
> +	 */
> +	for_each_irq_desc(irq, desc) {
> +		if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND))
> +			continue;
> +		if (desc->status & IRQ_DISABLED)
> +			continue;
> +
> +		evtchn = evtchn_from_irq(irq);
> +		if (evtchn == -1)
> +			continue;
> +
> +		unmask_evtchn(evtchn);
> +	}
>  }
>  
>  static struct irq_chip xen_dynamic_chip __read_mostly = {

Seems sane, only affects ec2, and indeed ec2 is a branch so low risk.
Has been tested by submitter.

Acked-by: Andy Whitcroft <apw@canonical.com>

-apw
Brad Figg Jan. 19, 2011, 9:04 p.m. UTC | #3
On 01/19/2011 05:20 AM, Stefan Bader wrote:
> SRU Justification:
>
> Impact: With the current ec2 kernels the kernel oops described below is
> experienced as a result of enabling interrupts on the pv spinlock event
> channel.
>
> Fix: The following patch is taken from upstream and is included in 2.6.37.
> It has been reported to successfully prevent the oops.
>
> Testcase: Migration of a guest (using suspend)
>
> ---
>
>  From 6903591f314b8947d0e362bda7715e90eb9df75e Mon Sep 17 00:00:00 2001
> From: Ian Campbell<ian.campbell@citrix.com>
> Date: Mon, 1 Nov 2010 16:30:09 +0000
> Subject: [PATCH] xen: events: do not unmask event channels on resume
>
> The IRQ core code will take care of disabling and reenabling
> interrupts over suspend resume automatically, therefore we do not need
> to do this in the Xen event channel code.
>
> The only exception is those event channels marked IRQF_NO_SUSPEND
> which the IRQ core ignores. We must unmask these ourselves, taking
> care to obey the current IRQ_DISABLED status. Failure check for
> IRQ_DISABLED leads to enabling polled only event channels, such as
> that associated with the pv spinlocks, which must never be enabled:
>
> [   21.970432] ------------[ cut here ]------------
> [   21.970432] kernel BUG at arch/x86/xen/spinlock.c:343!
> [   21.970432] invalid opcode: 0000 [#1] SMP
> [   21.970432] last sysfs file: /sys/devices/virtual/net/lo/operstate
> [   21.970432] Modules linked in:
> [   21.970432]
> [   21.970432] Pid: 0, comm: swapper Not tainted (2.6.32.24-x86_32p-xen-01034-g787c727 #34)
> [   21.970432] EIP: 0061:[<c102e209>] EFLAGS: 00010046 CPU: 3
> [   21.970432] EIP is at dummy_handler+0x3/0x7
> [   21.970432] EAX: 0000021c EBX: dfc16880 ECX: 0000001a EDX: 00000000
> [   21.970432] ESI: dfc02c00 EDI: 00000001 EBP: dfc47e10 ESP: dfc47e10
> [   21.970432]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069
> [   21.970432] Process swapper (pid: 0, ti=dfc46000 task=dfc39440 task.ti=dfc46000)
> [   21.970432] Stack:
> [   21.970432]  dfc47e30 c10a39f0 0000021c 00000000 00000000 dfc16880 0000021c 00000001
> [   21.970432]<0>  dfc47e40 c10a4f08 0000021c 00000000 dfc47e78 c12240a7 c1839284 c1839284
> [   21.970432]<0>  00000200 00000000 00000000 f5720000 c1f3d028 c1f3d02c 00000180 dfc47e90
> [   21.970432] Call Trace:
> [   21.970432]  [<c10a39f0>] ? handle_IRQ_event+0x5f/0x122
> [   21.970432]  [<c10a4f08>] ? handle_percpu_irq+0x2f/0x55
> [   21.970432]  [<c12240a7>] ? __xen_evtchn_do_upcall+0xdb/0x15f
> [   21.970432]  [<c122481e>] ? xen_evtchn_do_upcall+0x20/0x30
> [   21.970432]  [<c1030d47>] ? xen_do_upcall+0x7/0xc
> [   21.970432]  [<c102007b>] ? apic_reg_read+0xd3/0x22d
> [   21.970432]  [<c1002227>] ? hypercall_page+0x227/0x1005
> [   21.970432]  [<c102d30b>] ? xen_force_evtchn_callback+0xf/0x14
> [   21.970432]  [<c102da7c>] ? check_events+0x8/0xc
> [   21.970432]  [<c102da3b>] ? xen_irq_enable_direct_end+0x0/0x1
> [   21.970432]  [<c105e485>] ? finish_task_switch+0x62/0xba
> [   21.970432]  [<c14e3f84>] ? schedule+0x808/0x89d
> [   21.970432]  [<c1084dc5>] ? hrtimer_start_expires+0x1a/0x22
> [   21.970432]  [<c1085154>] ? tick_nohz_restart_sched_tick+0x15a/0x162
> [   21.970432]  [<c102f43a>] ? cpu_idle+0x6d/0x6f
> [   21.970432]  [<c14db29e>] ? cpu_bringup_and_idle+0xd/0xf
> [   21.970432] Code: 5d 0f 95 c0 0f b6 c0 c3 55 66 83 78 02 00 89 e5 5d 0f 95 \
> c0 0f b6 c0 c3 55 b2 01 86 10 31 c0 84 d2 89 e5 0f 94 c0 5d c3 55 89 e5<0f>  0b \
> eb fe 55 80 3d 4c ce 84 c1 00 89 e5 57 56 89 c6 53 74 15
> [   21.970432] EIP: [<c102e209>] dummy_handler+0x3/0x7 SS:ESP 0069:dfc47e10
> [   21.970432] ---[ end trace c0b71f7e12cf3011 ]---
>
> Signed-off-by: Ian Campbell<ian.campbell@citrix.com>
> Signed-off-by: Jeremy Fitzhardinge<jeremy.fitzhardinge@citrix.com>
>
> BugLink: http://bugs.launchpad.net/bugs/681083
>
> (cherry-picked from commit 6903591f314b8947d0e362bda7715e90eb9df75e upstream)
> Signed-off-by: Stefan Bader<stefan.bader@canonical.com>
> ---
>   drivers/xen/events.c |   25 ++++++++++++++++++-------
>   1 files changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/xen/events.c b/drivers/xen/events.c
> index 97612f5..321a0c8 100644
> --- a/drivers/xen/events.c
> +++ b/drivers/xen/events.c
> @@ -1299,9 +1299,6 @@ static void restore_cpu_virqs(unsigned int cpu)
>   		evtchn_to_irq[evtchn] = irq;
>   		irq_info[irq] = mk_virq_info(evtchn, virq);
>   		bind_evtchn_to_cpu(evtchn, cpu);
> -
> -		/* Ready for use. */
> -		unmask_evtchn(evtchn);
>   	}
>   }
>
> @@ -1327,10 +1324,6 @@ static void restore_cpu_ipis(unsigned int cpu)
>   		evtchn_to_irq[evtchn] = irq;
>   		irq_info[irq] = mk_ipi_info(evtchn, ipi);
>   		bind_evtchn_to_cpu(evtchn, cpu);
> -
> -		/* Ready for use. */
> -		unmask_evtchn(evtchn);
> -
>   	}
>   }
>
> @@ -1390,6 +1383,7 @@ void xen_poll_irq(int irq)
>   void xen_irq_resume(void)
>   {
>   	unsigned int cpu, irq, evtchn;
> +	struct irq_desc *desc;
>
>   	init_evtchn_cpu_bindings();
>
> @@ -1408,6 +1402,23 @@ void xen_irq_resume(void)
>   		restore_cpu_virqs(cpu);
>   		restore_cpu_ipis(cpu);
>   	}
> +
> +	/*
> +	 * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
> +	 * are not handled by the IRQ core.
> +	 */
> +	for_each_irq_desc(irq, desc) {
> +		if (!desc->action || !(desc->action->flags&  IRQF_NO_SUSPEND))
> +			continue;
> +		if (desc->status&  IRQ_DISABLED)
> +			continue;
> +
> +		evtchn = evtchn_from_irq(irq);
> +		if (evtchn == -1)
> +			continue;
> +
> +		unmask_evtchn(evtchn);
> +	}
>   }
>
>   static struct irq_chip xen_dynamic_chip __read_mostly = {

Applied and pushed to Lucid ec2.
Stefan Bader March 15, 2011, 1:22 p.m. UTC | #4
Unfortunately I have to bring this up again. The problem is that the combination
of Xen and Lucid automatically triggers the connection with the ec2 topic branch.
But this patch has absolutely no effect in the ec2 topic branch as the file is
inactive due to configuration options. It only modifies xen driver code, so
regression for our normal builds should be nil.
But for people taking the master branch and building xen guest kernels it helps,
so we could consider moving the patch from the ec2 branch to our lucid master
branch for convenience.
(If the ec2 branch needs something similar it will take reasonably more effort
because the layout of the files used and the code looks much different).

-Stefan

On 01/19/2011 02:20 PM, Stefan Bader wrote:
> SRU Justification:
> 
> Impact: With the current ec2 kernels the kernel oops described below is
> experienced as a result of enabling interrupts on the pv spinlock event
> channel.
> 
> Fix: The following patch is taken from upstream and is included in 2.6.37.
> It has been reported to successfully prevent the oops.
> 
> Testcase: Migration of a guest (using suspend)
> 
> ---
> 
> From 6903591f314b8947d0e362bda7715e90eb9df75e Mon Sep 17 00:00:00 2001
> From: Ian Campbell <ian.campbell@citrix.com>
> Date: Mon, 1 Nov 2010 16:30:09 +0000
> Subject: [PATCH] xen: events: do not unmask event channels on resume
> 
> The IRQ core code will take care of disabling and reenabling
> interrupts over suspend resume automatically, therefore we do not need
> to do this in the Xen event channel code.
> 
> The only exception is those event channels marked IRQF_NO_SUSPEND
> which the IRQ core ignores. We must unmask these ourselves, taking
> care to obey the current IRQ_DISABLED status. Failure check for
> IRQ_DISABLED leads to enabling polled only event channels, such as
> that associated with the pv spinlocks, which must never be enabled:
> 
> [   21.970432] ------------[ cut here ]------------
> [   21.970432] kernel BUG at arch/x86/xen/spinlock.c:343!
> [   21.970432] invalid opcode: 0000 [#1] SMP
> [   21.970432] last sysfs file: /sys/devices/virtual/net/lo/operstate
> [   21.970432] Modules linked in:
> [   21.970432]
> [   21.970432] Pid: 0, comm: swapper Not tainted (2.6.32.24-x86_32p-xen-01034-g787c727 #34)
> [   21.970432] EIP: 0061:[<c102e209>] EFLAGS: 00010046 CPU: 3
> [   21.970432] EIP is at dummy_handler+0x3/0x7
> [   21.970432] EAX: 0000021c EBX: dfc16880 ECX: 0000001a EDX: 00000000
> [   21.970432] ESI: dfc02c00 EDI: 00000001 EBP: dfc47e10 ESP: dfc47e10
> [   21.970432]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069
> [   21.970432] Process swapper (pid: 0, ti=dfc46000 task=dfc39440 task.ti=dfc46000)
> [   21.970432] Stack:
> [   21.970432]  dfc47e30 c10a39f0 0000021c 00000000 00000000 dfc16880 0000021c 00000001
> [   21.970432] <0> dfc47e40 c10a4f08 0000021c 00000000 dfc47e78 c12240a7 c1839284 c1839284
> [   21.970432] <0> 00000200 00000000 00000000 f5720000 c1f3d028 c1f3d02c 00000180 dfc47e90
> [   21.970432] Call Trace:
> [   21.970432]  [<c10a39f0>] ? handle_IRQ_event+0x5f/0x122
> [   21.970432]  [<c10a4f08>] ? handle_percpu_irq+0x2f/0x55
> [   21.970432]  [<c12240a7>] ? __xen_evtchn_do_upcall+0xdb/0x15f
> [   21.970432]  [<c122481e>] ? xen_evtchn_do_upcall+0x20/0x30
> [   21.970432]  [<c1030d47>] ? xen_do_upcall+0x7/0xc
> [   21.970432]  [<c102007b>] ? apic_reg_read+0xd3/0x22d
> [   21.970432]  [<c1002227>] ? hypercall_page+0x227/0x1005
> [   21.970432]  [<c102d30b>] ? xen_force_evtchn_callback+0xf/0x14
> [   21.970432]  [<c102da7c>] ? check_events+0x8/0xc
> [   21.970432]  [<c102da3b>] ? xen_irq_enable_direct_end+0x0/0x1
> [   21.970432]  [<c105e485>] ? finish_task_switch+0x62/0xba
> [   21.970432]  [<c14e3f84>] ? schedule+0x808/0x89d
> [   21.970432]  [<c1084dc5>] ? hrtimer_start_expires+0x1a/0x22
> [   21.970432]  [<c1085154>] ? tick_nohz_restart_sched_tick+0x15a/0x162
> [   21.970432]  [<c102f43a>] ? cpu_idle+0x6d/0x6f
> [   21.970432]  [<c14db29e>] ? cpu_bringup_and_idle+0xd/0xf
> [   21.970432] Code: 5d 0f 95 c0 0f b6 c0 c3 55 66 83 78 02 00 89 e5 5d 0f 95 \
> c0 0f b6 c0 c3 55 b2 01 86 10 31 c0 84 d2 89 e5 0f 94 c0 5d c3 55 89 e5 <0f> 0b \
> eb fe 55 80 3d 4c ce 84 c1 00 89 e5 57 56 89 c6 53 74 15
> [   21.970432] EIP: [<c102e209>] dummy_handler+0x3/0x7 SS:ESP 0069:dfc47e10
> [   21.970432] ---[ end trace c0b71f7e12cf3011 ]---
> 
> Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
> 
> BugLink: http://bugs.launchpad.net/bugs/681083
> 
> (cherry-picked from commit 6903591f314b8947d0e362bda7715e90eb9df75e upstream)
> Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
> ---
>  drivers/xen/events.c |   25 ++++++++++++++++++-------
>  1 files changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/xen/events.c b/drivers/xen/events.c
> index 97612f5..321a0c8 100644
> --- a/drivers/xen/events.c
> +++ b/drivers/xen/events.c
> @@ -1299,9 +1299,6 @@ static void restore_cpu_virqs(unsigned int cpu)
>  		evtchn_to_irq[evtchn] = irq;
>  		irq_info[irq] = mk_virq_info(evtchn, virq);
>  		bind_evtchn_to_cpu(evtchn, cpu);
> -
> -		/* Ready for use. */
> -		unmask_evtchn(evtchn);
>  	}
>  }
>  
> @@ -1327,10 +1324,6 @@ static void restore_cpu_ipis(unsigned int cpu)
>  		evtchn_to_irq[evtchn] = irq;
>  		irq_info[irq] = mk_ipi_info(evtchn, ipi);
>  		bind_evtchn_to_cpu(evtchn, cpu);
> -
> -		/* Ready for use. */
> -		unmask_evtchn(evtchn);
> -
>  	}
>  }
>  
> @@ -1390,6 +1383,7 @@ void xen_poll_irq(int irq)
>  void xen_irq_resume(void)
>  {
>  	unsigned int cpu, irq, evtchn;
> +	struct irq_desc *desc;
>  
>  	init_evtchn_cpu_bindings();
>  
> @@ -1408,6 +1402,23 @@ void xen_irq_resume(void)
>  		restore_cpu_virqs(cpu);
>  		restore_cpu_ipis(cpu);
>  	}
> +
> +	/*
> +	 * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
> +	 * are not handled by the IRQ core.
> +	 */
> +	for_each_irq_desc(irq, desc) {
> +		if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND))
> +			continue;
> +		if (desc->status & IRQ_DISABLED)
> +			continue;
> +
> +		evtchn = evtchn_from_irq(irq);
> +		if (evtchn == -1)
> +			continue;
> +
> +		unmask_evtchn(evtchn);
> +	}
>  }
>  
>  static struct irq_chip xen_dynamic_chip __read_mostly = {
Stefan Bader April 6, 2011, 3:40 p.m. UTC | #5
On 01/19/2011 02:20 PM, Stefan Bader wrote:
> SRU Justification:
> 
> Impact: With the current ec2 kernels the kernel oops described below is
> experienced as a result of enabling interrupts on the pv spinlock event
> channel.
> 
> Fix: The following patch is taken from upstream and is included in 2.6.37.
> It has been reported to successfully prevent the oops.
> 
> Testcase: Migration of a guest (using suspend)
> 

I just have added that patch to the lucid master-next branch. Currently it is in
the ec2 branch but I'll drop it there on the next rebase. This only has effect
for people using the generic kernel as a paravirt guest in Xen. So it never was
doing any good in the ec2 branch anyway.

Stefan

> ---
> 
> From 6903591f314b8947d0e362bda7715e90eb9df75e Mon Sep 17 00:00:00 2001
> From: Ian Campbell <ian.campbell@citrix.com>
> Date: Mon, 1 Nov 2010 16:30:09 +0000
> Subject: [PATCH] xen: events: do not unmask event channels on resume
> 
> The IRQ core code will take care of disabling and reenabling
> interrupts over suspend resume automatically, therefore we do not need
> to do this in the Xen event channel code.
> 
> The only exception is those event channels marked IRQF_NO_SUSPEND
> which the IRQ core ignores. We must unmask these ourselves, taking
> care to obey the current IRQ_DISABLED status. Failure check for
> IRQ_DISABLED leads to enabling polled only event channels, such as
> that associated with the pv spinlocks, which must never be enabled:
> 
> [   21.970432] ------------[ cut here ]------------
> [   21.970432] kernel BUG at arch/x86/xen/spinlock.c:343!
> [   21.970432] invalid opcode: 0000 [#1] SMP
> [   21.970432] last sysfs file: /sys/devices/virtual/net/lo/operstate
> [   21.970432] Modules linked in:
> [   21.970432]
> [   21.970432] Pid: 0, comm: swapper Not tainted (2.6.32.24-x86_32p-xen-01034-g787c727 #34)
> [   21.970432] EIP: 0061:[<c102e209>] EFLAGS: 00010046 CPU: 3
> [   21.970432] EIP is at dummy_handler+0x3/0x7
> [   21.970432] EAX: 0000021c EBX: dfc16880 ECX: 0000001a EDX: 00000000
> [   21.970432] ESI: dfc02c00 EDI: 00000001 EBP: dfc47e10 ESP: dfc47e10
> [   21.970432]  DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069
> [   21.970432] Process swapper (pid: 0, ti=dfc46000 task=dfc39440 task.ti=dfc46000)
> [   21.970432] Stack:
> [   21.970432]  dfc47e30 c10a39f0 0000021c 00000000 00000000 dfc16880 0000021c 00000001
> [   21.970432] <0> dfc47e40 c10a4f08 0000021c 00000000 dfc47e78 c12240a7 c1839284 c1839284
> [   21.970432] <0> 00000200 00000000 00000000 f5720000 c1f3d028 c1f3d02c 00000180 dfc47e90
> [   21.970432] Call Trace:
> [   21.970432]  [<c10a39f0>] ? handle_IRQ_event+0x5f/0x122
> [   21.970432]  [<c10a4f08>] ? handle_percpu_irq+0x2f/0x55
> [   21.970432]  [<c12240a7>] ? __xen_evtchn_do_upcall+0xdb/0x15f
> [   21.970432]  [<c122481e>] ? xen_evtchn_do_upcall+0x20/0x30
> [   21.970432]  [<c1030d47>] ? xen_do_upcall+0x7/0xc
> [   21.970432]  [<c102007b>] ? apic_reg_read+0xd3/0x22d
> [   21.970432]  [<c1002227>] ? hypercall_page+0x227/0x1005
> [   21.970432]  [<c102d30b>] ? xen_force_evtchn_callback+0xf/0x14
> [   21.970432]  [<c102da7c>] ? check_events+0x8/0xc
> [   21.970432]  [<c102da3b>] ? xen_irq_enable_direct_end+0x0/0x1
> [   21.970432]  [<c105e485>] ? finish_task_switch+0x62/0xba
> [   21.970432]  [<c14e3f84>] ? schedule+0x808/0x89d
> [   21.970432]  [<c1084dc5>] ? hrtimer_start_expires+0x1a/0x22
> [   21.970432]  [<c1085154>] ? tick_nohz_restart_sched_tick+0x15a/0x162
> [   21.970432]  [<c102f43a>] ? cpu_idle+0x6d/0x6f
> [   21.970432]  [<c14db29e>] ? cpu_bringup_and_idle+0xd/0xf
> [   21.970432] Code: 5d 0f 95 c0 0f b6 c0 c3 55 66 83 78 02 00 89 e5 5d 0f 95 \
> c0 0f b6 c0 c3 55 b2 01 86 10 31 c0 84 d2 89 e5 0f 94 c0 5d c3 55 89 e5 <0f> 0b \
> eb fe 55 80 3d 4c ce 84 c1 00 89 e5 57 56 89 c6 53 74 15
> [   21.970432] EIP: [<c102e209>] dummy_handler+0x3/0x7 SS:ESP 0069:dfc47e10
> [   21.970432] ---[ end trace c0b71f7e12cf3011 ]---
> 
> Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
> 
> BugLink: http://bugs.launchpad.net/bugs/681083
> 
> (cherry-picked from commit 6903591f314b8947d0e362bda7715e90eb9df75e upstream)
> Signed-off-by: Stefan Bader <stefan.bader@canonical.com>
> ---
>  drivers/xen/events.c |   25 ++++++++++++++++++-------
>  1 files changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/xen/events.c b/drivers/xen/events.c
> index 97612f5..321a0c8 100644
> --- a/drivers/xen/events.c
> +++ b/drivers/xen/events.c
> @@ -1299,9 +1299,6 @@ static void restore_cpu_virqs(unsigned int cpu)
>  		evtchn_to_irq[evtchn] = irq;
>  		irq_info[irq] = mk_virq_info(evtchn, virq);
>  		bind_evtchn_to_cpu(evtchn, cpu);
> -
> -		/* Ready for use. */
> -		unmask_evtchn(evtchn);
>  	}
>  }
>  
> @@ -1327,10 +1324,6 @@ static void restore_cpu_ipis(unsigned int cpu)
>  		evtchn_to_irq[evtchn] = irq;
>  		irq_info[irq] = mk_ipi_info(evtchn, ipi);
>  		bind_evtchn_to_cpu(evtchn, cpu);
> -
> -		/* Ready for use. */
> -		unmask_evtchn(evtchn);
> -
>  	}
>  }
>  
> @@ -1390,6 +1383,7 @@ void xen_poll_irq(int irq)
>  void xen_irq_resume(void)
>  {
>  	unsigned int cpu, irq, evtchn;
> +	struct irq_desc *desc;
>  
>  	init_evtchn_cpu_bindings();
>  
> @@ -1408,6 +1402,23 @@ void xen_irq_resume(void)
>  		restore_cpu_virqs(cpu);
>  		restore_cpu_ipis(cpu);
>  	}
> +
> +	/*
> +	 * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
> +	 * are not handled by the IRQ core.
> +	 */
> +	for_each_irq_desc(irq, desc) {
> +		if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND))
> +			continue;
> +		if (desc->status & IRQ_DISABLED)
> +			continue;
> +
> +		evtchn = evtchn_from_irq(irq);
> +		if (evtchn == -1)
> +			continue;
> +
> +		unmask_evtchn(evtchn);
> +	}
>  }
>  
>  static struct irq_chip xen_dynamic_chip __read_mostly = {
diff mbox

Patch

diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 97612f5..321a0c8 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1299,9 +1299,6 @@  static void restore_cpu_virqs(unsigned int cpu)
 		evtchn_to_irq[evtchn] = irq;
 		irq_info[irq] = mk_virq_info(evtchn, virq);
 		bind_evtchn_to_cpu(evtchn, cpu);
-
-		/* Ready for use. */
-		unmask_evtchn(evtchn);
 	}
 }
 
@@ -1327,10 +1324,6 @@  static void restore_cpu_ipis(unsigned int cpu)
 		evtchn_to_irq[evtchn] = irq;
 		irq_info[irq] = mk_ipi_info(evtchn, ipi);
 		bind_evtchn_to_cpu(evtchn, cpu);
-
-		/* Ready for use. */
-		unmask_evtchn(evtchn);
-
 	}
 }
 
@@ -1390,6 +1383,7 @@  void xen_poll_irq(int irq)
 void xen_irq_resume(void)
 {
 	unsigned int cpu, irq, evtchn;
+	struct irq_desc *desc;
 
 	init_evtchn_cpu_bindings();
 
@@ -1408,6 +1402,23 @@  void xen_irq_resume(void)
 		restore_cpu_virqs(cpu);
 		restore_cpu_ipis(cpu);
 	}
+
+	/*
+	 * Unmask any IRQF_NO_SUSPEND IRQs which are enabled. These
+	 * are not handled by the IRQ core.
+	 */
+	for_each_irq_desc(irq, desc) {
+		if (!desc->action || !(desc->action->flags & IRQF_NO_SUSPEND))
+			continue;
+		if (desc->status & IRQ_DISABLED)
+			continue;
+
+		evtchn = evtchn_from_irq(irq);
+		if (evtchn == -1)
+			continue;
+
+		unmask_evtchn(evtchn);
+	}
 }
 
 static struct irq_chip xen_dynamic_chip __read_mostly = {