diff mbox series

ipmi: Fix timer race with module unload

Message ID 20181022224834.16637-2-manoj.iyer@canonical.com
State New
Headers show
Series ipmi: Fix timer race with module unload | expand

Commit Message

Manoj Iyer Oct. 22, 2018, 10:48 p.m. UTC
From: Jan Glauber <jglauber@cavium.com>

Please note that below oops is from an older kernel, but the same
race seems to be present in the upstream kernel too.

---8<---

The following panic was encountered during removing the ipmi_ssif
module:

[ 526.352555] Unable to handle kernel paging request at virtual address ffff000006923090
[ 526.360464] Mem abort info:
[ 526.363257] ESR = 0x86000007
[ 526.366304] Exception class = IABT (current EL), IL = 32 bits
[ 526.372221] SET = 0, FnV = 0
[ 526.375269] EA = 0, S1PTW = 0
[ 526.378405] swapper pgtable: 4k pages, 48-bit VAs, pgd = 000000008ae60416
[ 526.385185] [ffff000006923090] *pgd=000000bffcffe803, *pud=000000bffcffd803, *pmd=0000009f4731a003, *pte=0000000000000000
[ 526.396141] Internal error: Oops: 86000007 [#1] SMP
[ 526.401008] Modules linked in: nls_iso8859_1 ipmi_devintf joydev input_leds ipmi_msghandler shpchp sch_fq_codel ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 btrfs zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear i2c_smbus hid_generic usbhid uas hid usb_storage ast aes_ce_blk i2c_algo_bit aes_ce_cipher qede ttm crc32_ce ptp crct10dif_ce drm_kms_helper ghash_ce syscopyarea sha2_ce sysfillrect sysimgblt pps_core fb_sys_fops sha256_arm64 sha1_ce mpt3sas qed drm raid_class ahci scsi_transport_sas libahci gpio_xlp i2c_xlp9xx aes_neon_bs aes_neon_blk crypto_simd cryptd aes_arm64 [last unloaded: ipmi_ssif]
[ 526.468085] CPU: 125 PID: 0 Comm: swapper/125 Not tainted 4.15.0-35-generic #38~lp1775396+build.1
[ 526.476942] Hardware name: To be filled by O.E.M. Saber/Saber, BIOS 0ACKL022 08/14/2018
[ 526.484932] pstate: 00400009 (nzcv daif +PAN -UAO)
[ 526.489713] pc : 0xffff000006923090
[ 526.493198] lr : call_timer_fn+0x34/0x178
[ 526.497194] sp : ffff000009b0bdd0
[ 526.500496] x29: ffff000009b0bdd0 x28: 0000000000000082
[ 526.505796] x27: 0000000000000002 x26: ffff000009515188
[ 526.511096] x25: ffff000009515180 x24: ffff0000090f1018
[ 526.516396] x23: ffff000009519660 x22: dead000000000200
[ 526.521696] x21: ffff000006923090 x20: 0000000000000100
[ 526.526995] x19: ffff809eeb466a40 x18: 0000000000000000
[ 526.532295] x17: 000000000000000e x16: 0000000000000007
[ 526.537594] x15: 0000000000000000 x14: 071c71c71c71c71c
[ 526.542894] x13: 0000000000000000 x12: 0000000000000000
[ 526.548193] x11: 0000000000000001 x10: ffff000009b0be88
[ 526.553493] x9 : 0000000000000000 x8 : 0000000000000005
[ 526.558793] x7 : ffff80befc1f8528 x6 : 0000000000000020
[ 526.564092] x5 : 0000000000000040 x4 : 0000000020001b20
[ 526.569392] x3 : 0000000000000000 x2 : ffff809eeb466a40
[ 526.574692] x1 : ffff000006923090 x0 : ffff809eeb466a40
[ 526.579992] Process swapper/125 (pid: 0, stack limit = 0x000000002eb50acc)
[ 526.586854] Call trace:
[ 526.589289] 0xffff000006923090
[ 526.592419] expire_timers+0xc8/0x130
[ 526.596070] run_timer_softirq+0xec/0x1b0
[ 526.600070] __do_softirq+0x134/0x328
[ 526.603726] irq_exit+0xc8/0xe0
[ 526.606857] __handle_domain_irq+0x6c/0xc0
[ 526.610941] gic_handle_irq+0x84/0x188
[ 526.614679] el1_irq+0xe8/0x180
[ 526.617822] cpuidle_enter_state+0xa0/0x328
[ 526.621993] cpuidle_enter+0x34/0x48
[ 526.625564] call_cpuidle+0x44/0x70
[ 526.629040] do_idle+0x1b8/0x1f0
[ 526.632256] cpu_startup_entry+0x2c/0x30
[ 526.636174] secondary_start_kernel+0x11c/0x130
[ 526.640694] Code: bad PC value
[ 526.643800] ---[ end trace d020b0b8417c2498 ]---
[ 526.648404] Kernel panic - not syncing: Fatal exception in interrupt
[ 526.654778] SMP: stopping secondary CPUs
[ 526.658734] Kernel Offset: disabled
[ 526.662211] CPU features: 0x5800c38
[ 526.665688] Memory Limit: none
[ 526.668768] ---[ end Kernel panic - not syncing: Fatal exception in interrupt

Prevent mod_timer from arming a timer that was already removed by
del_timer during module unload.

BugLink: http://launchpad.net/bugs/1799281

Signed-off-by: Jan Glauber <jglauber@cavium.com>
Cc: <stable@vger.kernel.org> # 3.19
Signed-off-by: Corey Minyard <cminyard@mvista.com>

(cherry picked from commit 0711e8c1b4572d076264e71b0002d223f2666ed7
linux-next)

Signed-off-by: Manoj Iyer <manoj.iyer@canonical.com>
---
 drivers/char/ipmi/ipmi_ssif.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

Comments

Seth Forshee Oct. 24, 2018, 3:54 p.m. UTC | #1
On Mon, Oct 22, 2018 at 05:48:34PM -0500, Manoj Iyer wrote:
> From: Jan Glauber <jglauber@cavium.com>
> 
> Please note that below oops is from an older kernel, but the same
> race seems to be present in the upstream kernel too.
> 
> ---8<---
> 
> The following panic was encountered during removing the ipmi_ssif
> module:
> 
> [ 526.352555] Unable to handle kernel paging request at virtual address ffff000006923090
> [ 526.360464] Mem abort info:
> [ 526.363257] ESR = 0x86000007
> [ 526.366304] Exception class = IABT (current EL), IL = 32 bits
> [ 526.372221] SET = 0, FnV = 0
> [ 526.375269] EA = 0, S1PTW = 0
> [ 526.378405] swapper pgtable: 4k pages, 48-bit VAs, pgd = 000000008ae60416
> [ 526.385185] [ffff000006923090] *pgd=000000bffcffe803, *pud=000000bffcffd803, *pmd=0000009f4731a003, *pte=0000000000000000
> [ 526.396141] Internal error: Oops: 86000007 [#1] SMP
> [ 526.401008] Modules linked in: nls_iso8859_1 ipmi_devintf joydev input_leds ipmi_msghandler shpchp sch_fq_codel ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 btrfs zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear i2c_smbus hid_generic usbhid uas hid usb_storage ast aes_ce_blk i2c_algo_bit aes_ce_cipher qede ttm crc32_ce ptp crct10dif_ce drm_kms_helper ghash_ce syscopyarea sha2_ce sysfillrect sysimgblt pps_core fb_sys_fops sha256_arm64 sha1_ce mpt3sas qed drm raid_class ahci scsi_transport_sas libahci gpio_xlp i2c_xlp9xx aes_neon_bs aes_neon_blk crypto_simd cryptd aes_arm64 [last unloaded: ipmi_ssif]
> [ 526.468085] CPU: 125 PID: 0 Comm: swapper/125 Not tainted 4.15.0-35-generic #38~lp1775396+build.1
> [ 526.476942] Hardware name: To be filled by O.E.M. Saber/Saber, BIOS 0ACKL022 08/14/2018
> [ 526.484932] pstate: 00400009 (nzcv daif +PAN -UAO)
> [ 526.489713] pc : 0xffff000006923090
> [ 526.493198] lr : call_timer_fn+0x34/0x178
> [ 526.497194] sp : ffff000009b0bdd0
> [ 526.500496] x29: ffff000009b0bdd0 x28: 0000000000000082
> [ 526.505796] x27: 0000000000000002 x26: ffff000009515188
> [ 526.511096] x25: ffff000009515180 x24: ffff0000090f1018
> [ 526.516396] x23: ffff000009519660 x22: dead000000000200
> [ 526.521696] x21: ffff000006923090 x20: 0000000000000100
> [ 526.526995] x19: ffff809eeb466a40 x18: 0000000000000000
> [ 526.532295] x17: 000000000000000e x16: 0000000000000007
> [ 526.537594] x15: 0000000000000000 x14: 071c71c71c71c71c
> [ 526.542894] x13: 0000000000000000 x12: 0000000000000000
> [ 526.548193] x11: 0000000000000001 x10: ffff000009b0be88
> [ 526.553493] x9 : 0000000000000000 x8 : 0000000000000005
> [ 526.558793] x7 : ffff80befc1f8528 x6 : 0000000000000020
> [ 526.564092] x5 : 0000000000000040 x4 : 0000000020001b20
> [ 526.569392] x3 : 0000000000000000 x2 : ffff809eeb466a40
> [ 526.574692] x1 : ffff000006923090 x0 : ffff809eeb466a40
> [ 526.579992] Process swapper/125 (pid: 0, stack limit = 0x000000002eb50acc)
> [ 526.586854] Call trace:
> [ 526.589289] 0xffff000006923090
> [ 526.592419] expire_timers+0xc8/0x130
> [ 526.596070] run_timer_softirq+0xec/0x1b0
> [ 526.600070] __do_softirq+0x134/0x328
> [ 526.603726] irq_exit+0xc8/0xe0
> [ 526.606857] __handle_domain_irq+0x6c/0xc0
> [ 526.610941] gic_handle_irq+0x84/0x188
> [ 526.614679] el1_irq+0xe8/0x180
> [ 526.617822] cpuidle_enter_state+0xa0/0x328
> [ 526.621993] cpuidle_enter+0x34/0x48
> [ 526.625564] call_cpuidle+0x44/0x70
> [ 526.629040] do_idle+0x1b8/0x1f0
> [ 526.632256] cpu_startup_entry+0x2c/0x30
> [ 526.636174] secondary_start_kernel+0x11c/0x130
> [ 526.640694] Code: bad PC value
> [ 526.643800] ---[ end trace d020b0b8417c2498 ]---
> [ 526.648404] Kernel panic - not syncing: Fatal exception in interrupt
> [ 526.654778] SMP: stopping secondary CPUs
> [ 526.658734] Kernel Offset: disabled
> [ 526.662211] CPU features: 0x5800c38
> [ 526.665688] Memory Limit: none
> [ 526.668768] ---[ end Kernel panic - not syncing: Fatal exception in interrupt
> 
> Prevent mod_timer from arming a timer that was already removed by
> del_timer during module unload.
> 
> BugLink: http://launchpad.net/bugs/1799281
> 
> Signed-off-by: Jan Glauber <jglauber@cavium.com>
> Cc: <stable@vger.kernel.org> # 3.19
> Signed-off-by: Corey Minyard <cminyard@mvista.com>
> 
> (cherry picked from commit 0711e8c1b4572d076264e71b0002d223f2666ed7
> linux-next)
> 
> Signed-off-by: Manoj Iyer <manoj.iyer@canonical.com>

Acked-by: Seth Forshee <seth.forshee@canonical.com>

Applied to unstable/master, thanks!
Stefan Bader Nov. 6, 2018, 8:50 a.m. UTC | #2
On 23.10.18 00:48, Manoj Iyer wrote:
> From: Jan Glauber <jglauber@cavium.com>
> 
> Please note that below oops is from an older kernel, but the same
> race seems to be present in the upstream kernel too.
> 
> ---8<---
> 
> The following panic was encountered during removing the ipmi_ssif
> module:
> 
> [ 526.352555] Unable to handle kernel paging request at virtual address ffff000006923090
> [ 526.360464] Mem abort info:
> [ 526.363257] ESR = 0x86000007
> [ 526.366304] Exception class = IABT (current EL), IL = 32 bits
> [ 526.372221] SET = 0, FnV = 0
> [ 526.375269] EA = 0, S1PTW = 0
> [ 526.378405] swapper pgtable: 4k pages, 48-bit VAs, pgd = 000000008ae60416
> [ 526.385185] [ffff000006923090] *pgd=000000bffcffe803, *pud=000000bffcffd803, *pmd=0000009f4731a003, *pte=0000000000000000
> [ 526.396141] Internal error: Oops: 86000007 [#1] SMP
> [ 526.401008] Modules linked in: nls_iso8859_1 ipmi_devintf joydev input_leds ipmi_msghandler shpchp sch_fq_codel ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 btrfs zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear i2c_smbus hid_generic usbhid uas hid usb_storage ast aes_ce_blk i2c_algo_bit aes_ce_cipher qede ttm crc32_ce ptp crct10dif_ce drm_kms_helper ghash_ce syscopyarea sha2_ce sysfillrect sysimgblt pps_core fb_sys_fops sha256_arm64 sha1_ce mpt3sas qed drm raid_class ahci scsi_transport_sas libahci gpio_xlp i2c_xlp9xx aes_neon_bs aes_neon_blk crypto_simd cryptd aes_arm64 [last unloaded: ipmi_ssif]
> [ 526.468085] CPU: 125 PID: 0 Comm: swapper/125 Not tainted 4.15.0-35-generic #38~lp1775396+build.1
> [ 526.476942] Hardware name: To be filled by O.E.M. Saber/Saber, BIOS 0ACKL022 08/14/2018
> [ 526.484932] pstate: 00400009 (nzcv daif +PAN -UAO)
> [ 526.489713] pc : 0xffff000006923090
> [ 526.493198] lr : call_timer_fn+0x34/0x178
> [ 526.497194] sp : ffff000009b0bdd0
> [ 526.500496] x29: ffff000009b0bdd0 x28: 0000000000000082
> [ 526.505796] x27: 0000000000000002 x26: ffff000009515188
> [ 526.511096] x25: ffff000009515180 x24: ffff0000090f1018
> [ 526.516396] x23: ffff000009519660 x22: dead000000000200
> [ 526.521696] x21: ffff000006923090 x20: 0000000000000100
> [ 526.526995] x19: ffff809eeb466a40 x18: 0000000000000000
> [ 526.532295] x17: 000000000000000e x16: 0000000000000007
> [ 526.537594] x15: 0000000000000000 x14: 071c71c71c71c71c
> [ 526.542894] x13: 0000000000000000 x12: 0000000000000000
> [ 526.548193] x11: 0000000000000001 x10: ffff000009b0be88
> [ 526.553493] x9 : 0000000000000000 x8 : 0000000000000005
> [ 526.558793] x7 : ffff80befc1f8528 x6 : 0000000000000020
> [ 526.564092] x5 : 0000000000000040 x4 : 0000000020001b20
> [ 526.569392] x3 : 0000000000000000 x2 : ffff809eeb466a40
> [ 526.574692] x1 : ffff000006923090 x0 : ffff809eeb466a40
> [ 526.579992] Process swapper/125 (pid: 0, stack limit = 0x000000002eb50acc)
> [ 526.586854] Call trace:
> [ 526.589289] 0xffff000006923090
> [ 526.592419] expire_timers+0xc8/0x130
> [ 526.596070] run_timer_softirq+0xec/0x1b0
> [ 526.600070] __do_softirq+0x134/0x328
> [ 526.603726] irq_exit+0xc8/0xe0
> [ 526.606857] __handle_domain_irq+0x6c/0xc0
> [ 526.610941] gic_handle_irq+0x84/0x188
> [ 526.614679] el1_irq+0xe8/0x180
> [ 526.617822] cpuidle_enter_state+0xa0/0x328
> [ 526.621993] cpuidle_enter+0x34/0x48
> [ 526.625564] call_cpuidle+0x44/0x70
> [ 526.629040] do_idle+0x1b8/0x1f0
> [ 526.632256] cpu_startup_entry+0x2c/0x30
> [ 526.636174] secondary_start_kernel+0x11c/0x130
> [ 526.640694] Code: bad PC value
> [ 526.643800] ---[ end trace d020b0b8417c2498 ]---
> [ 526.648404] Kernel panic - not syncing: Fatal exception in interrupt
> [ 526.654778] SMP: stopping secondary CPUs
> [ 526.658734] Kernel Offset: disabled
> [ 526.662211] CPU features: 0x5800c38
> [ 526.665688] Memory Limit: none
> [ 526.668768] ---[ end Kernel panic - not syncing: Fatal exception in interrupt
> 
> Prevent mod_timer from arming a timer that was already removed by
> del_timer during module unload.
> 
> BugLink: http://launchpad.net/bugs/1799281
> 
> Signed-off-by: Jan Glauber <jglauber@cavium.com>
> Cc: <stable@vger.kernel.org> # 3.19
> Signed-off-by: Corey Minyard <cminyard@mvista.com>
> 
> (cherry picked from commit 0711e8c1b4572d076264e71b0002d223f2666ed7
> linux-next)
> Signed-off-by: Manoj Iyer <manoj.iyer@canonical.com>
Acked-by: Stefan Bader <stefan.bader@canonical.com>
> ---

This is a bit of a confusing mess as you sent the same patch as a re-send for
bionic causing it to become two independent entities. If you find out something
needs to go to another release and applies there just as before, why not just
reply to your own cover email and add a +<series> to the subject (cause that is
the first thing one sees in the mail reader) and some short explanation for it
in the body?

-Stefan

>  drivers/char/ipmi/ipmi_ssif.c | 10 ++++++----
>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
> index 265d6a6583bc..e33fefd6ceae 100644
> --- a/drivers/char/ipmi/ipmi_ssif.c
> +++ b/drivers/char/ipmi/ipmi_ssif.c
> @@ -606,8 +606,9 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result,
>  			flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
>  			ssif_info->waiting_alert = true;
>  			ssif_info->rtc_us_timer = SSIF_MSG_USEC;
> -			mod_timer(&ssif_info->retry_timer,
> -				  jiffies + SSIF_MSG_JIFFIES);
> +			if (!ssif_info->stopping)
> +				mod_timer(&ssif_info->retry_timer,
> +					  jiffies + SSIF_MSG_JIFFIES);
>  			ipmi_ssif_unlock_cond(ssif_info, flags);
>  			return;
>  		}
> @@ -939,8 +940,9 @@ static void msg_written_handler(struct ssif_info *ssif_info, int result,
>  			ssif_info->waiting_alert = true;
>  			ssif_info->retries_left = SSIF_RECV_RETRIES;
>  			ssif_info->rtc_us_timer = SSIF_MSG_PART_USEC;
> -			mod_timer(&ssif_info->retry_timer,
> -				  jiffies + SSIF_MSG_PART_JIFFIES);
> +			if (!ssif_info->stopping)
> +				mod_timer(&ssif_info->retry_timer,
> +					  jiffies + SSIF_MSG_PART_JIFFIES);
>  			ipmi_ssif_unlock_cond(ssif_info, flags);
>  		}
>  	}
>
diff mbox series

Patch

diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 265d6a6583bc..e33fefd6ceae 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -606,8 +606,9 @@  static void msg_done_handler(struct ssif_info *ssif_info, int result,
 			flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
 			ssif_info->waiting_alert = true;
 			ssif_info->rtc_us_timer = SSIF_MSG_USEC;
-			mod_timer(&ssif_info->retry_timer,
-				  jiffies + SSIF_MSG_JIFFIES);
+			if (!ssif_info->stopping)
+				mod_timer(&ssif_info->retry_timer,
+					  jiffies + SSIF_MSG_JIFFIES);
 			ipmi_ssif_unlock_cond(ssif_info, flags);
 			return;
 		}
@@ -939,8 +940,9 @@  static void msg_written_handler(struct ssif_info *ssif_info, int result,
 			ssif_info->waiting_alert = true;
 			ssif_info->retries_left = SSIF_RECV_RETRIES;
 			ssif_info->rtc_us_timer = SSIF_MSG_PART_USEC;
-			mod_timer(&ssif_info->retry_timer,
-				  jiffies + SSIF_MSG_PART_JIFFIES);
+			if (!ssif_info->stopping)
+				mod_timer(&ssif_info->retry_timer,
+					  jiffies + SSIF_MSG_PART_JIFFIES);
 			ipmi_ssif_unlock_cond(ssif_info, flags);
 		}
 	}