diff mbox series

[dev-5.0,4/4] media: aspeed: clear interrupt status flags immediately

Message ID 20190425222040.2413-5-jae.hyun.yoo@linux.intel.com
State Accepted, archived
Headers show
Series Improve stability of Aspeed video engine driver - 2nd phase | expand

Commit Message

Jae Hyun Yoo April 25, 2019, 10:20 p.m. UTC
Interrupt status flags should be cleared immediately otherwise
interrupt handler will be called again and again until the flag
is cleared, but this driver clears some flags through a 500ms
delayed work which is a bad idea in interrupt handling, so this
commit makes the interrupt handler clear the status flags
immediately.

Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
---
 drivers/media/platform/aspeed-video.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

Comments

Eddie James April 29, 2019, 10:29 p.m. UTC | #1
On 4/25/19 5:20 PM, Jae Hyun Yoo wrote:
> Interrupt status flags should be cleared immediately otherwise
> interrupt handler will be called again and again until the flag
> is cleared, but this driver clears some flags through a 500ms
> delayed work which is a bad idea in interrupt handling, so this
> commit makes the interrupt handler clear the status flags
> immediately.
>
> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
> ---
>   drivers/media/platform/aspeed-video.c | 12 +++++++-----
>   1 file changed, 7 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/media/platform/aspeed-video.c b/drivers/media/platform/aspeed-video.c
> index 77c209a472ca..e218f375b9f5 100644
> --- a/drivers/media/platform/aspeed-video.c
> +++ b/drivers/media/platform/aspeed-video.c
> @@ -546,17 +546,18 @@ static irqreturn_t aspeed_video_irq(int irq, void *arg)
>   	 * re-initialize
>   	 */
>   	if (sts & VE_INTERRUPT_MODE_DETECT_WD) {
> +		aspeed_video_write(video, VE_INTERRUPT_STATUS,
> +				   VE_INTERRUPT_MODE_DETECT_WD);


aspeed_video_irq_res_change disables all IRQs and turns off the clocks. 
This shouldn't be necessary.


The rest looks fine.

Thanks,

Eddie


>   		aspeed_video_irq_res_change(video);
>   		return IRQ_HANDLED;
>   	}
>   
>   	if (sts & VE_INTERRUPT_MODE_DETECT) {
> +		aspeed_video_write(video, VE_INTERRUPT_STATUS,
> +				   VE_INTERRUPT_MODE_DETECT);
>   		if (test_bit(VIDEO_RES_DETECT, &video->flags)) {
>   			aspeed_video_update(video, VE_INTERRUPT_CTRL,
>   					    VE_INTERRUPT_MODE_DETECT, 0);
> -			aspeed_video_write(video, VE_INTERRUPT_STATUS,
> -					   VE_INTERRUPT_MODE_DETECT);
> -
>   			set_bit(VIDEO_MODE_DETECT_DONE, &video->flags);
>   			wake_up_interruptible_all(&video->wait);
>   		} else {
> @@ -574,6 +575,9 @@ static irqreturn_t aspeed_video_irq(int irq, void *arg)
>   		u32 frame_size = aspeed_video_read(video,
>   						   VE_OFFSET_COMP_STREAM);
>   
> +		aspeed_video_write(video, VE_INTERRUPT_STATUS,
> +				   VE_INTERRUPT_COMP_COMPLETE);
> +
>   		spin_lock(&video->lock);
>   		clear_bit(VIDEO_FRAME_INPRG, &video->flags);
>   		buf = list_first_entry_or_null(&video->buffers,
> @@ -599,8 +603,6 @@ static irqreturn_t aspeed_video_irq(int irq, void *arg)
>   				    VE_SEQ_CTRL_TRIG_COMP, 0);
>   		aspeed_video_update(video, VE_INTERRUPT_CTRL,
>   				    VE_INTERRUPT_COMP_COMPLETE, 0);
> -		aspeed_video_write(video, VE_INTERRUPT_STATUS,
> -				   VE_INTERRUPT_COMP_COMPLETE);
>   
>   		if (test_bit(VIDEO_STREAMING, &video->flags) && buf)
>   			aspeed_video_start_frame(video);
Jae Hyun Yoo April 29, 2019, 11:38 p.m. UTC | #2
On 4/29/2019 3:29 PM, Eddie James wrote:
> 
> On 4/25/19 5:20 PM, Jae Hyun Yoo wrote:
>> Interrupt status flags should be cleared immediately otherwise
>> interrupt handler will be called again and again until the flag
>> is cleared, but this driver clears some flags through a 500ms
>> delayed work which is a bad idea in interrupt handling, so this
>> commit makes the interrupt handler clear the status flags
>> immediately.
>>
>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
>> ---
>>   drivers/media/platform/aspeed-video.c | 12 +++++++-----
>>   1 file changed, 7 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/media/platform/aspeed-video.c 
>> b/drivers/media/platform/aspeed-video.c
>> index 77c209a472ca..e218f375b9f5 100644
>> --- a/drivers/media/platform/aspeed-video.c
>> +++ b/drivers/media/platform/aspeed-video.c
>> @@ -546,17 +546,18 @@ static irqreturn_t aspeed_video_irq(int irq, 
>> void *arg)
>>        * re-initialize
>>        */
>>       if (sts & VE_INTERRUPT_MODE_DETECT_WD) {
>> +        aspeed_video_write(video, VE_INTERRUPT_STATUS,
>> +                   VE_INTERRUPT_MODE_DETECT_WD);
> 
> 
> aspeed_video_irq_res_change disables all IRQs and turns off the clocks. 
> This shouldn't be necessary.

In fact, this patch fixes a watch dog reset with printing out a stack
trace like below. This happens very rarely but it's critical because it
causes a BMC reset. In my experiments, interrupt flags should be cleared
even with the aspeed_video_write(video, VE_INTERRUPT_CTRL, 0) in
aspeed_video_off(), or we should add
apeed_video_write(video, VE_INTERRUPT_STATUS, 0xffffffff)
before the disabling interrupt. I think the way in this patch is better.

After applying this patch, I've not seen the watch dog reset yet for 
over a week.

Thanks,

Jae

[ 2174.199114] sched: RT throttling activated
[ 2200.009118] watchdog: BUG: soft lockup - CPU#0 stuck for 22s! 
[irq/23-aspeed-v:609]
[ 2200.016802] CPU: 0 PID: 609 Comm: irq/23-aspeed-v Not tainted 
5.0.7-e124b50aeacb66baa42541ebc6c3544350f75a79 #1
[ 2200.026884] Hardware name: Generic DT based system
[ 2200.031705] PC is at irq_finalize_oneshot.part.0+0x6c/0x11c
[ 2200.037284] LR is at unmask_irq.part.4+0x30/0x44
[ 2200.041902] pc : [<8014c550>]    lr : [<8014ecb4>]    psr: a0000013
[ 2200.048159] sp : 9e56bef0  ip : 00000000  fp : 9e56bf0c
[ 2200.053377] r10: 80a07008  r9 : 00000000  r8 : 8014c6b0
[ 2200.058595] r7 : 00000001  r6 : 00000001  r5 : 9d11ed90  r4 : 9d11ed80
[ 2200.065112] r3 : 02400200  r2 : 9d11ed80  r1 : 00000080  r0 : 9d002940
[ 2200.071630] Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM 
Segment none
[ 2200.078754] Control: 00c5387d  Table: 9d98c008  DAC: 00000051
[ 2200.084496] CPU: 0 PID: 609 Comm: irq/23-aspeed-v Not tainted 
5.0.7-e124b50aeacb66baa42541ebc6c3544350f75a79 #1
[ 2200.094567] Hardware name: Generic DT based system
[ 2200.099352] Backtrace:
[ 2200.101844] [<80107cdc>] (dump_backtrace) from [<80107f10>] 
(show_stack+0x20/0x24)
[ 2200.109415]  r7:00000000 r6:9e56a000 r5:80a070d8 r4:80a1ba08
[ 2200.115092] [<80107ef0>] (show_stack) from [<80691844>] 
(dump_stack+0x20/0x28)
[ 2200.122320] [<80691824>] (dump_stack) from [<80103dd8>] 
(show_regs+0x1c/0x20)
[ 2200.129467] [<80103dbc>] (show_regs) from [<8017a940>] 
(watchdog_timer_fn+0x1c4/0x21c)
[ 2200.137393] [<8017a77c>] (watchdog_timer_fn) from [<80159ce0>] 
(__hrtimer_run_queues.constprop.3+0x14c/0x280)
[ 2200.147306]  r10:8017a77c r9:80a18cb0 r8:00000006 r7:20000193 
r6:80a18b80 r5:80a18bc0
[ 2200.155121]  r4:80a1ba40
[ 2200.157667] [<80159b94>] (__hrtimer_run_queues.constprop.3) from 
[<8015aac0>] (hrtimer_interrupt+0xf4/0x258)
[ 2200.167491]  r10:80a18d00 r9:80a18cb0 r8:ffffffff r7:7fffffff 
r6:00000003 r5:20000193
[ 2200.175305]  r4:80a18b80
[ 2200.177867] [<8015a9cc>] (hrtimer_interrupt) from [<8050d51c>] 
(fttmr010_timer_interrupt+0x20/0x28)
[ 2200.186915]  r10:9e56bdf8 r9:00000000 r8:9d013600 r7:00000011 
r6:9d01fd80 r5:80a4af84
[ 2200.194727]  r4:9d001b80
[ 2200.197292] [<8050d4fc>] (fttmr010_timer_interrupt) from [<8014be68>] 
(__handle_irq_event_percpu+0x48/0x1c4)
[ 2200.207125] [<8014be20>] (__handle_irq_event_percpu) from 
[<8014c01c>] (handle_irq_event_percpu+0x38/0x90)
[ 2200.216778]  r10:80a07008 r9:9e56a000 r8:9d013600 r7:00000000 
r6:9d01fd80 r5:80a4af84
[ 2200.224594]  r4:80a07008
[ 2200.227139] [<8014bfe4>] (handle_irq_event_percpu) from [<8014c0ac>] 
(handle_irq_event+0x38/0x4c)
[ 2200.236003]  r6:00000001 r5:80a4af84 r4:9d01fd80
[ 2200.240623] [<8014c074>] (handle_irq_event) from [<8014fa84>] 
(handle_edge_irq+0xb0/0x1cc)
[ 2200.248881]  r5:80a4af84 r4:9d01fd80
[ 2200.252462] [<8014f9d4>] (handle_edge_irq) from [<8014b6a4>] 
(generic_handle_irq+0x30/0x44)
[ 2200.260805]  r5:80a4af84 r4:00000011
[ 2200.264387] [<8014b674>] (generic_handle_irq) from [<8014b710>] 
(__handle_domain_irq+0x58/0xb8)
[ 2200.273091] [<8014b6b8>] (__handle_domain_irq) from [<80102164>] 
(avic_handle_irq+0x68/0x70)
[ 2200.281525]  r9:9e56a000 r8:8014c6b0 r7:9e56bed4 r6:ffffffff 
r5:9e56bea0 r4:9d002940
[ 2200.289267] [<801020fc>] (avic_handle_irq) from [<801019ec>] 
(__irq_svc+0x6c/0x90)
[ 2200.296824] Exception stack(0x9e56bea0 to 0x9e56bee8)
[ 2200.301879] bea0: 9d002940 00000080 9d11ed80 02400200 9d11ed80 
9d11ed90 00000001 00000001
[ 2200.310060] bec0: 8014c6b0 00000000 80a07008 9e56bf0c 00000000 
9e56bef0 8014ecb4 8014c550
[ 2200.318222] bee0: a0000013 ffffffff
[ 2200.321708]  r5:a0000013 r4:8014c550
[ 2200.325290] [<8014c4e4>] (irq_finalize_oneshot.part.0) from 
[<8014c72c>] (irq_thread_fn+0x7c/0x88)
[ 2200.334238]  r5:9d11ed80 r4:9e539540
[ 2200.337821] [<8014c6b0>] (irq_thread_fn) from [<8014c974>] 
(irq_thread+0x104/0x1e0)
[ 2200.345473]  r7:00000001 r6:9d11ed80 r5:ffffe000 r4:9e539540
[ 2200.351153] [<8014c870>] (irq_thread) from [<80133490>] 
(kthread+0x14c/0x164)
[ 2200.358287]  r10:9d0a3c00 r9:8014c870 r8:9e539540 r7:9e56a000 
r6:00000000 r5:9e542060
[ 2200.366104]  r4:9e539b00
[ 2200.368650] [<80133344>] (kthread) from [<801010e8>] 
(ret_from_fork+0x14/0x2c)
[ 2200.375865] Exception stack(0x9e56bfb0 to 0x9e56bff8)
[ 2200.380911] bfa0:                                     00000000 
00000000 00000000 00000000
[ 2200.389079] bfc0: 00000000 00000000 00000000 00000000 00000000 
00000000 00000000 00000000
[ 2200.397248] bfe0: 00000000 00000000 00000000 00000000 00000013 00000000
[ 2200.403863]  r10:00000000 r9:00000000 r8:00000000 r7:00000000 
r6:00000000 r5:80133344
[ 2200.411681]  r4:9e542060
[ 2228.009114] watchdog: BUG: soft lockup - CPU#0 stuck for 22s! 
[irq/23-aspeed-v:609]
[ 2228.016798] CPU: 0 PID: 609 Comm: irq/23-aspeed-v Tainted: G 
    L    5.0.7-e124b50aeacb66baa42541ebc6c3544350f75a79 #1
[ 2228.028268] Hardware name: Generic DT based system
[ 2228.033085] PC is at irq_finalize_oneshot.part.0+0x6c/0x11c
[ 2228.038670] LR is at unmask_irq.part.4+0x30/0x44
[ 2228.043288] pc : [<8014c550>]    lr : [<8014ecb4>]    psr: a0000013
[ 2228.049545] sp : 9e56bef0  ip : 00000000  fp : 9e56bf0c
[ 2228.054762] r10: 80a07008  r9 : 00000000  r8 : 8014c6b0
[ 2228.059980] r7 : 00000001  r6 : 00000001  r5 : 9d11ed90  r4 : 9d11ed80
[ 2228.066498] r3 : 02400200  r2 : 9d11ed80  r1 : 00000080  r0 : 9d002940
[ 2228.073014] Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM 
Segment none
[ 2228.080137] Control: 00c5387d  Table: 9d98c008  DAC: 00000051
[ 2228.085881] CPU: 0 PID: 609 Comm: irq/23-aspeed-v Tainted: G 
    L    5.0.7-e124b50aeacb66baa42541ebc6c3544350f75a79 #1
[ 2228.097338] Hardware name: Generic DT based system
[ 2228.102123] Backtrace:
[ 2228.104609] [<80107cdc>] (dump_backtrace) from [<80107f10>] 
(show_stack+0x20/0x24)
[ 2228.112176]  r7:00000000 r6:9e56a000 r5:80a070d8 r4:80a1ba08
[ 2228.117852] [<80107ef0>] (show_stack) from [<80691844>] 
(dump_stack+0x20/0x28)
[ 2228.125080] [<80691824>] (dump_stack) from [<80103dd8>] 
(show_regs+0x1c/0x20)
[ 2228.132234] [<80103dbc>] (show_regs) from [<8017a940>] 
(watchdog_timer_fn+0x1c4/0x21c)
[ 2228.140164] [<8017a77c>] (watchdog_timer_fn) from [<80159ce0>] 
(__hrtimer_run_queues.constprop.3+0x14c/0x280)
[ 2228.150078]  r10:8017a77c r9:80a18cb0 r8:00000006 r7:20000193 
r6:80a18b80 r5:80a18bc0
[ 2228.157891]  r4:80a1ba40
[ 2228.160438] [<80159b94>] (__hrtimer_run_queues.constprop.3) from 
[<8015aac0>] (hrtimer_interrupt+0xf4/0x258)
[ 2228.170262]  r10:80a18d00 r9:80a18cb0 r8:ffffffff r7:7fffffff 
r6:00000003 r5:20000193
[ 2228.178077]  r4:80a18b80
[ 2228.180637] [<8015a9cc>] (hrtimer_interrupt) from [<8050d51c>] 
(fttmr010_timer_interrupt+0x20/0x28)
[ 2228.189674]  r10:9e56bdf8 r9:00000000 r8:9d013600 r7:00000011 
r6:9d01fd80 r5:80a4af84
[ 2228.197491]  r4:9d001b80
[ 2228.200052] [<8050d4fc>] (fttmr010_timer_interrupt) from [<8014be68>] 
(__handle_irq_event_percpu+0x48/0x1c4)
[ 2228.209884] [<8014be20>] (__handle_irq_event_percpu) from 
[<8014c01c>] (handle_irq_event_percpu+0x38/0x90)
[ 2228.219530]  r10:80a07008 r9:9e56a000 r8:9d013600 r7:00000000 
r6:9d01fd80 r5:80a4af84
[ 2228.227348]  r4:80a07008
[ 2228.229892] [<8014bfe4>] (handle_irq_event_percpu) from [<8014c0ac>] 
(handle_irq_event+0x38/0x4c)
[ 2228.238758]  r6:00000001 r5:80a4af84 r4:9d01fd80
[ 2228.243388] [<8014c074>] (handle_irq_event) from [<8014fa84>] 
(handle_edge_irq+0xb0/0x1cc)
[ 2228.251643]  r5:80a4af84 r4:9d01fd80
[ 2228.255224] [<8014f9d4>] (handle_edge_irq) from [<8014b6a4>] 
(generic_handle_irq+0x30/0x44)
[ 2228.263558]  r5:80a4af84 r4:00000011
[ 2228.267142] [<8014b674>] (generic_handle_irq) from [<8014b710>] 
(__handle_domain_irq+0x58/0xb8)
[ 2228.275844] [<8014b6b8>] (__handle_domain_irq) from [<80102164>] 
(avic_handle_irq+0x68/0x70)
[ 2228.284279]  r9:9e56a000 r8:8014c6b0 r7:9e56bed4 r6:ffffffff 
r5:9e56bea0 r4:9d002940
[ 2228.292019] [<801020fc>] (avic_handle_irq) from [<801019ec>] 
(__irq_svc+0x6c/0x90)
[ 2228.299577] Exception stack(0x9e56bea0 to 0x9e56bee8)
[ 2228.304630] bea0: 9d002940 00000080 9d11ed80 02400200 9d11ed80 
9d11ed90 00000001 00000001
[ 2228.312803] bec0: 8014c6b0 00000000 80a07008 9e56bf0c 00000000 
9e56bef0 8014ecb4 8014c550
[ 2228.320967] bee0: a0000013 ffffffff
[ 2228.324451]  r5:a0000013 r4:8014c550
[ 2228.328038] [<8014c4e4>] (irq_finalize_oneshot.part.0) from 
[<8014c72c>] (irq_thread_fn+0x7c/0x88)
[ 2228.336983]  r5:9d11ed80 r4:9e539540
[ 2228.340566] [<8014c6b0>] (irq_thread_fn) from [<8014c974>] 
(irq_thread+0x104/0x1e0)
[ 2228.348219]  r7:00000001 r6:9d11ed80 r5:ffffe000 r4:9e539540
[ 2228.353898] [<8014c870>] (irq_thread) from [<80133490>] 
(kthread+0x14c/0x164)
[ 2228.361031]  r10:9d0a3c00 r9:8014c870 r8:9e539540 r7:9e56a000 
r6:00000000 r5:9e542060
[ 2228.368848]  r4:9e539b00
[ 2228.371394] [<80133344>] (kthread) from [<801010e8>] 
(ret_from_fork+0x14/0x2c)
[ 2228.378609] Exception stack(0x9e56bfb0 to 0x9e56bff8)
[ 2228.383656] bfa0:                                     00000000 
00000000 00000000 00000000
[ 2228.391833] bfc0: 00000000 00000000 00000000 00000000 00000000 
00000000 00000000 00000000
[ 2228.400004] bfe0: 00000000 00000000 00000000 00000000 00000013 00000000
[ 2228.406617]  r10:00000000 r9:00000000 r8:00000000 r7:00000000 
r6:00000000 r5:80133344
[ 2228.414436]  r4:9e542060

> 
> 
> The rest looks fine.
> 
> Thanks,
> 
> Eddie
> 
> 
>>           aspeed_video_irq_res_change(video);
>>           return IRQ_HANDLED;
>>       }
>>       if (sts & VE_INTERRUPT_MODE_DETECT) {
>> +        aspeed_video_write(video, VE_INTERRUPT_STATUS,
>> +                   VE_INTERRUPT_MODE_DETECT);
>>           if (test_bit(VIDEO_RES_DETECT, &video->flags)) {
>>               aspeed_video_update(video, VE_INTERRUPT_CTRL,
>>                           VE_INTERRUPT_MODE_DETECT, 0);
>> -            aspeed_video_write(video, VE_INTERRUPT_STATUS,
>> -                       VE_INTERRUPT_MODE_DETECT);
>> -
>>               set_bit(VIDEO_MODE_DETECT_DONE, &video->flags);
>>               wake_up_interruptible_all(&video->wait);
>>           } else {
>> @@ -574,6 +575,9 @@ static irqreturn_t aspeed_video_irq(int irq, void 
>> *arg)
>>           u32 frame_size = aspeed_video_read(video,
>>                              VE_OFFSET_COMP_STREAM);
>> +        aspeed_video_write(video, VE_INTERRUPT_STATUS,
>> +                   VE_INTERRUPT_COMP_COMPLETE);
>> +
>>           spin_lock(&video->lock);
>>           clear_bit(VIDEO_FRAME_INPRG, &video->flags);
>>           buf = list_first_entry_or_null(&video->buffers,
>> @@ -599,8 +603,6 @@ static irqreturn_t aspeed_video_irq(int irq, void 
>> *arg)
>>                       VE_SEQ_CTRL_TRIG_COMP, 0);
>>           aspeed_video_update(video, VE_INTERRUPT_CTRL,
>>                       VE_INTERRUPT_COMP_COMPLETE, 0);
>> -        aspeed_video_write(video, VE_INTERRUPT_STATUS,
>> -                   VE_INTERRUPT_COMP_COMPLETE);
>>           if (test_bit(VIDEO_STREAMING, &video->flags) && buf)
>>               aspeed_video_start_frame(video);
>
Joel Stanley April 30, 2019, 3:04 a.m. UTC | #3
On Mon, 29 Apr 2019 at 23:38, Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> wrote:
>
> On 4/29/2019 3:29 PM, Eddie James wrote:
> >
> > On 4/25/19 5:20 PM, Jae Hyun Yoo wrote:
> >> Interrupt status flags should be cleared immediately otherwise
> >> interrupt handler will be called again and again until the flag
> >> is cleared, but this driver clears some flags through a 500ms
> >> delayed work which is a bad idea in interrupt handling, so this
> >> commit makes the interrupt handler clear the status flags
> >> immediately.
> >>
> >> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
> >> ---
> >>   drivers/media/platform/aspeed-video.c | 12 +++++++-----
> >>   1 file changed, 7 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/drivers/media/platform/aspeed-video.c
> >> b/drivers/media/platform/aspeed-video.c
> >> index 77c209a472ca..e218f375b9f5 100644
> >> --- a/drivers/media/platform/aspeed-video.c
> >> +++ b/drivers/media/platform/aspeed-video.c
> >> @@ -546,17 +546,18 @@ static irqreturn_t aspeed_video_irq(int irq,
> >> void *arg)
> >>        * re-initialize
> >>        */
> >>       if (sts & VE_INTERRUPT_MODE_DETECT_WD) {
> >> +        aspeed_video_write(video, VE_INTERRUPT_STATUS,
> >> +                   VE_INTERRUPT_MODE_DETECT_WD);
> >
> >
> > aspeed_video_irq_res_change disables all IRQs and turns off the clocks.
> > This shouldn't be necessary.
>
> In fact, this patch fixes a watch dog reset with printing out a stack
> trace like below. This happens very rarely but it's critical because it
> causes a BMC reset. In my experiments, interrupt flags should be cleared
> even with the aspeed_video_write(video, VE_INTERRUPT_CTRL, 0) in
> aspeed_video_off(), or we should add
> apeed_video_write(video, VE_INTERRUPT_STATUS, 0xffffffff)
> before the disabling interrupt. I think the way in this patch is better.

In general, a driver should certainly be clearing (acking) the
interrupt bits in the interrupt handler before returning.

Jae, it would be interesting to know the value of VE_INTERRUPT_STATUS
in the soft lockup situation.

I took a closer look at this function, and it should probably not
return IRQ_HANDLED at the bottom, as it may have fallen through all of
the if statements and not have handled any interrupt. Jae, can you
take a look at this and send another patch if you think that is
correct.

Cheers,

Joel

>
> After applying this patch, I've not seen the watch dog reset yet for
> over a week.
>
> Thanks,
>
> Jae
>
> [ 2174.199114] sched: RT throttling activated
> [ 2200.009118] watchdog: BUG: soft lockup - CPU#0 stuck for 22s!
> [irq/23-aspeed-v:609]
> [ 2200.016802] CPU: 0 PID: 609 Comm: irq/23-aspeed-v Not tainted
> 5.0.7-e124b50aeacb66baa42541ebc6c3544350f75a79 #1
> [ 2200.026884] Hardware name: Generic DT based system
> [ 2200.031705] PC is at irq_finalize_oneshot.part.0+0x6c/0x11c
> [ 2200.037284] LR is at unmask_irq.part.4+0x30/0x44
> [ 2200.041902] pc : [<8014c550>]    lr : [<8014ecb4>]    psr: a0000013
> [ 2200.048159] sp : 9e56bef0  ip : 00000000  fp : 9e56bf0c
> [ 2200.053377] r10: 80a07008  r9 : 00000000  r8 : 8014c6b0
> [ 2200.058595] r7 : 00000001  r6 : 00000001  r5 : 9d11ed90  r4 : 9d11ed80
> [ 2200.065112] r3 : 02400200  r2 : 9d11ed80  r1 : 00000080  r0 : 9d002940
> [ 2200.071630] Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM
> Segment none
> [ 2200.078754] Control: 00c5387d  Table: 9d98c008  DAC: 00000051
> [ 2200.084496] CPU: 0 PID: 609 Comm: irq/23-aspeed-v Not tainted
> 5.0.7-e124b50aeacb66baa42541ebc6c3544350f75a79 #1
> [ 2200.094567] Hardware name: Generic DT based system
> [ 2200.099352] Backtrace:
> [ 2200.101844] [<80107cdc>] (dump_backtrace) from [<80107f10>]
> (show_stack+0x20/0x24)
> [ 2200.109415]  r7:00000000 r6:9e56a000 r5:80a070d8 r4:80a1ba08
> [ 2200.115092] [<80107ef0>] (show_stack) from [<80691844>]
> (dump_stack+0x20/0x28)
> [ 2200.122320] [<80691824>] (dump_stack) from [<80103dd8>]
> (show_regs+0x1c/0x20)
> [ 2200.129467] [<80103dbc>] (show_regs) from [<8017a940>]
> (watchdog_timer_fn+0x1c4/0x21c)
> [ 2200.137393] [<8017a77c>] (watchdog_timer_fn) from [<80159ce0>]
> (__hrtimer_run_queues.constprop.3+0x14c/0x280)
> [ 2200.147306]  r10:8017a77c r9:80a18cb0 r8:00000006 r7:20000193
> r6:80a18b80 r5:80a18bc0
> [ 2200.155121]  r4:80a1ba40
> [ 2200.157667] [<80159b94>] (__hrtimer_run_queues.constprop.3) from
> [<8015aac0>] (hrtimer_interrupt+0xf4/0x258)
> [ 2200.167491]  r10:80a18d00 r9:80a18cb0 r8:ffffffff r7:7fffffff
> r6:00000003 r5:20000193
> [ 2200.175305]  r4:80a18b80
> [ 2200.177867] [<8015a9cc>] (hrtimer_interrupt) from [<8050d51c>]
> (fttmr010_timer_interrupt+0x20/0x28)
> [ 2200.186915]  r10:9e56bdf8 r9:00000000 r8:9d013600 r7:00000011
> r6:9d01fd80 r5:80a4af84
> [ 2200.194727]  r4:9d001b80
> [ 2200.197292] [<8050d4fc>] (fttmr010_timer_interrupt) from [<8014be68>]
> (__handle_irq_event_percpu+0x48/0x1c4)
> [ 2200.207125] [<8014be20>] (__handle_irq_event_percpu) from
> [<8014c01c>] (handle_irq_event_percpu+0x38/0x90)
> [ 2200.216778]  r10:80a07008 r9:9e56a000 r8:9d013600 r7:00000000
> r6:9d01fd80 r5:80a4af84
> [ 2200.224594]  r4:80a07008
> [ 2200.227139] [<8014bfe4>] (handle_irq_event_percpu) from [<8014c0ac>]
> (handle_irq_event+0x38/0x4c)
> [ 2200.236003]  r6:00000001 r5:80a4af84 r4:9d01fd80
> [ 2200.240623] [<8014c074>] (handle_irq_event) from [<8014fa84>]
> (handle_edge_irq+0xb0/0x1cc)
> [ 2200.248881]  r5:80a4af84 r4:9d01fd80
> [ 2200.252462] [<8014f9d4>] (handle_edge_irq) from [<8014b6a4>]
> (generic_handle_irq+0x30/0x44)
> [ 2200.260805]  r5:80a4af84 r4:00000011
> [ 2200.264387] [<8014b674>] (generic_handle_irq) from [<8014b710>]
> (__handle_domain_irq+0x58/0xb8)
> [ 2200.273091] [<8014b6b8>] (__handle_domain_irq) from [<80102164>]
> (avic_handle_irq+0x68/0x70)
> [ 2200.281525]  r9:9e56a000 r8:8014c6b0 r7:9e56bed4 r6:ffffffff
> r5:9e56bea0 r4:9d002940
> [ 2200.289267] [<801020fc>] (avic_handle_irq) from [<801019ec>]
> (__irq_svc+0x6c/0x90)
> [ 2200.296824] Exception stack(0x9e56bea0 to 0x9e56bee8)
> [ 2200.301879] bea0: 9d002940 00000080 9d11ed80 02400200 9d11ed80
> 9d11ed90 00000001 00000001
> [ 2200.310060] bec0: 8014c6b0 00000000 80a07008 9e56bf0c 00000000
> 9e56bef0 8014ecb4 8014c550
> [ 2200.318222] bee0: a0000013 ffffffff
> [ 2200.321708]  r5:a0000013 r4:8014c550
> [ 2200.325290] [<8014c4e4>] (irq_finalize_oneshot.part.0) from
> [<8014c72c>] (irq_thread_fn+0x7c/0x88)
> [ 2200.334238]  r5:9d11ed80 r4:9e539540
> [ 2200.337821] [<8014c6b0>] (irq_thread_fn) from [<8014c974>]
> (irq_thread+0x104/0x1e0)
> [ 2200.345473]  r7:00000001 r6:9d11ed80 r5:ffffe000 r4:9e539540
> [ 2200.351153] [<8014c870>] (irq_thread) from [<80133490>]
> (kthread+0x14c/0x164)
> [ 2200.358287]  r10:9d0a3c00 r9:8014c870 r8:9e539540 r7:9e56a000
> r6:00000000 r5:9e542060
> [ 2200.366104]  r4:9e539b00
> [ 2200.368650] [<80133344>] (kthread) from [<801010e8>]
> (ret_from_fork+0x14/0x2c)
> [ 2200.375865] Exception stack(0x9e56bfb0 to 0x9e56bff8)
> [ 2200.380911] bfa0:                                     00000000
> 00000000 00000000 00000000
> [ 2200.389079] bfc0: 00000000 00000000 00000000 00000000 00000000
> 00000000 00000000 00000000
> [ 2200.397248] bfe0: 00000000 00000000 00000000 00000000 00000013 00000000
> [ 2200.403863]  r10:00000000 r9:00000000 r8:00000000 r7:00000000
> r6:00000000 r5:80133344
> [ 2200.411681]  r4:9e542060
> [ 2228.009114] watchdog: BUG: soft lockup - CPU#0 stuck for 22s!
> [irq/23-aspeed-v:609]
> [ 2228.016798] CPU: 0 PID: 609 Comm: irq/23-aspeed-v Tainted: G
>     L    5.0.7-e124b50aeacb66baa42541ebc6c3544350f75a79 #1
> [ 2228.028268] Hardware name: Generic DT based system
> [ 2228.033085] PC is at irq_finalize_oneshot.part.0+0x6c/0x11c
> [ 2228.038670] LR is at unmask_irq.part.4+0x30/0x44
> [ 2228.043288] pc : [<8014c550>]    lr : [<8014ecb4>]    psr: a0000013
> [ 2228.049545] sp : 9e56bef0  ip : 00000000  fp : 9e56bf0c
> [ 2228.054762] r10: 80a07008  r9 : 00000000  r8 : 8014c6b0
> [ 2228.059980] r7 : 00000001  r6 : 00000001  r5 : 9d11ed90  r4 : 9d11ed80
> [ 2228.066498] r3 : 02400200  r2 : 9d11ed80  r1 : 00000080  r0 : 9d002940
> [ 2228.073014] Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM
> Segment none
> [ 2228.080137] Control: 00c5387d  Table: 9d98c008  DAC: 00000051
> [ 2228.085881] CPU: 0 PID: 609 Comm: irq/23-aspeed-v Tainted: G
>     L    5.0.7-e124b50aeacb66baa42541ebc6c3544350f75a79 #1
> [ 2228.097338] Hardware name: Generic DT based system
> [ 2228.102123] Backtrace:
> [ 2228.104609] [<80107cdc>] (dump_backtrace) from [<80107f10>]
> (show_stack+0x20/0x24)
> [ 2228.112176]  r7:00000000 r6:9e56a000 r5:80a070d8 r4:80a1ba08
> [ 2228.117852] [<80107ef0>] (show_stack) from [<80691844>]
> (dump_stack+0x20/0x28)
> [ 2228.125080] [<80691824>] (dump_stack) from [<80103dd8>]
> (show_regs+0x1c/0x20)
> [ 2228.132234] [<80103dbc>] (show_regs) from [<8017a940>]
> (watchdog_timer_fn+0x1c4/0x21c)
> [ 2228.140164] [<8017a77c>] (watchdog_timer_fn) from [<80159ce0>]
> (__hrtimer_run_queues.constprop.3+0x14c/0x280)
> [ 2228.150078]  r10:8017a77c r9:80a18cb0 r8:00000006 r7:20000193
> r6:80a18b80 r5:80a18bc0
> [ 2228.157891]  r4:80a1ba40
> [ 2228.160438] [<80159b94>] (__hrtimer_run_queues.constprop.3) from
> [<8015aac0>] (hrtimer_interrupt+0xf4/0x258)
> [ 2228.170262]  r10:80a18d00 r9:80a18cb0 r8:ffffffff r7:7fffffff
> r6:00000003 r5:20000193
> [ 2228.178077]  r4:80a18b80
> [ 2228.180637] [<8015a9cc>] (hrtimer_interrupt) from [<8050d51c>]
> (fttmr010_timer_interrupt+0x20/0x28)
> [ 2228.189674]  r10:9e56bdf8 r9:00000000 r8:9d013600 r7:00000011
> r6:9d01fd80 r5:80a4af84
> [ 2228.197491]  r4:9d001b80
> [ 2228.200052] [<8050d4fc>] (fttmr010_timer_interrupt) from [<8014be68>]
> (__handle_irq_event_percpu+0x48/0x1c4)
> [ 2228.209884] [<8014be20>] (__handle_irq_event_percpu) from
> [<8014c01c>] (handle_irq_event_percpu+0x38/0x90)
> [ 2228.219530]  r10:80a07008 r9:9e56a000 r8:9d013600 r7:00000000
> r6:9d01fd80 r5:80a4af84
> [ 2228.227348]  r4:80a07008
> [ 2228.229892] [<8014bfe4>] (handle_irq_event_percpu) from [<8014c0ac>]
> (handle_irq_event+0x38/0x4c)
> [ 2228.238758]  r6:00000001 r5:80a4af84 r4:9d01fd80
> [ 2228.243388] [<8014c074>] (handle_irq_event) from [<8014fa84>]
> (handle_edge_irq+0xb0/0x1cc)
> [ 2228.251643]  r5:80a4af84 r4:9d01fd80
> [ 2228.255224] [<8014f9d4>] (handle_edge_irq) from [<8014b6a4>]
> (generic_handle_irq+0x30/0x44)
> [ 2228.263558]  r5:80a4af84 r4:00000011
> [ 2228.267142] [<8014b674>] (generic_handle_irq) from [<8014b710>]
> (__handle_domain_irq+0x58/0xb8)
> [ 2228.275844] [<8014b6b8>] (__handle_domain_irq) from [<80102164>]
> (avic_handle_irq+0x68/0x70)
> [ 2228.284279]  r9:9e56a000 r8:8014c6b0 r7:9e56bed4 r6:ffffffff
> r5:9e56bea0 r4:9d002940
> [ 2228.292019] [<801020fc>] (avic_handle_irq) from [<801019ec>]
> (__irq_svc+0x6c/0x90)
> [ 2228.299577] Exception stack(0x9e56bea0 to 0x9e56bee8)
> [ 2228.304630] bea0: 9d002940 00000080 9d11ed80 02400200 9d11ed80
> 9d11ed90 00000001 00000001
> [ 2228.312803] bec0: 8014c6b0 00000000 80a07008 9e56bf0c 00000000
> 9e56bef0 8014ecb4 8014c550
> [ 2228.320967] bee0: a0000013 ffffffff
> [ 2228.324451]  r5:a0000013 r4:8014c550
> [ 2228.328038] [<8014c4e4>] (irq_finalize_oneshot.part.0) from
> [<8014c72c>] (irq_thread_fn+0x7c/0x88)
> [ 2228.336983]  r5:9d11ed80 r4:9e539540
> [ 2228.340566] [<8014c6b0>] (irq_thread_fn) from [<8014c974>]
> (irq_thread+0x104/0x1e0)
> [ 2228.348219]  r7:00000001 r6:9d11ed80 r5:ffffe000 r4:9e539540
> [ 2228.353898] [<8014c870>] (irq_thread) from [<80133490>]
> (kthread+0x14c/0x164)
> [ 2228.361031]  r10:9d0a3c00 r9:8014c870 r8:9e539540 r7:9e56a000
> r6:00000000 r5:9e542060
> [ 2228.368848]  r4:9e539b00
> [ 2228.371394] [<80133344>] (kthread) from [<801010e8>]
> (ret_from_fork+0x14/0x2c)
> [ 2228.378609] Exception stack(0x9e56bfb0 to 0x9e56bff8)
> [ 2228.383656] bfa0:                                     00000000
> 00000000 00000000 00000000
> [ 2228.391833] bfc0: 00000000 00000000 00000000 00000000 00000000
> 00000000 00000000 00000000
> [ 2228.400004] bfe0: 00000000 00000000 00000000 00000000 00000013 00000000
> [ 2228.406617]  r10:00000000 r9:00000000 r8:00000000 r7:00000000
> r6:00000000 r5:80133344
> [ 2228.414436]  r4:9e542060
>
> >
> >
> > The rest looks fine.
> >
> > Thanks,
> >
> > Eddie
> >
> >
> >>           aspeed_video_irq_res_change(video);
> >>           return IRQ_HANDLED;
> >>       }
> >>       if (sts & VE_INTERRUPT_MODE_DETECT) {
> >> +        aspeed_video_write(video, VE_INTERRUPT_STATUS,
> >> +                   VE_INTERRUPT_MODE_DETECT);
> >>           if (test_bit(VIDEO_RES_DETECT, &video->flags)) {
> >>               aspeed_video_update(video, VE_INTERRUPT_CTRL,
> >>                           VE_INTERRUPT_MODE_DETECT, 0);
> >> -            aspeed_video_write(video, VE_INTERRUPT_STATUS,
> >> -                       VE_INTERRUPT_MODE_DETECT);
> >> -
> >>               set_bit(VIDEO_MODE_DETECT_DONE, &video->flags);
> >>               wake_up_interruptible_all(&video->wait);
> >>           } else {
> >> @@ -574,6 +575,9 @@ static irqreturn_t aspeed_video_irq(int irq, void
> >> *arg)
> >>           u32 frame_size = aspeed_video_read(video,
> >>                              VE_OFFSET_COMP_STREAM);
> >> +        aspeed_video_write(video, VE_INTERRUPT_STATUS,
> >> +                   VE_INTERRUPT_COMP_COMPLETE);
> >> +
> >>           spin_lock(&video->lock);
> >>           clear_bit(VIDEO_FRAME_INPRG, &video->flags);
> >>           buf = list_first_entry_or_null(&video->buffers,
> >> @@ -599,8 +603,6 @@ static irqreturn_t aspeed_video_irq(int irq, void
> >> *arg)
> >>                       VE_SEQ_CTRL_TRIG_COMP, 0);
> >>           aspeed_video_update(video, VE_INTERRUPT_CTRL,
> >>                       VE_INTERRUPT_COMP_COMPLETE, 0);
> >> -        aspeed_video_write(video, VE_INTERRUPT_STATUS,
> >> -                   VE_INTERRUPT_COMP_COMPLETE);
> >>           if (test_bit(VIDEO_STREAMING, &video->flags) && buf)
> >>               aspeed_video_start_frame(video);
> >
Joel Stanley April 30, 2019, 6:01 a.m. UTC | #4
On Tue, 30 Apr 2019 at 03:04, Joel Stanley <joel@jms.id.au> wrote:
>
> On Mon, 29 Apr 2019 at 23:38, Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> wrote:
> >
> > On 4/29/2019 3:29 PM, Eddie James wrote:
> > >
> > > On 4/25/19 5:20 PM, Jae Hyun Yoo wrote:
> > >> Interrupt status flags should be cleared immediately otherwise
> > >> interrupt handler will be called again and again until the flag
> > >> is cleared, but this driver clears some flags through a 500ms
> > >> delayed work which is a bad idea in interrupt handling, so this
> > >> commit makes the interrupt handler clear the status flags
> > >> immediately.
> > >>
> > >> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
> > >> ---
> > >>   drivers/media/platform/aspeed-video.c | 12 +++++++-----
> > >>   1 file changed, 7 insertions(+), 5 deletions(-)
> > >>
> > >> diff --git a/drivers/media/platform/aspeed-video.c
> > >> b/drivers/media/platform/aspeed-video.c
> > >> index 77c209a472ca..e218f375b9f5 100644
> > >> --- a/drivers/media/platform/aspeed-video.c
> > >> +++ b/drivers/media/platform/aspeed-video.c
> > >> @@ -546,17 +546,18 @@ static irqreturn_t aspeed_video_irq(int irq,
> > >> void *arg)
> > >>        * re-initialize
> > >>        */
> > >>       if (sts & VE_INTERRUPT_MODE_DETECT_WD) {
> > >> +        aspeed_video_write(video, VE_INTERRUPT_STATUS,
> > >> +                   VE_INTERRUPT_MODE_DETECT_WD);
> > >
> > >
> > > aspeed_video_irq_res_change disables all IRQs and turns off the clocks.
> > > This shouldn't be necessary.
> >
> > In fact, this patch fixes a watch dog reset with printing out a stack
> > trace like below. This happens very rarely but it's critical because it
> > causes a BMC reset. In my experiments, interrupt flags should be cleared
> > even with the aspeed_video_write(video, VE_INTERRUPT_CTRL, 0) in
> > aspeed_video_off(), or we should add
> > apeed_video_write(video, VE_INTERRUPT_STATUS, 0xffffffff)
> > before the disabling interrupt. I think the way in this patch is better.
>
> In general, a driver should certainly be clearing (acking) the
> interrupt bits in the interrupt handler before returning.
>
> Jae, it would be interesting to know the value of VE_INTERRUPT_STATUS
> in the soft lockup situation.
>
> I took a closer look at this function, and it should probably not
> return IRQ_HANDLED at the bottom, as it may have fallen through all of
> the if statements and not have handled any interrupt. Jae, can you
> take a look at this and send another patch if you think that is
> correct.

Something like the patch below. It also made me wonder why you don't
return  from the flags = VIDEO_RES_DETECT state?

I don't have a way to test. Is there a simple command I can run on a
BMC to test, or do I need the entire stack?

--- a/drivers/media/platform/aspeed-video.c
+++ b/drivers/media/platform/aspeed-video.c
@@ -566,8 +566,8 @@ static irqreturn_t aspeed_video_irq(int irq, void *arg)
                         * detection; reset the engine and re-initialize
                         */
                        aspeed_video_irq_res_change(video);
-                       return IRQ_HANDLED;
                }
+               return IRQ_HANDLED;
        }

        if (sts & VE_INTERRUPT_COMP_COMPLETE) {
@@ -606,9 +606,13 @@ static irqreturn_t aspeed_video_irq(int irq, void *arg)

                if (test_bit(VIDEO_STREAMING, &video->flags) && buf)
                        aspeed_video_start_frame(video);
+
+               return IRQ_HANDLED;
        }

-       return IRQ_HANDLED;
+       dev_dbg(video->dev, "unhandled states: %08x\n", sts);
+
+       return IRQ_NONE;
 }
Jae Hyun Yoo April 30, 2019, 5:35 p.m. UTC | #5
On 4/29/2019 8:04 PM, Joel Stanley wrote:
> On Mon, 29 Apr 2019 at 23:38, Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> wrote:
>>
>> On 4/29/2019 3:29 PM, Eddie James wrote:
>>>
>>> On 4/25/19 5:20 PM, Jae Hyun Yoo wrote:
>>>> Interrupt status flags should be cleared immediately otherwise
>>>> interrupt handler will be called again and again until the flag
>>>> is cleared, but this driver clears some flags through a 500ms
>>>> delayed work which is a bad idea in interrupt handling, so this
>>>> commit makes the interrupt handler clear the status flags
>>>> immediately.
>>>>
>>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
>>>> ---
>>>>    drivers/media/platform/aspeed-video.c | 12 +++++++-----
>>>>    1 file changed, 7 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/drivers/media/platform/aspeed-video.c
>>>> b/drivers/media/platform/aspeed-video.c
>>>> index 77c209a472ca..e218f375b9f5 100644
>>>> --- a/drivers/media/platform/aspeed-video.c
>>>> +++ b/drivers/media/platform/aspeed-video.c
>>>> @@ -546,17 +546,18 @@ static irqreturn_t aspeed_video_irq(int irq,
>>>> void *arg)
>>>>         * re-initialize
>>>>         */
>>>>        if (sts & VE_INTERRUPT_MODE_DETECT_WD) {
>>>> +        aspeed_video_write(video, VE_INTERRUPT_STATUS,
>>>> +                   VE_INTERRUPT_MODE_DETECT_WD);
>>>
>>>
>>> aspeed_video_irq_res_change disables all IRQs and turns off the clocks.
>>> This shouldn't be necessary.
>>
>> In fact, this patch fixes a watch dog reset with printing out a stack
>> trace like below. This happens very rarely but it's critical because it
>> causes a BMC reset. In my experiments, interrupt flags should be cleared
>> even with the aspeed_video_write(video, VE_INTERRUPT_CTRL, 0) in
>> aspeed_video_off(), or we should add
>> apeed_video_write(video, VE_INTERRUPT_STATUS, 0xffffffff)
>> before the disabling interrupt. I think the way in this patch is better.
> 
> In general, a driver should certainly be clearing (acking) the
> interrupt bits in the interrupt handler before returning.
> 
> Jae, it would be interesting to know the value of VE_INTERRUPT_STATUS
> in the soft lockup situation.
> 
> I took a closer look at this function, and it should probably not
> return IRQ_HANDLED at the bottom, as it may have fallen through all of
> the if statements and not have handled any interrupt. Jae, can you
> take a look at this and send another patch if you think that is
> correct.

You are right. It should return IRQ_NONE when there is any unhanded bit.
I'll look at the interrupt handler again. Thanks for your pointing it
out.

Thanks,

Jae

> Cheers,
> 
> Joel
> 
>>
>> After applying this patch, I've not seen the watch dog reset yet for
>> over a week.
>>
>> Thanks,
>>
>> Jae
>>
>> [ 2174.199114] sched: RT throttling activated
>> [ 2200.009118] watchdog: BUG: soft lockup - CPU#0 stuck for 22s!
>> [irq/23-aspeed-v:609]
>> [ 2200.016802] CPU: 0 PID: 609 Comm: irq/23-aspeed-v Not tainted
>> 5.0.7-e124b50aeacb66baa42541ebc6c3544350f75a79 #1
>> [ 2200.026884] Hardware name: Generic DT based system
>> [ 2200.031705] PC is at irq_finalize_oneshot.part.0+0x6c/0x11c
>> [ 2200.037284] LR is at unmask_irq.part.4+0x30/0x44
>> [ 2200.041902] pc : [<8014c550>]    lr : [<8014ecb4>]    psr: a0000013
>> [ 2200.048159] sp : 9e56bef0  ip : 00000000  fp : 9e56bf0c
>> [ 2200.053377] r10: 80a07008  r9 : 00000000  r8 : 8014c6b0
>> [ 2200.058595] r7 : 00000001  r6 : 00000001  r5 : 9d11ed90  r4 : 9d11ed80
>> [ 2200.065112] r3 : 02400200  r2 : 9d11ed80  r1 : 00000080  r0 : 9d002940
>> [ 2200.071630] Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM
>> Segment none
>> [ 2200.078754] Control: 00c5387d  Table: 9d98c008  DAC: 00000051
>> [ 2200.084496] CPU: 0 PID: 609 Comm: irq/23-aspeed-v Not tainted
>> 5.0.7-e124b50aeacb66baa42541ebc6c3544350f75a79 #1
>> [ 2200.094567] Hardware name: Generic DT based system
>> [ 2200.099352] Backtrace:
>> [ 2200.101844] [<80107cdc>] (dump_backtrace) from [<80107f10>]
>> (show_stack+0x20/0x24)
>> [ 2200.109415]  r7:00000000 r6:9e56a000 r5:80a070d8 r4:80a1ba08
>> [ 2200.115092] [<80107ef0>] (show_stack) from [<80691844>]
>> (dump_stack+0x20/0x28)
>> [ 2200.122320] [<80691824>] (dump_stack) from [<80103dd8>]
>> (show_regs+0x1c/0x20)
>> [ 2200.129467] [<80103dbc>] (show_regs) from [<8017a940>]
>> (watchdog_timer_fn+0x1c4/0x21c)
>> [ 2200.137393] [<8017a77c>] (watchdog_timer_fn) from [<80159ce0>]
>> (__hrtimer_run_queues.constprop.3+0x14c/0x280)
>> [ 2200.147306]  r10:8017a77c r9:80a18cb0 r8:00000006 r7:20000193
>> r6:80a18b80 r5:80a18bc0
>> [ 2200.155121]  r4:80a1ba40
>> [ 2200.157667] [<80159b94>] (__hrtimer_run_queues.constprop.3) from
>> [<8015aac0>] (hrtimer_interrupt+0xf4/0x258)
>> [ 2200.167491]  r10:80a18d00 r9:80a18cb0 r8:ffffffff r7:7fffffff
>> r6:00000003 r5:20000193
>> [ 2200.175305]  r4:80a18b80
>> [ 2200.177867] [<8015a9cc>] (hrtimer_interrupt) from [<8050d51c>]
>> (fttmr010_timer_interrupt+0x20/0x28)
>> [ 2200.186915]  r10:9e56bdf8 r9:00000000 r8:9d013600 r7:00000011
>> r6:9d01fd80 r5:80a4af84
>> [ 2200.194727]  r4:9d001b80
>> [ 2200.197292] [<8050d4fc>] (fttmr010_timer_interrupt) from [<8014be68>]
>> (__handle_irq_event_percpu+0x48/0x1c4)
>> [ 2200.207125] [<8014be20>] (__handle_irq_event_percpu) from
>> [<8014c01c>] (handle_irq_event_percpu+0x38/0x90)
>> [ 2200.216778]  r10:80a07008 r9:9e56a000 r8:9d013600 r7:00000000
>> r6:9d01fd80 r5:80a4af84
>> [ 2200.224594]  r4:80a07008
>> [ 2200.227139] [<8014bfe4>] (handle_irq_event_percpu) from [<8014c0ac>]
>> (handle_irq_event+0x38/0x4c)
>> [ 2200.236003]  r6:00000001 r5:80a4af84 r4:9d01fd80
>> [ 2200.240623] [<8014c074>] (handle_irq_event) from [<8014fa84>]
>> (handle_edge_irq+0xb0/0x1cc)
>> [ 2200.248881]  r5:80a4af84 r4:9d01fd80
>> [ 2200.252462] [<8014f9d4>] (handle_edge_irq) from [<8014b6a4>]
>> (generic_handle_irq+0x30/0x44)
>> [ 2200.260805]  r5:80a4af84 r4:00000011
>> [ 2200.264387] [<8014b674>] (generic_handle_irq) from [<8014b710>]
>> (__handle_domain_irq+0x58/0xb8)
>> [ 2200.273091] [<8014b6b8>] (__handle_domain_irq) from [<80102164>]
>> (avic_handle_irq+0x68/0x70)
>> [ 2200.281525]  r9:9e56a000 r8:8014c6b0 r7:9e56bed4 r6:ffffffff
>> r5:9e56bea0 r4:9d002940
>> [ 2200.289267] [<801020fc>] (avic_handle_irq) from [<801019ec>]
>> (__irq_svc+0x6c/0x90)
>> [ 2200.296824] Exception stack(0x9e56bea0 to 0x9e56bee8)
>> [ 2200.301879] bea0: 9d002940 00000080 9d11ed80 02400200 9d11ed80
>> 9d11ed90 00000001 00000001
>> [ 2200.310060] bec0: 8014c6b0 00000000 80a07008 9e56bf0c 00000000
>> 9e56bef0 8014ecb4 8014c550
>> [ 2200.318222] bee0: a0000013 ffffffff
>> [ 2200.321708]  r5:a0000013 r4:8014c550
>> [ 2200.325290] [<8014c4e4>] (irq_finalize_oneshot.part.0) from
>> [<8014c72c>] (irq_thread_fn+0x7c/0x88)
>> [ 2200.334238]  r5:9d11ed80 r4:9e539540
>> [ 2200.337821] [<8014c6b0>] (irq_thread_fn) from [<8014c974>]
>> (irq_thread+0x104/0x1e0)
>> [ 2200.345473]  r7:00000001 r6:9d11ed80 r5:ffffe000 r4:9e539540
>> [ 2200.351153] [<8014c870>] (irq_thread) from [<80133490>]
>> (kthread+0x14c/0x164)
>> [ 2200.358287]  r10:9d0a3c00 r9:8014c870 r8:9e539540 r7:9e56a000
>> r6:00000000 r5:9e542060
>> [ 2200.366104]  r4:9e539b00
>> [ 2200.368650] [<80133344>] (kthread) from [<801010e8>]
>> (ret_from_fork+0x14/0x2c)
>> [ 2200.375865] Exception stack(0x9e56bfb0 to 0x9e56bff8)
>> [ 2200.380911] bfa0:                                     00000000
>> 00000000 00000000 00000000
>> [ 2200.389079] bfc0: 00000000 00000000 00000000 00000000 00000000
>> 00000000 00000000 00000000
>> [ 2200.397248] bfe0: 00000000 00000000 00000000 00000000 00000013 00000000
>> [ 2200.403863]  r10:00000000 r9:00000000 r8:00000000 r7:00000000
>> r6:00000000 r5:80133344
>> [ 2200.411681]  r4:9e542060
>> [ 2228.009114] watchdog: BUG: soft lockup - CPU#0 stuck for 22s!
>> [irq/23-aspeed-v:609]
>> [ 2228.016798] CPU: 0 PID: 609 Comm: irq/23-aspeed-v Tainted: G
>>      L    5.0.7-e124b50aeacb66baa42541ebc6c3544350f75a79 #1
>> [ 2228.028268] Hardware name: Generic DT based system
>> [ 2228.033085] PC is at irq_finalize_oneshot.part.0+0x6c/0x11c
>> [ 2228.038670] LR is at unmask_irq.part.4+0x30/0x44
>> [ 2228.043288] pc : [<8014c550>]    lr : [<8014ecb4>]    psr: a0000013
>> [ 2228.049545] sp : 9e56bef0  ip : 00000000  fp : 9e56bf0c
>> [ 2228.054762] r10: 80a07008  r9 : 00000000  r8 : 8014c6b0
>> [ 2228.059980] r7 : 00000001  r6 : 00000001  r5 : 9d11ed90  r4 : 9d11ed80
>> [ 2228.066498] r3 : 02400200  r2 : 9d11ed80  r1 : 00000080  r0 : 9d002940
>> [ 2228.073014] Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM
>> Segment none
>> [ 2228.080137] Control: 00c5387d  Table: 9d98c008  DAC: 00000051
>> [ 2228.085881] CPU: 0 PID: 609 Comm: irq/23-aspeed-v Tainted: G
>>      L    5.0.7-e124b50aeacb66baa42541ebc6c3544350f75a79 #1
>> [ 2228.097338] Hardware name: Generic DT based system
>> [ 2228.102123] Backtrace:
>> [ 2228.104609] [<80107cdc>] (dump_backtrace) from [<80107f10>]
>> (show_stack+0x20/0x24)
>> [ 2228.112176]  r7:00000000 r6:9e56a000 r5:80a070d8 r4:80a1ba08
>> [ 2228.117852] [<80107ef0>] (show_stack) from [<80691844>]
>> (dump_stack+0x20/0x28)
>> [ 2228.125080] [<80691824>] (dump_stack) from [<80103dd8>]
>> (show_regs+0x1c/0x20)
>> [ 2228.132234] [<80103dbc>] (show_regs) from [<8017a940>]
>> (watchdog_timer_fn+0x1c4/0x21c)
>> [ 2228.140164] [<8017a77c>] (watchdog_timer_fn) from [<80159ce0>]
>> (__hrtimer_run_queues.constprop.3+0x14c/0x280)
>> [ 2228.150078]  r10:8017a77c r9:80a18cb0 r8:00000006 r7:20000193
>> r6:80a18b80 r5:80a18bc0
>> [ 2228.157891]  r4:80a1ba40
>> [ 2228.160438] [<80159b94>] (__hrtimer_run_queues.constprop.3) from
>> [<8015aac0>] (hrtimer_interrupt+0xf4/0x258)
>> [ 2228.170262]  r10:80a18d00 r9:80a18cb0 r8:ffffffff r7:7fffffff
>> r6:00000003 r5:20000193
>> [ 2228.178077]  r4:80a18b80
>> [ 2228.180637] [<8015a9cc>] (hrtimer_interrupt) from [<8050d51c>]
>> (fttmr010_timer_interrupt+0x20/0x28)
>> [ 2228.189674]  r10:9e56bdf8 r9:00000000 r8:9d013600 r7:00000011
>> r6:9d01fd80 r5:80a4af84
>> [ 2228.197491]  r4:9d001b80
>> [ 2228.200052] [<8050d4fc>] (fttmr010_timer_interrupt) from [<8014be68>]
>> (__handle_irq_event_percpu+0x48/0x1c4)
>> [ 2228.209884] [<8014be20>] (__handle_irq_event_percpu) from
>> [<8014c01c>] (handle_irq_event_percpu+0x38/0x90)
>> [ 2228.219530]  r10:80a07008 r9:9e56a000 r8:9d013600 r7:00000000
>> r6:9d01fd80 r5:80a4af84
>> [ 2228.227348]  r4:80a07008
>> [ 2228.229892] [<8014bfe4>] (handle_irq_event_percpu) from [<8014c0ac>]
>> (handle_irq_event+0x38/0x4c)
>> [ 2228.238758]  r6:00000001 r5:80a4af84 r4:9d01fd80
>> [ 2228.243388] [<8014c074>] (handle_irq_event) from [<8014fa84>]
>> (handle_edge_irq+0xb0/0x1cc)
>> [ 2228.251643]  r5:80a4af84 r4:9d01fd80
>> [ 2228.255224] [<8014f9d4>] (handle_edge_irq) from [<8014b6a4>]
>> (generic_handle_irq+0x30/0x44)
>> [ 2228.263558]  r5:80a4af84 r4:00000011
>> [ 2228.267142] [<8014b674>] (generic_handle_irq) from [<8014b710>]
>> (__handle_domain_irq+0x58/0xb8)
>> [ 2228.275844] [<8014b6b8>] (__handle_domain_irq) from [<80102164>]
>> (avic_handle_irq+0x68/0x70)
>> [ 2228.284279]  r9:9e56a000 r8:8014c6b0 r7:9e56bed4 r6:ffffffff
>> r5:9e56bea0 r4:9d002940
>> [ 2228.292019] [<801020fc>] (avic_handle_irq) from [<801019ec>]
>> (__irq_svc+0x6c/0x90)
>> [ 2228.299577] Exception stack(0x9e56bea0 to 0x9e56bee8)
>> [ 2228.304630] bea0: 9d002940 00000080 9d11ed80 02400200 9d11ed80
>> 9d11ed90 00000001 00000001
>> [ 2228.312803] bec0: 8014c6b0 00000000 80a07008 9e56bf0c 00000000
>> 9e56bef0 8014ecb4 8014c550
>> [ 2228.320967] bee0: a0000013 ffffffff
>> [ 2228.324451]  r5:a0000013 r4:8014c550
>> [ 2228.328038] [<8014c4e4>] (irq_finalize_oneshot.part.0) from
>> [<8014c72c>] (irq_thread_fn+0x7c/0x88)
>> [ 2228.336983]  r5:9d11ed80 r4:9e539540
>> [ 2228.340566] [<8014c6b0>] (irq_thread_fn) from [<8014c974>]
>> (irq_thread+0x104/0x1e0)
>> [ 2228.348219]  r7:00000001 r6:9d11ed80 r5:ffffe000 r4:9e539540
>> [ 2228.353898] [<8014c870>] (irq_thread) from [<80133490>]
>> (kthread+0x14c/0x164)
>> [ 2228.361031]  r10:9d0a3c00 r9:8014c870 r8:9e539540 r7:9e56a000
>> r6:00000000 r5:9e542060
>> [ 2228.368848]  r4:9e539b00
>> [ 2228.371394] [<80133344>] (kthread) from [<801010e8>]
>> (ret_from_fork+0x14/0x2c)
>> [ 2228.378609] Exception stack(0x9e56bfb0 to 0x9e56bff8)
>> [ 2228.383656] bfa0:                                     00000000
>> 00000000 00000000 00000000
>> [ 2228.391833] bfc0: 00000000 00000000 00000000 00000000 00000000
>> 00000000 00000000 00000000
>> [ 2228.400004] bfe0: 00000000 00000000 00000000 00000000 00000013 00000000
>> [ 2228.406617]  r10:00000000 r9:00000000 r8:00000000 r7:00000000
>> r6:00000000 r5:80133344
>> [ 2228.414436]  r4:9e542060
>>
>>>
>>>
>>> The rest looks fine.
>>>
>>> Thanks,
>>>
>>> Eddie
>>>
>>>
>>>>            aspeed_video_irq_res_change(video);
>>>>            return IRQ_HANDLED;
>>>>        }
>>>>        if (sts & VE_INTERRUPT_MODE_DETECT) {
>>>> +        aspeed_video_write(video, VE_INTERRUPT_STATUS,
>>>> +                   VE_INTERRUPT_MODE_DETECT);
>>>>            if (test_bit(VIDEO_RES_DETECT, &video->flags)) {
>>>>                aspeed_video_update(video, VE_INTERRUPT_CTRL,
>>>>                            VE_INTERRUPT_MODE_DETECT, 0);
>>>> -            aspeed_video_write(video, VE_INTERRUPT_STATUS,
>>>> -                       VE_INTERRUPT_MODE_DETECT);
>>>> -
>>>>                set_bit(VIDEO_MODE_DETECT_DONE, &video->flags);
>>>>                wake_up_interruptible_all(&video->wait);
>>>>            } else {
>>>> @@ -574,6 +575,9 @@ static irqreturn_t aspeed_video_irq(int irq, void
>>>> *arg)
>>>>            u32 frame_size = aspeed_video_read(video,
>>>>                               VE_OFFSET_COMP_STREAM);
>>>> +        aspeed_video_write(video, VE_INTERRUPT_STATUS,
>>>> +                   VE_INTERRUPT_COMP_COMPLETE);
>>>> +
>>>>            spin_lock(&video->lock);
>>>>            clear_bit(VIDEO_FRAME_INPRG, &video->flags);
>>>>            buf = list_first_entry_or_null(&video->buffers,
>>>> @@ -599,8 +603,6 @@ static irqreturn_t aspeed_video_irq(int irq, void
>>>> *arg)
>>>>                        VE_SEQ_CTRL_TRIG_COMP, 0);
>>>>            aspeed_video_update(video, VE_INTERRUPT_CTRL,
>>>>                        VE_INTERRUPT_COMP_COMPLETE, 0);
>>>> -        aspeed_video_write(video, VE_INTERRUPT_STATUS,
>>>> -                   VE_INTERRUPT_COMP_COMPLETE);
>>>>            if (test_bit(VIDEO_STREAMING, &video->flags) && buf)
>>>>                aspeed_video_start_frame(video);
>>>
Jae Hyun Yoo April 30, 2019, 8 p.m. UTC | #6
On 4/29/2019 11:01 PM, Joel Stanley wrote:
> On Tue, 30 Apr 2019 at 03:04, Joel Stanley <joel@jms.id.au> wrote:
>>
>> On Mon, 29 Apr 2019 at 23:38, Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> wrote:
>>>
>>> On 4/29/2019 3:29 PM, Eddie James wrote:
>>>>
>>>> On 4/25/19 5:20 PM, Jae Hyun Yoo wrote:
>>>>> Interrupt status flags should be cleared immediately otherwise
>>>>> interrupt handler will be called again and again until the flag
>>>>> is cleared, but this driver clears some flags through a 500ms
>>>>> delayed work which is a bad idea in interrupt handling, so this
>>>>> commit makes the interrupt handler clear the status flags
>>>>> immediately.
>>>>>
>>>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
>>>>> ---
>>>>>    drivers/media/platform/aspeed-video.c | 12 +++++++-----
>>>>>    1 file changed, 7 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/drivers/media/platform/aspeed-video.c
>>>>> b/drivers/media/platform/aspeed-video.c
>>>>> index 77c209a472ca..e218f375b9f5 100644
>>>>> --- a/drivers/media/platform/aspeed-video.c
>>>>> +++ b/drivers/media/platform/aspeed-video.c
>>>>> @@ -546,17 +546,18 @@ static irqreturn_t aspeed_video_irq(int irq,
>>>>> void *arg)
>>>>>         * re-initialize
>>>>>         */
>>>>>        if (sts & VE_INTERRUPT_MODE_DETECT_WD) {
>>>>> +        aspeed_video_write(video, VE_INTERRUPT_STATUS,
>>>>> +                   VE_INTERRUPT_MODE_DETECT_WD);
>>>>
>>>>
>>>> aspeed_video_irq_res_change disables all IRQs and turns off the clocks.
>>>> This shouldn't be necessary.
>>>
>>> In fact, this patch fixes a watch dog reset with printing out a stack
>>> trace like below. This happens very rarely but it's critical because it
>>> causes a BMC reset. In my experiments, interrupt flags should be cleared
>>> even with the aspeed_video_write(video, VE_INTERRUPT_CTRL, 0) in
>>> aspeed_video_off(), or we should add
>>> apeed_video_write(video, VE_INTERRUPT_STATUS, 0xffffffff)
>>> before the disabling interrupt. I think the way in this patch is better.
>>
>> In general, a driver should certainly be clearing (acking) the
>> interrupt bits in the interrupt handler before returning.
>>
>> Jae, it would be interesting to know the value of VE_INTERRUPT_STATUS
>> in the soft lockup situation.
>>
>> I took a closer look at this function, and it should probably not
>> return IRQ_HANDLED at the bottom, as it may have fallen through all of
>> the if statements and not have handled any interrupt. Jae, can you
>> take a look at this and send another patch if you think that is
>> correct.
> 
> Something like the patch below. It also made me wonder why you don't
> return  from the flags = VIDEO_RES_DETECT state?

Issue is caused because VE_INTERRUPT_MODE_DETECT_WD or
VE_INTERRUPT_MODE_DETECT is cleared by a 500ms delayed work when the
handler calls aspeed_video_irq_res_change(). As Eddie said, it disables
interrupt through aspeed_video_off() but it doesn't mean that the
interrupt bit is cleared at that timing. Actually, the interrupt bit
is cleared by aspeed_video_resolution_work() 500ms after it gets the
interrupt request so this patch is going to clear the bits immediately.

> I don't have a way to test. Is there a simple command I can run on a
> BMC to test, or do I need the entire stack?

The watch dog reset issue happens when video mode change is occurred. I
use Ubuntu GUI mode and set screen saver to blank screen with 1 minute
expiration time. And then, open bmcweb page and navigate to Server
control -> KVM page. As long as a user stays in this page, KVM video
will be off by the screen saver and then it will be awaken by KVM
service so this sequence will be repeated infinitely. I usually see the
issue at least once if I put my system under this overnight stress test.

> --- a/drivers/media/platform/aspeed-video.c
> +++ b/drivers/media/platform/aspeed-video.c
> @@ -566,8 +566,8 @@ static irqreturn_t aspeed_video_irq(int irq, void *arg)
>                           * detection; reset the engine and re-initialize
>                           */
>                          aspeed_video_irq_res_change(video);
> -                       return IRQ_HANDLED;
>                  }
> +               return IRQ_HANDLED;
>          }
> 
>          if (sts & VE_INTERRUPT_COMP_COMPLETE) {
> @@ -606,9 +606,13 @@ static irqreturn_t aspeed_video_irq(int irq, void *arg)
> 
>                  if (test_bit(VIDEO_STREAMING, &video->flags) && buf)
>                          aspeed_video_start_frame(video);
> +
> +               return IRQ_HANDLED;
>          }
> 
> -       return IRQ_HANDLED;
> +       dev_dbg(video->dev, "unhandled states: %08x\n", sts);
> +
> +       return IRQ_NONE;
>   }
> 

Looks good but we need to consider cases that needs going thru all
if statements other than cases call aspeed_video_irq_res_change().

I made a new patch like below. It has worked well so far. Will submit
v2 after making sufficient test using it.


diff --git a/drivers/media/platform/aspeed-video.c 
b/drivers/media/platform/aspeed-video.c
index 77c209a472ca..8096319733ee 100644
--- a/drivers/media/platform/aspeed-video.c
+++ b/drivers/media/platform/aspeed-video.c
@@ -488,6 +488,7 @@ static void aspeed_video_off(struct aspeed_video *video)

  	/* Disable interrupts */
  	aspeed_video_write(video, VE_INTERRUPT_CTRL, 0);
+	aspeed_video_write(video, VE_INTERRUPT_STATUS, 0xffffffff);

  	/* Turn off the relevant clocks */
  	clk_disable(video->vclk);
@@ -539,24 +540,23 @@ static void aspeed_video_irq_res_change(struct 
aspeed_video *video)
  static irqreturn_t aspeed_video_irq(int irq, void *arg)
  {
  	struct aspeed_video *video = arg;
-	u32 sts = aspeed_video_read(video, VE_INTERRUPT_STATUS);
+	u32 irq_received = aspeed_video_read(video, VE_INTERRUPT_STATUS);
+	u32 irq_handled = 0;

  	/*
  	 * Resolution changed or signal was lost; reset the engine and
  	 * re-initialize
  	 */
-	if (sts & VE_INTERRUPT_MODE_DETECT_WD) {
+	if (irq_received & VE_INTERRUPT_MODE_DETECT_WD) {
  		aspeed_video_irq_res_change(video);
  		return IRQ_HANDLED;
  	}

-	if (sts & VE_INTERRUPT_MODE_DETECT) {
+	if (irq_received & VE_INTERRUPT_MODE_DETECT) {
  		if (test_bit(VIDEO_RES_DETECT, &video->flags)) {
  			aspeed_video_update(video, VE_INTERRUPT_CTRL,
  					    VE_INTERRUPT_MODE_DETECT, 0);
-			aspeed_video_write(video, VE_INTERRUPT_STATUS,
-					   VE_INTERRUPT_MODE_DETECT);
-
+			irq_handled |= VE_INTERRUPT_MODE_DETECT;
  			set_bit(VIDEO_MODE_DETECT_DONE, &video->flags);
  			wake_up_interruptible_all(&video->wait);
  		} else {
@@ -569,7 +569,7 @@ static irqreturn_t aspeed_video_irq(int irq, void *arg)
  		}
  	}

-	if (sts & VE_INTERRUPT_COMP_COMPLETE) {
+	if (irq_received & VE_INTERRUPT_COMP_COMPLETE) {
  		struct aspeed_video_buffer *buf;
  		u32 frame_size = aspeed_video_read(video,
  						   VE_OFFSET_COMP_STREAM);
@@ -599,14 +599,15 @@ static irqreturn_t aspeed_video_irq(int irq, void 
*arg)
  				    VE_SEQ_CTRL_TRIG_COMP, 0);
  		aspeed_video_update(video, VE_INTERRUPT_CTRL,
  				    VE_INTERRUPT_COMP_COMPLETE, 0);
-		aspeed_video_write(video, VE_INTERRUPT_STATUS,
-				   VE_INTERRUPT_COMP_COMPLETE);
-
+		irq_handled |= VE_INTERRUPT_COMP_COMPLETE;
  		if (test_bit(VIDEO_STREAMING, &video->flags) && buf)
  			aspeed_video_start_frame(video);
  	}

-	return IRQ_HANDLED;
+	if (irq_handled)
+		aspeed_video_write(video, VE_INTERRUPT_STATUS, irq_handled);
+
+	return irq_received == irq_handled ? IRQ_HANDLED : IRQ_NONE;
  }

  static void aspeed_video_check_and_set_polarity(struct aspeed_video 
*video)
Andrew Jeffery May 1, 2019, 1:29 a.m. UTC | #7
On Wed, 1 May 2019, at 05:30, Jae Hyun Yoo wrote:
> On 4/29/2019 11:01 PM, Joel Stanley wrote:
> > On Tue, 30 Apr 2019 at 03:04, Joel Stanley <joel@jms.id.au> wrote:
> >>
> >> On Mon, 29 Apr 2019 at 23:38, Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> wrote:
> >>>
> >>> On 4/29/2019 3:29 PM, Eddie James wrote:
> >>>>
> >>>> On 4/25/19 5:20 PM, Jae Hyun Yoo wrote:
> >>>>> Interrupt status flags should be cleared immediately otherwise
> >>>>> interrupt handler will be called again and again until the flag
> >>>>> is cleared, but this driver clears some flags through a 500ms
> >>>>> delayed work which is a bad idea in interrupt handling, so this
> >>>>> commit makes the interrupt handler clear the status flags
> >>>>> immediately.
> >>>>>
> >>>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
> >>>>> ---
> >>>>>    drivers/media/platform/aspeed-video.c | 12 +++++++-----
> >>>>>    1 file changed, 7 insertions(+), 5 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/media/platform/aspeed-video.c
> >>>>> b/drivers/media/platform/aspeed-video.c
> >>>>> index 77c209a472ca..e218f375b9f5 100644
> >>>>> --- a/drivers/media/platform/aspeed-video.c
> >>>>> +++ b/drivers/media/platform/aspeed-video.c
> >>>>> @@ -546,17 +546,18 @@ static irqreturn_t aspeed_video_irq(int irq,
> >>>>> void *arg)
> >>>>>         * re-initialize
> >>>>>         */
> >>>>>        if (sts & VE_INTERRUPT_MODE_DETECT_WD) {
> >>>>> +        aspeed_video_write(video, VE_INTERRUPT_STATUS,
> >>>>> +                   VE_INTERRUPT_MODE_DETECT_WD);
> >>>>
> >>>>
> >>>> aspeed_video_irq_res_change disables all IRQs and turns off the clocks.
> >>>> This shouldn't be necessary.
> >>>
> >>> In fact, this patch fixes a watch dog reset with printing out a stack
> >>> trace like below. This happens very rarely but it's critical because it
> >>> causes a BMC reset. In my experiments, interrupt flags should be cleared
> >>> even with the aspeed_video_write(video, VE_INTERRUPT_CTRL, 0) in
> >>> aspeed_video_off(), or we should add
> >>> apeed_video_write(video, VE_INTERRUPT_STATUS, 0xffffffff)
> >>> before the disabling interrupt. I think the way in this patch is better.
> >>
> >> In general, a driver should certainly be clearing (acking) the
> >> interrupt bits in the interrupt handler before returning.
> >>
> >> Jae, it would be interesting to know the value of VE_INTERRUPT_STATUS
> >> in the soft lockup situation.
> >>
> >> I took a closer look at this function, and it should probably not
> >> return IRQ_HANDLED at the bottom, as it may have fallen through all of
> >> the if statements and not have handled any interrupt. Jae, can you
> >> take a look at this and send another patch if you think that is
> >> correct.
> > 
> > Something like the patch below. It also made me wonder why you don't
> > return  from the flags = VIDEO_RES_DETECT state?
> 
> Issue is caused because VE_INTERRUPT_MODE_DETECT_WD or
> VE_INTERRUPT_MODE_DETECT is cleared by a 500ms delayed work when the
> handler calls aspeed_video_irq_res_change(). As Eddie said, it disables
> interrupt through aspeed_video_off() but it doesn't mean that the
> interrupt bit is cleared at that timing. Actually, the interrupt bit
> is cleared by aspeed_video_resolution_work() 500ms after it gets the
> interrupt request so this patch is going to clear the bits immediately.
> 
> > I don't have a way to test. Is there a simple command I can run on a
> > BMC to test, or do I need the entire stack?
> 
> The watch dog reset issue happens when video mode change is occurred. I
> use Ubuntu GUI mode and set screen saver to blank screen with 1 minute
> expiration time. And then, open bmcweb page and navigate to Server
> control -> KVM page. As long as a user stays in this page, KVM video
> will be off by the screen saver and then it will be awaken by KVM
> service so this sequence will be repeated infinitely. I usually see the
> issue at least once if I put my system under this overnight stress test.
> 
> > --- a/drivers/media/platform/aspeed-video.c
> > +++ b/drivers/media/platform/aspeed-video.c
> > @@ -566,8 +566,8 @@ static irqreturn_t aspeed_video_irq(int irq, void *arg)
> >                           * detection; reset the engine and re-initialize
> >                           */
> >                          aspeed_video_irq_res_change(video);
> > -                       return IRQ_HANDLED;
> >                  }
> > +               return IRQ_HANDLED;
> >          }
> > 
> >          if (sts & VE_INTERRUPT_COMP_COMPLETE) {
> > @@ -606,9 +606,13 @@ static irqreturn_t aspeed_video_irq(int irq, void *arg)
> > 
> >                  if (test_bit(VIDEO_STREAMING, &video->flags) && buf)
> >                          aspeed_video_start_frame(video);
> > +
> > +               return IRQ_HANDLED;
> >          }
> > 
> > -       return IRQ_HANDLED;
> > +       dev_dbg(video->dev, "unhandled states: %08x\n", sts);
> > +
> > +       return IRQ_NONE;
> >   }
> > 
> 
> Looks good but we need to consider cases that needs going thru all
> if statements other than cases call aspeed_video_irq_res_change().
> 
> I made a new patch like below. It has worked well so far. Will submit
> v2 after making sufficient test using it.
> 
> 
> diff --git a/drivers/media/platform/aspeed-video.c 
> b/drivers/media/platform/aspeed-video.c
> index 77c209a472ca..8096319733ee 100644
> --- a/drivers/media/platform/aspeed-video.c
> +++ b/drivers/media/platform/aspeed-video.c
> @@ -488,6 +488,7 @@ static void aspeed_video_off(struct aspeed_video *video)
> 
>   	/* Disable interrupts */
>   	aspeed_video_write(video, VE_INTERRUPT_CTRL, 0);
> +	aspeed_video_write(video, VE_INTERRUPT_STATUS, 0xffffffff);
> 
>   	/* Turn off the relevant clocks */
>   	clk_disable(video->vclk);
> @@ -539,24 +540,23 @@ static void aspeed_video_irq_res_change(struct 
> aspeed_video *video)
>   static irqreturn_t aspeed_video_irq(int irq, void *arg)
>   {
>   	struct aspeed_video *video = arg;
> -	u32 sts = aspeed_video_read(video, VE_INTERRUPT_STATUS);
> +	u32 irq_received = aspeed_video_read(video, VE_INTERRUPT_STATUS);
> +	u32 irq_handled = 0;
> 
>   	/*
>   	 * Resolution changed or signal was lost; reset the engine and
>   	 * re-initialize
>   	 */
> -	if (sts & VE_INTERRUPT_MODE_DETECT_WD) {
> +	if (irq_received & VE_INTERRUPT_MODE_DETECT_WD) {
>   		aspeed_video_irq_res_change(video);
>   		return IRQ_HANDLED;
>   	}
> 
> -	if (sts & VE_INTERRUPT_MODE_DETECT) {
> +	if (irq_received & VE_INTERRUPT_MODE_DETECT) {
>   		if (test_bit(VIDEO_RES_DETECT, &video->flags)) {
>   			aspeed_video_update(video, VE_INTERRUPT_CTRL,
>   					    VE_INTERRUPT_MODE_DETECT, 0);
> -			aspeed_video_write(video, VE_INTERRUPT_STATUS,
> -					   VE_INTERRUPT_MODE_DETECT);
> -
> +			irq_handled |= VE_INTERRUPT_MODE_DETECT;
>   			set_bit(VIDEO_MODE_DETECT_DONE, &video->flags);
>   			wake_up_interruptible_all(&video->wait);
>   		} else {
> @@ -569,7 +569,7 @@ static irqreturn_t aspeed_video_irq(int irq, void *arg)
>   		}
>   	}
> 
> -	if (sts & VE_INTERRUPT_COMP_COMPLETE) {
> +	if (irq_received & VE_INTERRUPT_COMP_COMPLETE) {
>   		struct aspeed_video_buffer *buf;
>   		u32 frame_size = aspeed_video_read(video,
>   						   VE_OFFSET_COMP_STREAM);
> @@ -599,14 +599,15 @@ static irqreturn_t aspeed_video_irq(int irq, void 
> *arg)
>   				    VE_SEQ_CTRL_TRIG_COMP, 0);
>   		aspeed_video_update(video, VE_INTERRUPT_CTRL,
>   				    VE_INTERRUPT_COMP_COMPLETE, 0);
> -		aspeed_video_write(video, VE_INTERRUPT_STATUS,
> -				   VE_INTERRUPT_COMP_COMPLETE);
> -
> +		irq_handled |= VE_INTERRUPT_COMP_COMPLETE;
>   		if (test_bit(VIDEO_STREAMING, &video->flags) && buf)
>   			aspeed_video_start_frame(video);
>   	}
> 
> -	return IRQ_HANDLED;
> +	if (irq_handled)
> +		aspeed_video_write(video, VE_INTERRUPT_STATUS, irq_handled);
> +
> +	return irq_received == irq_handled ? IRQ_HANDLED : IRQ_NONE;

I don't think we need two variables? We can clear bits out of sts as we go,
and then the return condition becomes:

    return sts ? IRQ_NONE : IRQ_HANDLED;

>   }
> 
>   static void aspeed_video_check_and_set_polarity(struct aspeed_video 
> *video)
>
Jae Hyun Yoo May 1, 2019, 4:20 p.m. UTC | #8
On 4/30/2019 6:29 PM, Andrew Jeffery wrote:
> On Wed, 1 May 2019, at 05:30, Jae Hyun Yoo wrote:
>> On 4/29/2019 11:01 PM, Joel Stanley wrote:
>>> On Tue, 30 Apr 2019 at 03:04, Joel Stanley <joel@jms.id.au> wrote:
>>>>
>>>> On Mon, 29 Apr 2019 at 23:38, Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com> wrote:
>>>>>
>>>>> On 4/29/2019 3:29 PM, Eddie James wrote:
>>>>>>
>>>>>> On 4/25/19 5:20 PM, Jae Hyun Yoo wrote:
>>>>>>> Interrupt status flags should be cleared immediately otherwise
>>>>>>> interrupt handler will be called again and again until the flag
>>>>>>> is cleared, but this driver clears some flags through a 500ms
>>>>>>> delayed work which is a bad idea in interrupt handling, so this
>>>>>>> commit makes the interrupt handler clear the status flags
>>>>>>> immediately.
>>>>>>>
>>>>>>> Signed-off-by: Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>
>>>>>>> ---
>>>>>>>     drivers/media/platform/aspeed-video.c | 12 +++++++-----
>>>>>>>     1 file changed, 7 insertions(+), 5 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/media/platform/aspeed-video.c
>>>>>>> b/drivers/media/platform/aspeed-video.c
>>>>>>> index 77c209a472ca..e218f375b9f5 100644
>>>>>>> --- a/drivers/media/platform/aspeed-video.c
>>>>>>> +++ b/drivers/media/platform/aspeed-video.c
>>>>>>> @@ -546,17 +546,18 @@ static irqreturn_t aspeed_video_irq(int irq,
>>>>>>> void *arg)
>>>>>>>          * re-initialize
>>>>>>>          */
>>>>>>>         if (sts & VE_INTERRUPT_MODE_DETECT_WD) {
>>>>>>> +        aspeed_video_write(video, VE_INTERRUPT_STATUS,
>>>>>>> +                   VE_INTERRUPT_MODE_DETECT_WD);
>>>>>>
>>>>>>
>>>>>> aspeed_video_irq_res_change disables all IRQs and turns off the clocks.
>>>>>> This shouldn't be necessary.
>>>>>
>>>>> In fact, this patch fixes a watch dog reset with printing out a stack
>>>>> trace like below. This happens very rarely but it's critical because it
>>>>> causes a BMC reset. In my experiments, interrupt flags should be cleared
>>>>> even with the aspeed_video_write(video, VE_INTERRUPT_CTRL, 0) in
>>>>> aspeed_video_off(), or we should add
>>>>> apeed_video_write(video, VE_INTERRUPT_STATUS, 0xffffffff)
>>>>> before the disabling interrupt. I think the way in this patch is better.
>>>>
>>>> In general, a driver should certainly be clearing (acking) the
>>>> interrupt bits in the interrupt handler before returning.
>>>>
>>>> Jae, it would be interesting to know the value of VE_INTERRUPT_STATUS
>>>> in the soft lockup situation.
>>>>
>>>> I took a closer look at this function, and it should probably not
>>>> return IRQ_HANDLED at the bottom, as it may have fallen through all of
>>>> the if statements and not have handled any interrupt. Jae, can you
>>>> take a look at this and send another patch if you think that is
>>>> correct.
>>>
>>> Something like the patch below. It also made me wonder why you don't
>>> return  from the flags = VIDEO_RES_DETECT state?
>>
>> Issue is caused because VE_INTERRUPT_MODE_DETECT_WD or
>> VE_INTERRUPT_MODE_DETECT is cleared by a 500ms delayed work when the
>> handler calls aspeed_video_irq_res_change(). As Eddie said, it disables
>> interrupt through aspeed_video_off() but it doesn't mean that the
>> interrupt bit is cleared at that timing. Actually, the interrupt bit
>> is cleared by aspeed_video_resolution_work() 500ms after it gets the
>> interrupt request so this patch is going to clear the bits immediately.
>>
>>> I don't have a way to test. Is there a simple command I can run on a
>>> BMC to test, or do I need the entire stack?
>>
>> The watch dog reset issue happens when video mode change is occurred. I
>> use Ubuntu GUI mode and set screen saver to blank screen with 1 minute
>> expiration time. And then, open bmcweb page and navigate to Server
>> control -> KVM page. As long as a user stays in this page, KVM video
>> will be off by the screen saver and then it will be awaken by KVM
>> service so this sequence will be repeated infinitely. I usually see the
>> issue at least once if I put my system under this overnight stress test.
>>
>>> --- a/drivers/media/platform/aspeed-video.c
>>> +++ b/drivers/media/platform/aspeed-video.c
>>> @@ -566,8 +566,8 @@ static irqreturn_t aspeed_video_irq(int irq, void *arg)
>>>                            * detection; reset the engine and re-initialize
>>>                            */
>>>                           aspeed_video_irq_res_change(video);
>>> -                       return IRQ_HANDLED;
>>>                   }
>>> +               return IRQ_HANDLED;
>>>           }
>>>
>>>           if (sts & VE_INTERRUPT_COMP_COMPLETE) {
>>> @@ -606,9 +606,13 @@ static irqreturn_t aspeed_video_irq(int irq, void *arg)
>>>
>>>                   if (test_bit(VIDEO_STREAMING, &video->flags) && buf)
>>>                           aspeed_video_start_frame(video);
>>> +
>>> +               return IRQ_HANDLED;
>>>           }
>>>
>>> -       return IRQ_HANDLED;
>>> +       dev_dbg(video->dev, "unhandled states: %08x\n", sts);
>>> +
>>> +       return IRQ_NONE;
>>>    }
>>>
>>
>> Looks good but we need to consider cases that needs going thru all
>> if statements other than cases call aspeed_video_irq_res_change().
>>
>> I made a new patch like below. It has worked well so far. Will submit
>> v2 after making sufficient test using it.
>>
>>
>> diff --git a/drivers/media/platform/aspeed-video.c
>> b/drivers/media/platform/aspeed-video.c
>> index 77c209a472ca..8096319733ee 100644
>> --- a/drivers/media/platform/aspeed-video.c
>> +++ b/drivers/media/platform/aspeed-video.c
>> @@ -488,6 +488,7 @@ static void aspeed_video_off(struct aspeed_video *video)
>>
>>    	/* Disable interrupts */
>>    	aspeed_video_write(video, VE_INTERRUPT_CTRL, 0);
>> +	aspeed_video_write(video, VE_INTERRUPT_STATUS, 0xffffffff);
>>
>>    	/* Turn off the relevant clocks */
>>    	clk_disable(video->vclk);
>> @@ -539,24 +540,23 @@ static void aspeed_video_irq_res_change(struct
>> aspeed_video *video)
>>    static irqreturn_t aspeed_video_irq(int irq, void *arg)
>>    {
>>    	struct aspeed_video *video = arg;
>> -	u32 sts = aspeed_video_read(video, VE_INTERRUPT_STATUS);
>> +	u32 irq_received = aspeed_video_read(video, VE_INTERRUPT_STATUS);
>> +	u32 irq_handled = 0;
>>
>>    	/*
>>    	 * Resolution changed or signal was lost; reset the engine and
>>    	 * re-initialize
>>    	 */
>> -	if (sts & VE_INTERRUPT_MODE_DETECT_WD) {
>> +	if (irq_received & VE_INTERRUPT_MODE_DETECT_WD) {
>>    		aspeed_video_irq_res_change(video);
>>    		return IRQ_HANDLED;
>>    	}
>>
>> -	if (sts & VE_INTERRUPT_MODE_DETECT) {
>> +	if (irq_received & VE_INTERRUPT_MODE_DETECT) {
>>    		if (test_bit(VIDEO_RES_DETECT, &video->flags)) {
>>    			aspeed_video_update(video, VE_INTERRUPT_CTRL,
>>    					    VE_INTERRUPT_MODE_DETECT, 0);
>> -			aspeed_video_write(video, VE_INTERRUPT_STATUS,
>> -					   VE_INTERRUPT_MODE_DETECT);
>> -
>> +			irq_handled |= VE_INTERRUPT_MODE_DETECT;
>>    			set_bit(VIDEO_MODE_DETECT_DONE, &video->flags);
>>    			wake_up_interruptible_all(&video->wait);
>>    		} else {
>> @@ -569,7 +569,7 @@ static irqreturn_t aspeed_video_irq(int irq, void *arg)
>>    		}
>>    	}
>>
>> -	if (sts & VE_INTERRUPT_COMP_COMPLETE) {
>> +	if (irq_received & VE_INTERRUPT_COMP_COMPLETE) {
>>    		struct aspeed_video_buffer *buf;
>>    		u32 frame_size = aspeed_video_read(video,
>>    						   VE_OFFSET_COMP_STREAM);
>> @@ -599,14 +599,15 @@ static irqreturn_t aspeed_video_irq(int irq, void
>> *arg)
>>    				    VE_SEQ_CTRL_TRIG_COMP, 0);
>>    		aspeed_video_update(video, VE_INTERRUPT_CTRL,
>>    				    VE_INTERRUPT_COMP_COMPLETE, 0);
>> -		aspeed_video_write(video, VE_INTERRUPT_STATUS,
>> -				   VE_INTERRUPT_COMP_COMPLETE);
>> -
>> +		irq_handled |= VE_INTERRUPT_COMP_COMPLETE;
>>    		if (test_bit(VIDEO_STREAMING, &video->flags) && buf)
>>    			aspeed_video_start_frame(video);
>>    	}
>>
>> -	return IRQ_HANDLED;
>> +	if (irq_handled)
>> +		aspeed_video_write(video, VE_INTERRUPT_STATUS, irq_handled);
>> +
>> +	return irq_received == irq_handled ? IRQ_HANDLED : IRQ_NONE;
> 
> I don't think we need two variables? We can clear bits out of sts as we go,
> and then the return condition becomes:
> 
>      return sts ? IRQ_NONE : IRQ_HANDLED;

Yeah, that would be neater. I'll change it in v2 like you suggested.

Thanks,
Jae

>>    }
>>
>>    static void aspeed_video_check_and_set_polarity(struct aspeed_video
>> *video)
>>
diff mbox series

Patch

diff --git a/drivers/media/platform/aspeed-video.c b/drivers/media/platform/aspeed-video.c
index 77c209a472ca..e218f375b9f5 100644
--- a/drivers/media/platform/aspeed-video.c
+++ b/drivers/media/platform/aspeed-video.c
@@ -546,17 +546,18 @@  static irqreturn_t aspeed_video_irq(int irq, void *arg)
 	 * re-initialize
 	 */
 	if (sts & VE_INTERRUPT_MODE_DETECT_WD) {
+		aspeed_video_write(video, VE_INTERRUPT_STATUS,
+				   VE_INTERRUPT_MODE_DETECT_WD);
 		aspeed_video_irq_res_change(video);
 		return IRQ_HANDLED;
 	}
 
 	if (sts & VE_INTERRUPT_MODE_DETECT) {
+		aspeed_video_write(video, VE_INTERRUPT_STATUS,
+				   VE_INTERRUPT_MODE_DETECT);
 		if (test_bit(VIDEO_RES_DETECT, &video->flags)) {
 			aspeed_video_update(video, VE_INTERRUPT_CTRL,
 					    VE_INTERRUPT_MODE_DETECT, 0);
-			aspeed_video_write(video, VE_INTERRUPT_STATUS,
-					   VE_INTERRUPT_MODE_DETECT);
-
 			set_bit(VIDEO_MODE_DETECT_DONE, &video->flags);
 			wake_up_interruptible_all(&video->wait);
 		} else {
@@ -574,6 +575,9 @@  static irqreturn_t aspeed_video_irq(int irq, void *arg)
 		u32 frame_size = aspeed_video_read(video,
 						   VE_OFFSET_COMP_STREAM);
 
+		aspeed_video_write(video, VE_INTERRUPT_STATUS,
+				   VE_INTERRUPT_COMP_COMPLETE);
+
 		spin_lock(&video->lock);
 		clear_bit(VIDEO_FRAME_INPRG, &video->flags);
 		buf = list_first_entry_or_null(&video->buffers,
@@ -599,8 +603,6 @@  static irqreturn_t aspeed_video_irq(int irq, void *arg)
 				    VE_SEQ_CTRL_TRIG_COMP, 0);
 		aspeed_video_update(video, VE_INTERRUPT_CTRL,
 				    VE_INTERRUPT_COMP_COMPLETE, 0);
-		aspeed_video_write(video, VE_INTERRUPT_STATUS,
-				   VE_INTERRUPT_COMP_COMPLETE);
 
 		if (test_bit(VIDEO_STREAMING, &video->flags) && buf)
 			aspeed_video_start_frame(video);