diff mbox

[RFC,1/2] genirq: Add runtime resume/suspend support for IRQ chips

Message ID 1447166377-19707-2-git-send-email-jonathanh@nvidia.com
State Superseded, archived
Headers show

Commit Message

Jon Hunter Nov. 10, 2015, 2:39 p.m. UTC
Some IRQ chips may be located in a power domain outside of the CPU subsystem
and hence will require device specific runtime power management. Ideally,
rather than adding more functions to the irq_chip_ops function table, using
existing chip functions such as irq_startup/shutdown or
irq_request/release_resources() would be best. However, these existing chip
functions are called in the context of a spinlock which is not ideal for
power management operations that may take some time to power up a domain.

Two possible solutions are:
1. Move existing chip operators such as irq_request/release_resources()
   outside of the spinlock and use these helpers.
2. Add new chip operators that are called outside of any spinlocks while
   setting up and freeing an IRQ.

Not knowing whether we can safely move irq_request/release_resources() to
outside the spinlock (but hopefully this will solicit some feedback), add
new chip operators for runtime resuming and suspending of an IRQ chip.

With this change the irqchip clocks will be enabled/disabled on allocating
and freeing interrupts.

Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
---
 include/linux/irq.h    |  4 ++++
 kernel/irq/internals.h | 17 +++++++++++++++++
 kernel/irq/manage.c    |  7 +++++++
 3 files changed, 28 insertions(+)

Comments

Thomas Gleixner Nov. 10, 2015, 3:26 p.m. UTC | #1
Jon,

On Tue, 10 Nov 2015, Jon Hunter wrote:
>  	void		(*irq_suspend)(struct irq_data *data);
>  	void		(*irq_resume)(struct irq_data *data);
> +	int		(*irq_runtime_suspend)(struct irq_data *data);
> +	int		(*irq_runtime_resume)(struct irq_data *data);
>  	void		(*irq_pm_shutdown)(struct irq_data *data);

So this is the second patch within a few days which adds that just
with different names:

http://lkml.kernel.org/r/1446668160-17522-2-git-send-email-soren.brinkmann@xilinx.com

Can you folks please tell me which of the names is the correct one?
  
> +/* Inline functions for support of irq chips that require runtime pm */
> +static inline int chip_runtime_resume(struct irq_desc *desc)
> +{
> +	if (!desc->irq_data.chip->irq_runtime_resume)
> +		return 0;
> +
> +	return desc->irq_data.chip->irq_runtime_resume(&desc->irq_data);
> +}
> +
> +static inline int chip_runtime_suspend(struct irq_desc *desc)
> +{
> +	if (!desc->irq_data.chip->irq_runtime_suspend)
> +		return 0;
> +
> +	return desc->irq_data.chip->irq_runtime_suspend(&desc->irq_data);

We really don't need a return value for that one.

> +}
> +
>  #define _IRQ_DESC_CHECK		(1 << 0)
>  #define _IRQ_DESC_PERCPU	(1 << 1)
>  
> diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
> index 0eebaeef317b..66e33df73140 100644
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -1116,6 +1116,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
>  	if (!try_module_get(desc->owner))
>  		return -ENODEV;
>  
> +	ret = chip_runtime_resume(desc);
> +	if (ret < 0)
> +		return ret;

Leaks module ref count.

> +
>  	new->irq = irq;
>  
>  	/*
> @@ -1393,6 +1397,7 @@ out_thread:
>  		put_task_struct(t);
>  	}
>  out_mput:
> +	chip_runtime_suspend(desc);
>  	module_put(desc->owner);
>  	return ret;
>  }
> @@ -1506,6 +1511,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
>  		}
>  	}
>  
> +	chip_runtime_suspend(desc);
>  	module_put(desc->owner);
>  	kfree(action->secondary);
>  	return action;
> @@ -1792,6 +1798,7 @@ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_
>  
>  	unregister_handler_proc(irq, action);
>  
> +	chip_runtime_suspend(desc);

Where is the corresponding call in request_percpu_irq() ?

Can you folks please agree on something which is correct and complete?

Thanks,

	tglx
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jon Hunter Nov. 10, 2015, 3:58 p.m. UTC | #2
Hi Thomas,

On 10/11/15 15:26, Thomas Gleixner wrote:
> Jon,
> 
> On Tue, 10 Nov 2015, Jon Hunter wrote:
>>  	void		(*irq_suspend)(struct irq_data *data);
>>  	void		(*irq_resume)(struct irq_data *data);
>> +	int		(*irq_runtime_suspend)(struct irq_data *data);
>> +	int		(*irq_runtime_resume)(struct irq_data *data);
>>  	void		(*irq_pm_shutdown)(struct irq_data *data);
> 
> So this is the second patch within a few days which adds that just
> with different names:
> 
> http://lkml.kernel.org/r/1446668160-17522-2-git-send-email-soren.brinkmann@xilinx.com
> 
> Can you folks please tell me which of the names is the correct one?

Sorry. I was unaware of that patch.

>> +/* Inline functions for support of irq chips that require runtime pm */
>> +static inline int chip_runtime_resume(struct irq_desc *desc)
>> +{
>> +	if (!desc->irq_data.chip->irq_runtime_resume)
>> +		return 0;
>> +
>> +	return desc->irq_data.chip->irq_runtime_resume(&desc->irq_data);
>> +}
>> +
>> +static inline int chip_runtime_suspend(struct irq_desc *desc)
>> +{
>> +	if (!desc->irq_data.chip->irq_runtime_suspend)
>> +		return 0;
>> +
>> +	return desc->irq_data.chip->irq_runtime_suspend(&desc->irq_data);
> 
> We really don't need a return value for that one.

Ok.

>> +}
>> +
>>  #define _IRQ_DESC_CHECK		(1 << 0)
>>  #define _IRQ_DESC_PERCPU	(1 << 1)
>>  
>> diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
>> index 0eebaeef317b..66e33df73140 100644
>> --- a/kernel/irq/manage.c
>> +++ b/kernel/irq/manage.c
>> @@ -1116,6 +1116,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
>>  	if (!try_module_get(desc->owner))
>>  		return -ENODEV;
>>  
>> +	ret = chip_runtime_resume(desc);
>> +	if (ret < 0)
>> +		return ret;
> 
> Leaks module ref count.

Ok.

>> +
>>  	new->irq = irq;
>>  
>>  	/*
>> @@ -1393,6 +1397,7 @@ out_thread:
>>  		put_task_struct(t);
>>  	}
>>  out_mput:
>> +	chip_runtime_suspend(desc);
>>  	module_put(desc->owner);
>>  	return ret;
>>  }
>> @@ -1506,6 +1511,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
>>  		}
>>  	}
>>  
>> +	chip_runtime_suspend(desc);
>>  	module_put(desc->owner);
>>  	kfree(action->secondary);
>>  	return action;
>> @@ -1792,6 +1798,7 @@ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_
>>  
>>  	unregister_handler_proc(irq, action);
>>  
>> +	chip_runtime_suspend(desc);
> 
> Where is the corresponding call in request_percpu_irq() ?

I was trying to simplify matters by placing the resume call in
__setup_irq() as opposed to requested_threaded_irq(). However, the would
mean the resume is inside the bus_lock and may be I should not assume
that I can sleep here.

> Can you folks please agree on something which is correct and complete?

Soren I am happy to defer to your patch and drop this. My only comment
would be what about the request_percpu_irq() path in your patch?

Cheers
Jon
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Grygorii Strashko Nov. 10, 2015, 4:47 p.m. UTC | #3
Hi Jon,
On 11/10/2015 05:58 PM, Jon Hunter wrote:
> Hi Thomas,
> 
> On 10/11/15 15:26, Thomas Gleixner wrote:
>> Jon,
>>
>> On Tue, 10 Nov 2015, Jon Hunter wrote:
>>>   	void		(*irq_suspend)(struct irq_data *data);
>>>   	void		(*irq_resume)(struct irq_data *data);
>>> +	int		(*irq_runtime_suspend)(struct irq_data *data);
>>> +	int		(*irq_runtime_resume)(struct irq_data *data);
>>>   	void		(*irq_pm_shutdown)(struct irq_data *data);
>>
>> So this is the second patch within a few days which adds that just
>> with different names:
>>
>> http://lkml.kernel.org/r/1446668160-17522-2-git-send-email-soren.brinkmann@xilinx.com
>>
>> Can you folks please tell me which of the names is the correct one?
> 
> Sorry. I was unaware of that patch.
> 
>>> +/* Inline functions for support of irq chips that require runtime pm */
>>> +static inline int chip_runtime_resume(struct irq_desc *desc)
>>> +{
>>> +	if (!desc->irq_data.chip->irq_runtime_resume)
>>> +		return 0;
>>> +
>>> +	return desc->irq_data.chip->irq_runtime_resume(&desc->irq_data);
>>> +}
>>> +
>>> +static inline int chip_runtime_suspend(struct irq_desc *desc)
>>> +{
>>> +	if (!desc->irq_data.chip->irq_runtime_suspend)
>>> +		return 0;
>>> +
>>> +	return desc->irq_data.chip->irq_runtime_suspend(&desc->irq_data);
>>
>> We really don't need a return value for that one.
> 
> Ok.
> 
>>> +}
>>> +
>>>   #define _IRQ_DESC_CHECK		(1 << 0)
>>>   #define _IRQ_DESC_PERCPU	(1 << 1)
>>>   
>>> diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
>>> index 0eebaeef317b..66e33df73140 100644
>>> --- a/kernel/irq/manage.c
>>> +++ b/kernel/irq/manage.c
>>> @@ -1116,6 +1116,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
>>>   	if (!try_module_get(desc->owner))
>>>   		return -ENODEV;
>>>   
>>> +	ret = chip_runtime_resume(desc);
>>> +	if (ret < 0)
>>> +		return ret;
>>
>> Leaks module ref count.
> 
> Ok.
> 
>>> +
>>>   	new->irq = irq;
>>>   
>>>   	/*
>>> @@ -1393,6 +1397,7 @@ out_thread:
>>>   		put_task_struct(t);
>>>   	}
>>>   out_mput:
>>> +	chip_runtime_suspend(desc);
>>>   	module_put(desc->owner);
>>>   	return ret;
>>>   }
>>> @@ -1506,6 +1511,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
>>>   		}
>>>   	}
>>>   
>>> +	chip_runtime_suspend(desc);
>>>   	module_put(desc->owner);
>>>   	kfree(action->secondary);
>>>   	return action;
>>> @@ -1792,6 +1798,7 @@ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_
>>>   
>>>   	unregister_handler_proc(irq, action);
>>>   
>>> +	chip_runtime_suspend(desc);
>>
>> Where is the corresponding call in request_percpu_irq() ?
> 
> I was trying to simplify matters by placing the resume call in
> __setup_irq() as opposed to requested_threaded_irq(). However, the would
> mean the resume is inside the bus_lock and may be I should not assume
> that I can sleep here.
> 
>> Can you folks please agree on something which is correct and complete?
> 
> Soren I am happy to defer to your patch and drop this. My only comment
> would be what about the request_percpu_irq() path in your patch?
> 

I have the same comment here as I asked Soren:
1) There are no restrictions to call irq set_irq_type() whenever,
as result HW can be accessed before request_x_irq()/__setup_irq().
And this is used quite widely now :(

For example, during OF boot:

[a]  irq_create_of_mapping()
   - irq_create_fwspec_mapping()
     - irq_set_irq_type()

or 
	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_HIGH);
	irq_set_chained_handler(irq, mx31ads_expio_irq_handler);

or
	irq_set_irq_type(alarm_irq, IRQ_TYPE_EDGE_BOTH);
	err = devm_request_irq(&pdev->dev, alarm_irq, fan_alarm_irq_handler,
(there are ~200 occurrences of irq set_irq_type in Kernel)

2) if i'm not wrong, the same is valid for irq_set_irq_wake() and irq_set_affinity()

I'm not saying all these code is correct, but that what's now in kernel :(
I've tried to test Soren's patch with omap-gpio and immediately hit case [a] :.(
Lars-Peter Clausen Nov. 10, 2015, 6:07 p.m. UTC | #4
On 11/10/2015 05:47 PM, Grygorii Strashko wrote:
[...]
>> I was trying to simplify matters by placing the resume call in
>> __setup_irq() as opposed to requested_threaded_irq(). However, the would
>> mean the resume is inside the bus_lock and may be I should not assume
>> that I can sleep here.
>>
>>> Can you folks please agree on something which is correct and complete?
>>
>> Soren I am happy to defer to your patch and drop this. My only comment
>> would be what about the request_percpu_irq() path in your patch?
>>
> 
> I have the same comment here as I asked Soren:
> 1) There are no restrictions to call irq set_irq_type() whenever,
> as result HW can be accessed before request_x_irq()/__setup_irq().
> And this is used quite widely now :(
> 

Changing the configuration of a resource that is not owned seems to be
fairly broken. In the worst case this will overwrite the configuration that
was set by owner of the resource.

Especially those that call irq_set_irq_type() directly before request_irq(),
given that you supply the trigger type to request_irq() which will make sure
that there are no conflicts and the configure.

This is a bit like calling gpio_set_direction() before you call
gpio_request(), which will also have PM issues.

> For example, during OF boot:
> 
> [a]  irq_create_of_mapping()
>    - irq_create_fwspec_mapping()
>      - irq_set_irq_type()
> 
> or 
> 	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_HIGH);
> 	irq_set_chained_handler(irq, mx31ads_expio_irq_handler);
> 
> or
> 	irq_set_irq_type(alarm_irq, IRQ_TYPE_EDGE_BOTH);
> 	err = devm_request_irq(&pdev->dev, alarm_irq, fan_alarm_irq_handler,
> (there are ~200 occurrences of irq set_irq_type in Kernel)
> 
> 2) if i'm not wrong, the same is valid for irq_set_irq_wake() and irq_set_affinity()
> 
> I'm not saying all these code is correct, but that what's now in kernel :(
> I've tried to test Soren's patch with omap-gpio and immediately hit case [a] :.( 

All functions for which are part of the public API and for which it is legal
to call them without calling request_irq() (or similar) first will need to
have pm_get()/pm_put().
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jon Hunter Nov. 11, 2015, 10:13 a.m. UTC | #5
On 10/11/15 18:07, Lars-Peter Clausen wrote:
> On 11/10/2015 05:47 PM, Grygorii Strashko wrote:
> [...]
>>> I was trying to simplify matters by placing the resume call in
>>> __setup_irq() as opposed to requested_threaded_irq(). However, the would
>>> mean the resume is inside the bus_lock and may be I should not assume
>>> that I can sleep here.
>>>
>>>> Can you folks please agree on something which is correct and complete?
>>>
>>> Soren I am happy to defer to your patch and drop this. My only comment
>>> would be what about the request_percpu_irq() path in your patch?
>>>
>>
>> I have the same comment here as I asked Soren:
>> 1) There are no restrictions to call irq set_irq_type() whenever,
>> as result HW can be accessed before request_x_irq()/__setup_irq().
>> And this is used quite widely now :(
>>
> 
> Changing the configuration of a resource that is not owned seems to be
> fairly broken. In the worst case this will overwrite the configuration that
> was set by owner of the resource.
> 
> Especially those that call irq_set_irq_type() directly before request_irq(),
> given that you supply the trigger type to request_irq() which will make sure
> that there are no conflicts and the configure.
> 
> This is a bit like calling gpio_set_direction() before you call
> gpio_request(), which will also have PM issues.

Yes, I agree that this does sound a bit odd, but ...

>> For example, during OF boot:
>>
>> [a]  irq_create_of_mapping()
>>    - irq_create_fwspec_mapping()
>>      - irq_set_irq_type()

The above means that if someone calls of_irq_get() (or
platform_get_irq()), before request_irq(), then this will call
irq_create_of_mapping() and hence, call irq_set_irq_type. So should
irq_create_fwspec_mapping() be setting the type in the first place? I
can see it is convenient to do it here.

>> or 
>> 	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_HIGH);
>> 	irq_set_chained_handler(irq, mx31ads_expio_irq_handler);
>>
>> or
>> 	irq_set_irq_type(alarm_irq, IRQ_TYPE_EDGE_BOTH);
>> 	err = devm_request_irq(&pdev->dev, alarm_irq, fan_alarm_irq_handler,
>> (there are ~200 occurrences of irq set_irq_type in Kernel)
>>
>> 2) if i'm not wrong, the same is valid for irq_set_irq_wake() and irq_set_affinity()
>>
>> I'm not saying all these code is correct, but that what's now in kernel :(
>> I've tried to test Soren's patch with omap-gpio and immediately hit case [a] :.( 
> 
> All functions for which are part of the public API and for which it is legal
> to call them without calling request_irq() (or similar) first will need to
> have pm_get()/pm_put().

Right. May be we can look at the various entry points to the chip
operators to get a feel for which public APIs need to be handled.

Cheers
Jon
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Grygorii Strashko Nov. 11, 2015, 3:41 p.m. UTC | #6
On 11/11/2015 12:13 PM, Jon Hunter wrote:
> 
> On 10/11/15 18:07, Lars-Peter Clausen wrote:
>> On 11/10/2015 05:47 PM, Grygorii Strashko wrote:
>> [...]
>>>> I was trying to simplify matters by placing the resume call in
>>>> __setup_irq() as opposed to requested_threaded_irq(). However, the would
>>>> mean the resume is inside the bus_lock and may be I should not assume
>>>> that I can sleep here.
>>>>
>>>>> Can you folks please agree on something which is correct and complete?
>>>>
>>>> Soren I am happy to defer to your patch and drop this. My only comment
>>>> would be what about the request_percpu_irq() path in your patch?
>>>>
>>>
>>> I have the same comment here as I asked Soren:
>>> 1) There are no restrictions to call irq set_irq_type() whenever,
>>> as result HW can be accessed before request_x_irq()/__setup_irq().
>>> And this is used quite widely now :(
>>>
>>
>> Changing the configuration of a resource that is not owned seems to be
>> fairly broken. In the worst case this will overwrite the configuration that
>> was set by owner of the resource.
>>
>> Especially those that call irq_set_irq_type() directly before request_irq(),
>> given that you supply the trigger type to request_irq() which will make sure
>> that there are no conflicts and the configure.
>>
>> This is a bit like calling gpio_set_direction() before you call
>> gpio_request(), which will also have PM issues.
> 
> Yes, I agree that this does sound a bit odd, but ...
> 
>>> For example, during OF boot:
>>>
>>> [a]  irq_create_of_mapping()
>>>     - irq_create_fwspec_mapping()
>>>       - irq_set_irq_type()
> 
> The above means that if someone calls of_irq_get() (or
> platform_get_irq()), before request_irq(), then this will call
> irq_create_of_mapping() and hence, call irq_set_irq_type. So should
> irq_create_fwspec_mapping() be setting the type in the first place? I
> can see it is convenient to do it here.

In general there is another option - save OF-flags and pass them to
__setup_irq() where they can be processed.

> 
>>> or
[b]
>>> 	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_HIGH);
>>> 	irq_set_chained_handler(irq, mx31ads_expio_irq_handler);

option: add "flag" parameter to irq_set_chained_handler

>>>
>>> or
[c]
>>> 	irq_set_irq_type(alarm_irq, IRQ_TYPE_EDGE_BOTH);
>>> 	err = devm_request_irq(&pdev->dev, alarm_irq, fan_alarm_irq_handler,
>>> (there are ~200 occurrences of irq set_irq_type in Kernel)
>>>
>>> 2) if i'm not wrong, the same is valid for irq_set_irq_wake() and irq_set_affinity()
>>>
>>> I'm not saying all these code is correct, but that what's now in kernel :(
>>> I've tried to test Soren's patch with omap-gpio and immediately hit case [a] :.(
>>
>> All functions for which are part of the public API and for which it is legal
>> to call them without calling request_irq() (or similar) first will need to
>> have pm_get()/pm_put().
> 
> Right. May be we can look at the various entry points to the chip
> operators to get a feel for which public APIs need to be handled.


Seems yes. But we need to be very careful with this, some of functions could be
called recursively (nested), like:
[d]
static int pcf857x_irq_set_wake(struct irq_data *data, unsigned int on)
{
	...
	error = irq_set_irq_wake(gpio->irq_parent, on);


Personally, I have nothing against irq_pm_(get|put) :) and thought about similar things
when tried to solve the same problem for omap-gpio driver.
But :(, I have to fall back to irq_bus_lock/sync_unlock, because of [a,b,c] - all above
APIs surrounded by chip_bus_lock/chip_bus_sync_unlock. ([d] - I've not hit it just because
I was lucky).
Jon Hunter Nov. 12, 2015, 10:59 a.m. UTC | #7
On 11/11/15 15:41, Grygorii Strashko wrote:
> On 11/11/2015 12:13 PM, Jon Hunter wrote:
>>
>> On 10/11/15 18:07, Lars-Peter Clausen wrote:
>>> On 11/10/2015 05:47 PM, Grygorii Strashko wrote:
>>> [...]
>>>>> I was trying to simplify matters by placing the resume call in
>>>>> __setup_irq() as opposed to requested_threaded_irq(). However, the would
>>>>> mean the resume is inside the bus_lock and may be I should not assume
>>>>> that I can sleep here.
>>>>>
>>>>>> Can you folks please agree on something which is correct and complete?
>>>>>
>>>>> Soren I am happy to defer to your patch and drop this. My only comment
>>>>> would be what about the request_percpu_irq() path in your patch?
>>>>>
>>>>
>>>> I have the same comment here as I asked Soren:
>>>> 1) There are no restrictions to call irq set_irq_type() whenever,
>>>> as result HW can be accessed before request_x_irq()/__setup_irq().
>>>> And this is used quite widely now :(
>>>>
>>>
>>> Changing the configuration of a resource that is not owned seems to be
>>> fairly broken. In the worst case this will overwrite the configuration that
>>> was set by owner of the resource.
>>>
>>> Especially those that call irq_set_irq_type() directly before request_irq(),
>>> given that you supply the trigger type to request_irq() which will make sure
>>> that there are no conflicts and the configure.
>>>
>>> This is a bit like calling gpio_set_direction() before you call
>>> gpio_request(), which will also have PM issues.
>>
>> Yes, I agree that this does sound a bit odd, but ...
>>
>>>> For example, during OF boot:
>>>>
>>>> [a]  irq_create_of_mapping()
>>>>     - irq_create_fwspec_mapping()
>>>>       - irq_set_irq_type()
>>
>> The above means that if someone calls of_irq_get() (or
>> platform_get_irq()), before request_irq(), then this will call
>> irq_create_of_mapping() and hence, call irq_set_irq_type. So should
>> irq_create_fwspec_mapping() be setting the type in the first place? I
>> can see it is convenient to do it here.
> 
> In general there is another option - save OF-flags and pass them to
> __setup_irq() where they can be processed.

Right, we could look at doing something like this.

>>>> or
> [b]
>>>> 	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_HIGH);
>>>> 	irq_set_chained_handler(irq, mx31ads_expio_irq_handler);
> 
> option: add "flag" parameter to irq_set_chained_handler
> 
>>>>
>>>> or
> [c]
>>>> 	irq_set_irq_type(alarm_irq, IRQ_TYPE_EDGE_BOTH);
>>>> 	err = devm_request_irq(&pdev->dev, alarm_irq, fan_alarm_irq_handler,
>>>> (there are ~200 occurrences of irq set_irq_type in Kernel)
>>>>
>>>> 2) if i'm not wrong, the same is valid for irq_set_irq_wake() and irq_set_affinity()
>>>>
>>>> I'm not saying all these code is correct, but that what's now in kernel :(
>>>> I've tried to test Soren's patch with omap-gpio and immediately hit case [a] :.(
>>>
>>> All functions for which are part of the public API and for which it is legal
>>> to call them without calling request_irq() (or similar) first will need to
>>> have pm_get()/pm_put().
>>
>> Right. May be we can look at the various entry points to the chip
>> operators to get a feel for which public APIs need to be handled.
> 
> 
> Seems yes. But we need to be very careful with this, some of functions could be
> called recursively (nested), like:
> [d]
> static int pcf857x_irq_set_wake(struct irq_data *data, unsigned int on)
> {
> 	...
> 	error = irq_set_irq_wake(gpio->irq_parent, on);
> 
> 
> Personally, I have nothing against irq_pm_(get|put) :) and thought about similar things
> when tried to solve the same problem for omap-gpio driver.
> But :(, I have to fall back to irq_bus_lock/sync_unlock, because of [a,b,c] - all above
> APIs surrounded by chip_bus_lock/chip_bus_sync_unlock. ([d] - I've not hit it just because
> I was lucky).

I had a quick peek at the omap-gpio driver and I see that internally you
are using the gpio ref-count to manage RPM and use the bus-lock hooks to
invoke RPM.

This can definitely be complex when considering all the potential paths,
but I think that we need to a look at this from a chip-ops perspective
because only the chip knows if it is accessible or not. That said, what
we need to assess is:

1. Which chip-ops should ONLY be called after an IRQ has been allocated
   (eg, enable/disable, mask/unmask, type, etc). These chip-ops should
   not try to control the chip PM, but should possibly WARN if called
   when  the chip is not accessible.
2. For chip-ops that may be called without allocating an IRQ (eg.
   bus_lock, irq_suspend, etc), can these be called from an atomic
   context? If they might be called from an atomic context then these
   are the chip-ops which will cause problems as we cannot guarantee
   that all IRQ chips can support irq-safe RPM.

AFAICT, looking at the chip-ops, it appears to me that ones that should
be called without allocating a IRQ are:

@irq_cpu_online
@irq_cpu_offline
@irq_bus_lock
@irq_bus_sync_unlock
@irq_suspend
@irq_resume
@irq_pm_shutdown
@irq_calc_mask (not used by any chips?)
@irq_print_chip
@irq_get_irqchip_state? (not sure about this one)

Of the above the only one I can see that would be called in an atomic
context would be irq_get_irqchip_state() and I am not sure if that
should be called without allocating the IRQ first?

Bottom line is that think that the chip can protect itself from an
unexpected access. Yes setting the type needs to be sorted out, but
hopefully this is do-able.

Cheers
Jon
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lars-Peter Clausen Nov. 12, 2015, 1:20 p.m. UTC | #8
On 11/12/2015 11:59 AM, Jon Hunter wrote:
> 
> On 11/11/15 15:41, Grygorii Strashko wrote:
>> On 11/11/2015 12:13 PM, Jon Hunter wrote:
>>>
>>> On 10/11/15 18:07, Lars-Peter Clausen wrote:
>>>> On 11/10/2015 05:47 PM, Grygorii Strashko wrote:
>>>> [...]
>>>>>> I was trying to simplify matters by placing the resume call in
>>>>>> __setup_irq() as opposed to requested_threaded_irq(). However, the would
>>>>>> mean the resume is inside the bus_lock and may be I should not assume
>>>>>> that I can sleep here.
>>>>>>
>>>>>>> Can you folks please agree on something which is correct and complete?
>>>>>>
>>>>>> Soren I am happy to defer to your patch and drop this. My only comment
>>>>>> would be what about the request_percpu_irq() path in your patch?
>>>>>>
>>>>>
>>>>> I have the same comment here as I asked Soren:
>>>>> 1) There are no restrictions to call irq set_irq_type() whenever,
>>>>> as result HW can be accessed before request_x_irq()/__setup_irq().
>>>>> And this is used quite widely now :(
>>>>>
>>>>
>>>> Changing the configuration of a resource that is not owned seems to be
>>>> fairly broken. In the worst case this will overwrite the configuration that
>>>> was set by owner of the resource.
>>>>
>>>> Especially those that call irq_set_irq_type() directly before request_irq(),
>>>> given that you supply the trigger type to request_irq() which will make sure
>>>> that there are no conflicts and the configure.
>>>>
>>>> This is a bit like calling gpio_set_direction() before you call
>>>> gpio_request(), which will also have PM issues.
>>>
>>> Yes, I agree that this does sound a bit odd, but ...
>>>
>>>>> For example, during OF boot:
>>>>>
>>>>> [a]  irq_create_of_mapping()
>>>>>     - irq_create_fwspec_mapping()
>>>>>       - irq_set_irq_type()
>>>
>>> The above means that if someone calls of_irq_get() (or
>>> platform_get_irq()), before request_irq(), then this will call
>>> irq_create_of_mapping() and hence, call irq_set_irq_type. So should
>>> irq_create_fwspec_mapping() be setting the type in the first place? I
>>> can see it is convenient to do it here.
>>
>> In general there is another option - save OF-flags and pass them to
>> __setup_irq() where they can be processed.
> 
> Right, we could look at doing something like this.
> 
>>>>> or
>> [b]
>>>>> 	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_HIGH);
>>>>> 	irq_set_chained_handler(irq, mx31ads_expio_irq_handler);
>>
>> option: add "flag" parameter to irq_set_chained_handler
>>
>>>>>
>>>>> or
>> [c]
>>>>> 	irq_set_irq_type(alarm_irq, IRQ_TYPE_EDGE_BOTH);
>>>>> 	err = devm_request_irq(&pdev->dev, alarm_irq, fan_alarm_irq_handler,
>>>>> (there are ~200 occurrences of irq set_irq_type in Kernel)
>>>>>
>>>>> 2) if i'm not wrong, the same is valid for irq_set_irq_wake() and irq_set_affinity()
>>>>>
>>>>> I'm not saying all these code is correct, but that what's now in kernel :(
>>>>> I've tried to test Soren's patch with omap-gpio and immediately hit case [a] :.(
>>>>
>>>> All functions for which are part of the public API and for which it is legal
>>>> to call them without calling request_irq() (or similar) first will need to
>>>> have pm_get()/pm_put().
>>>
>>> Right. May be we can look at the various entry points to the chip
>>> operators to get a feel for which public APIs need to be handled.
>>
>>
>> Seems yes. But we need to be very careful with this, some of functions could be
>> called recursively (nested), like:
>> [d]
>> static int pcf857x_irq_set_wake(struct irq_data *data, unsigned int on)
>> {
>> 	...
>> 	error = irq_set_irq_wake(gpio->irq_parent, on);
>>
>>
>> Personally, I have nothing against irq_pm_(get|put) :) and thought about similar things
>> when tried to solve the same problem for omap-gpio driver.
>> But :(, I have to fall back to irq_bus_lock/sync_unlock, because of [a,b,c] - all above
>> APIs surrounded by chip_bus_lock/chip_bus_sync_unlock. ([d] - I've not hit it just because
>> I was lucky).
> 
> I had a quick peek at the omap-gpio driver and I see that internally you
> are using the gpio ref-count to manage RPM and use the bus-lock hooks to
> invoke RPM.
> 
> This can definitely be complex when considering all the potential paths,
> but I think that we need to a look at this from a chip-ops perspective
> because only the chip knows if it is accessible or not. That said, what
> we need to assess is:
> 
> 1. Which chip-ops should ONLY be called after an IRQ has been allocated
>    (eg, enable/disable, mask/unmask, type, etc). These chip-ops should
>    not try to control the chip PM, but should possibly WARN if called
>    when  the chip is not accessible.
> 2. For chip-ops that may be called without allocating an IRQ (eg.
>    bus_lock, irq_suspend, etc), can these be called from an atomic
>    context? If they might be called from an atomic context then these
>    are the chip-ops which will cause problems as we cannot guarantee
>    that all IRQ chips can support irq-safe RPM.

They can't. chip_bus_lock() can sleep, so anything that locks the bus can't
be called from atomic context.

One easy way out might be to always call pm_get/pm_but from
bus_lock,/bus_unlock. This way the chip is guaranteed to be powered up when
accessed happens. In addition pm_get is called when the IRQ is request and
pm_put is called when the IRQ is release, this is to ensure the chip stays
powered when it is actively monitoring the IRQ lines.

--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Grygorii Strashko Nov. 12, 2015, 1:29 p.m. UTC | #9
On 11/12/2015 03:20 PM, Lars-Peter Clausen wrote:
> On 11/12/2015 11:59 AM, Jon Hunter wrote:
>>
>> On 11/11/15 15:41, Grygorii Strashko wrote:
>>> On 11/11/2015 12:13 PM, Jon Hunter wrote:
>>>>
>>>> On 10/11/15 18:07, Lars-Peter Clausen wrote:
>>>>> On 11/10/2015 05:47 PM, Grygorii Strashko wrote:
>>>>> [...]
>>>>>>> I was trying to simplify matters by placing the resume call in
>>>>>>> __setup_irq() as opposed to requested_threaded_irq(). However, the would
>>>>>>> mean the resume is inside the bus_lock and may be I should not assume
>>>>>>> that I can sleep here.
>>>>>>>
>>>>>>>> Can you folks please agree on something which is correct and complete?
>>>>>>>
>>>>>>> Soren I am happy to defer to your patch and drop this. My only comment
>>>>>>> would be what about the request_percpu_irq() path in your patch?
>>>>>>>
>>>>>>
>>>>>> I have the same comment here as I asked Soren:
>>>>>> 1) There are no restrictions to call irq set_irq_type() whenever,
>>>>>> as result HW can be accessed before request_x_irq()/__setup_irq().
>>>>>> And this is used quite widely now :(
>>>>>>
>>>>>
>>>>> Changing the configuration of a resource that is not owned seems to be
>>>>> fairly broken. In the worst case this will overwrite the configuration that
>>>>> was set by owner of the resource.
>>>>>
>>>>> Especially those that call irq_set_irq_type() directly before request_irq(),
>>>>> given that you supply the trigger type to request_irq() which will make sure
>>>>> that there are no conflicts and the configure.
>>>>>
>>>>> This is a bit like calling gpio_set_direction() before you call
>>>>> gpio_request(), which will also have PM issues.
>>>>
>>>> Yes, I agree that this does sound a bit odd, but ...
>>>>
>>>>>> For example, during OF boot:
>>>>>>
>>>>>> [a]  irq_create_of_mapping()
>>>>>>      - irq_create_fwspec_mapping()
>>>>>>        - irq_set_irq_type()
>>>>
>>>> The above means that if someone calls of_irq_get() (or
>>>> platform_get_irq()), before request_irq(), then this will call
>>>> irq_create_of_mapping() and hence, call irq_set_irq_type. So should
>>>> irq_create_fwspec_mapping() be setting the type in the first place? I
>>>> can see it is convenient to do it here.
>>>
>>> In general there is another option - save OF-flags and pass them to
>>> __setup_irq() where they can be processed.
>>
>> Right, we could look at doing something like this.
>>
>>>>>> or
>>> [b]
>>>>>> 	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_HIGH);
>>>>>> 	irq_set_chained_handler(irq, mx31ads_expio_irq_handler);
>>>
>>> option: add "flag" parameter to irq_set_chained_handler
>>>
>>>>>>
>>>>>> or
>>> [c]
>>>>>> 	irq_set_irq_type(alarm_irq, IRQ_TYPE_EDGE_BOTH);
>>>>>> 	err = devm_request_irq(&pdev->dev, alarm_irq, fan_alarm_irq_handler,
>>>>>> (there are ~200 occurrences of irq set_irq_type in Kernel)
>>>>>>
>>>>>> 2) if i'm not wrong, the same is valid for irq_set_irq_wake() and irq_set_affinity()
>>>>>>
>>>>>> I'm not saying all these code is correct, but that what's now in kernel :(
>>>>>> I've tried to test Soren's patch with omap-gpio and immediately hit case [a] :.(
>>>>>
>>>>> All functions for which are part of the public API and for which it is legal
>>>>> to call them without calling request_irq() (or similar) first will need to
>>>>> have pm_get()/pm_put().
>>>>
>>>> Right. May be we can look at the various entry points to the chip
>>>> operators to get a feel for which public APIs need to be handled.
>>>
>>>
>>> Seems yes. But we need to be very careful with this, some of functions could be
>>> called recursively (nested), like:
>>> [d]
>>> static int pcf857x_irq_set_wake(struct irq_data *data, unsigned int on)
>>> {
>>> 	...
>>> 	error = irq_set_irq_wake(gpio->irq_parent, on);
>>>
>>>
>>> Personally, I have nothing against irq_pm_(get|put) :) and thought about similar things
>>> when tried to solve the same problem for omap-gpio driver.
>>> But :(, I have to fall back to irq_bus_lock/sync_unlock, because of [a,b,c] - all above
>>> APIs surrounded by chip_bus_lock/chip_bus_sync_unlock. ([d] - I've not hit it just because
>>> I was lucky).
>>
>> I had a quick peek at the omap-gpio driver and I see that internally you
>> are using the gpio ref-count to manage RPM and use the bus-lock hooks to
>> invoke RPM.
>>
>> This can definitely be complex when considering all the potential paths,
>> but I think that we need to a look at this from a chip-ops perspective
>> because only the chip knows if it is accessible or not. That said, what
>> we need to assess is:
>>
>> 1. Which chip-ops should ONLY be called after an IRQ has been allocated
>>     (eg, enable/disable, mask/unmask, type, etc). These chip-ops should
>>     not try to control the chip PM, but should possibly WARN if called
>>     when  the chip is not accessible.
>> 2. For chip-ops that may be called without allocating an IRQ (eg.
>>     bus_lock, irq_suspend, etc), can these be called from an atomic
>>     context? If they might be called from an atomic context then these
>>     are the chip-ops which will cause problems as we cannot guarantee
>>     that all IRQ chips can support irq-safe RPM.
> 
> They can't. chip_bus_lock() can sleep, so anything that locks the bus can't
> be called from atomic context.
> 
> One easy way out might be to always call pm_get/pm_but from
> bus_lock,/bus_unlock. This way the chip is guaranteed to be powered up when
> accessed happens. In addition pm_get is called when the IRQ is request and
> pm_put is called when the IRQ is release, this is to ensure the chip stays
> powered when it is actively monitoring the IRQ lines.
> 

In general, this is simplest possible solution. More over, if irqchip will have
dev field PM runtime could be used directly instead of get/put.  

but.. :( How about problem [d]?
Lars-Peter Clausen Nov. 12, 2015, 1:34 p.m. UTC | #10
On 11/12/2015 02:29 PM, Grygorii Strashko wrote:
> On 11/12/2015 03:20 PM, Lars-Peter Clausen wrote:
>> On 11/12/2015 11:59 AM, Jon Hunter wrote:
>>>
>>> On 11/11/15 15:41, Grygorii Strashko wrote:
>>>> On 11/11/2015 12:13 PM, Jon Hunter wrote:
>>>>>
>>>>> On 10/11/15 18:07, Lars-Peter Clausen wrote:
>>>>>> On 11/10/2015 05:47 PM, Grygorii Strashko wrote:
>>>>>> [...]
>>>>>>>> I was trying to simplify matters by placing the resume call in
>>>>>>>> __setup_irq() as opposed to requested_threaded_irq(). However, the would
>>>>>>>> mean the resume is inside the bus_lock and may be I should not assume
>>>>>>>> that I can sleep here.
>>>>>>>>
>>>>>>>>> Can you folks please agree on something which is correct and complete?
>>>>>>>>
>>>>>>>> Soren I am happy to defer to your patch and drop this. My only comment
>>>>>>>> would be what about the request_percpu_irq() path in your patch?
>>>>>>>>
>>>>>>>
>>>>>>> I have the same comment here as I asked Soren:
>>>>>>> 1) There are no restrictions to call irq set_irq_type() whenever,
>>>>>>> as result HW can be accessed before request_x_irq()/__setup_irq().
>>>>>>> And this is used quite widely now :(
>>>>>>>
>>>>>>
>>>>>> Changing the configuration of a resource that is not owned seems to be
>>>>>> fairly broken. In the worst case this will overwrite the configuration that
>>>>>> was set by owner of the resource.
>>>>>>
>>>>>> Especially those that call irq_set_irq_type() directly before request_irq(),
>>>>>> given that you supply the trigger type to request_irq() which will make sure
>>>>>> that there are no conflicts and the configure.
>>>>>>
>>>>>> This is a bit like calling gpio_set_direction() before you call
>>>>>> gpio_request(), which will also have PM issues.
>>>>>
>>>>> Yes, I agree that this does sound a bit odd, but ...
>>>>>
>>>>>>> For example, during OF boot:
>>>>>>>
>>>>>>> [a]  irq_create_of_mapping()
>>>>>>>      - irq_create_fwspec_mapping()
>>>>>>>        - irq_set_irq_type()
>>>>>
>>>>> The above means that if someone calls of_irq_get() (or
>>>>> platform_get_irq()), before request_irq(), then this will call
>>>>> irq_create_of_mapping() and hence, call irq_set_irq_type. So should
>>>>> irq_create_fwspec_mapping() be setting the type in the first place? I
>>>>> can see it is convenient to do it here.
>>>>
>>>> In general there is another option - save OF-flags and pass them to
>>>> __setup_irq() where they can be processed.
>>>
>>> Right, we could look at doing something like this.
>>>
>>>>>>> or
>>>> [b]
>>>>>>> 	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_HIGH);
>>>>>>> 	irq_set_chained_handler(irq, mx31ads_expio_irq_handler);
>>>>
>>>> option: add "flag" parameter to irq_set_chained_handler
>>>>
>>>>>>>
>>>>>>> or
>>>> [c]
>>>>>>> 	irq_set_irq_type(alarm_irq, IRQ_TYPE_EDGE_BOTH);
>>>>>>> 	err = devm_request_irq(&pdev->dev, alarm_irq, fan_alarm_irq_handler,
>>>>>>> (there are ~200 occurrences of irq set_irq_type in Kernel)
>>>>>>>
>>>>>>> 2) if i'm not wrong, the same is valid for irq_set_irq_wake() and irq_set_affinity()
>>>>>>>
>>>>>>> I'm not saying all these code is correct, but that what's now in kernel :(
>>>>>>> I've tried to test Soren's patch with omap-gpio and immediately hit case [a] :.(
>>>>>>
>>>>>> All functions for which are part of the public API and for which it is legal
>>>>>> to call them without calling request_irq() (or similar) first will need to
>>>>>> have pm_get()/pm_put().
>>>>>
>>>>> Right. May be we can look at the various entry points to the chip
>>>>> operators to get a feel for which public APIs need to be handled.
>>>>
>>>>
>>>> Seems yes. But we need to be very careful with this, some of functions could be
>>>> called recursively (nested), like:
>>>> [d]
>>>> static int pcf857x_irq_set_wake(struct irq_data *data, unsigned int on)
>>>> {
>>>> 	...
>>>> 	error = irq_set_irq_wake(gpio->irq_parent, on);
>>>>
>>>>
>>>> Personally, I have nothing against irq_pm_(get|put) :) and thought about similar things
>>>> when tried to solve the same problem for omap-gpio driver.
>>>> But :(, I have to fall back to irq_bus_lock/sync_unlock, because of [a,b,c] - all above
>>>> APIs surrounded by chip_bus_lock/chip_bus_sync_unlock. ([d] - I've not hit it just because
>>>> I was lucky).
>>>
>>> I had a quick peek at the omap-gpio driver and I see that internally you
>>> are using the gpio ref-count to manage RPM and use the bus-lock hooks to
>>> invoke RPM.
>>>
>>> This can definitely be complex when considering all the potential paths,
>>> but I think that we need to a look at this from a chip-ops perspective
>>> because only the chip knows if it is accessible or not. That said, what
>>> we need to assess is:
>>>
>>> 1. Which chip-ops should ONLY be called after an IRQ has been allocated
>>>     (eg, enable/disable, mask/unmask, type, etc). These chip-ops should
>>>     not try to control the chip PM, but should possibly WARN if called
>>>     when  the chip is not accessible.
>>> 2. For chip-ops that may be called without allocating an IRQ (eg.
>>>     bus_lock, irq_suspend, etc), can these be called from an atomic
>>>     context? If they might be called from an atomic context then these
>>>     are the chip-ops which will cause problems as we cannot guarantee
>>>     that all IRQ chips can support irq-safe RPM.
>>
>> They can't. chip_bus_lock() can sleep, so anything that locks the bus can't
>> be called from atomic context.
>>
>> One easy way out might be to always call pm_get/pm_but from
>> bus_lock,/bus_unlock. This way the chip is guaranteed to be powered up when
>> accessed happens. In addition pm_get is called when the IRQ is request and
>> pm_put is called when the IRQ is release, this is to ensure the chip stays
>> powered when it is actively monitoring the IRQ lines.
>>
> 
> In general, this is simplest possible solution. More over, if irqchip will have
> dev field PM runtime could be used directly instead of get/put.  
> 
> but.. :( How about problem [d]?
> 

Can you explain why you thing this is a problem? I don't see the issue.
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jon Hunter Nov. 12, 2015, 1:35 p.m. UTC | #11
On 12/11/15 13:20, Lars-Peter Clausen wrote:
> On 11/12/2015 11:59 AM, Jon Hunter wrote:
>>
>> On 11/11/15 15:41, Grygorii Strashko wrote:
>>> On 11/11/2015 12:13 PM, Jon Hunter wrote:
>>>>
>>>> On 10/11/15 18:07, Lars-Peter Clausen wrote:
>>>>> On 11/10/2015 05:47 PM, Grygorii Strashko wrote:
>>>>> [...]
>>>>>>> I was trying to simplify matters by placing the resume call in
>>>>>>> __setup_irq() as opposed to requested_threaded_irq(). However, the would
>>>>>>> mean the resume is inside the bus_lock and may be I should not assume
>>>>>>> that I can sleep here.
>>>>>>>
>>>>>>>> Can you folks please agree on something which is correct and complete?
>>>>>>>
>>>>>>> Soren I am happy to defer to your patch and drop this. My only comment
>>>>>>> would be what about the request_percpu_irq() path in your patch?
>>>>>>>
>>>>>>
>>>>>> I have the same comment here as I asked Soren:
>>>>>> 1) There are no restrictions to call irq set_irq_type() whenever,
>>>>>> as result HW can be accessed before request_x_irq()/__setup_irq().
>>>>>> And this is used quite widely now :(
>>>>>>
>>>>>
>>>>> Changing the configuration of a resource that is not owned seems to be
>>>>> fairly broken. In the worst case this will overwrite the configuration that
>>>>> was set by owner of the resource.
>>>>>
>>>>> Especially those that call irq_set_irq_type() directly before request_irq(),
>>>>> given that you supply the trigger type to request_irq() which will make sure
>>>>> that there are no conflicts and the configure.
>>>>>
>>>>> This is a bit like calling gpio_set_direction() before you call
>>>>> gpio_request(), which will also have PM issues.
>>>>
>>>> Yes, I agree that this does sound a bit odd, but ...
>>>>
>>>>>> For example, during OF boot:
>>>>>>
>>>>>> [a]  irq_create_of_mapping()
>>>>>>     - irq_create_fwspec_mapping()
>>>>>>       - irq_set_irq_type()
>>>>
>>>> The above means that if someone calls of_irq_get() (or
>>>> platform_get_irq()), before request_irq(), then this will call
>>>> irq_create_of_mapping() and hence, call irq_set_irq_type. So should
>>>> irq_create_fwspec_mapping() be setting the type in the first place? I
>>>> can see it is convenient to do it here.
>>>
>>> In general there is another option - save OF-flags and pass them to
>>> __setup_irq() where they can be processed.
>>
>> Right, we could look at doing something like this.
>>
>>>>>> or
>>> [b]
>>>>>> 	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_HIGH);
>>>>>> 	irq_set_chained_handler(irq, mx31ads_expio_irq_handler);
>>>
>>> option: add "flag" parameter to irq_set_chained_handler
>>>
>>>>>>
>>>>>> or
>>> [c]
>>>>>> 	irq_set_irq_type(alarm_irq, IRQ_TYPE_EDGE_BOTH);
>>>>>> 	err = devm_request_irq(&pdev->dev, alarm_irq, fan_alarm_irq_handler,
>>>>>> (there are ~200 occurrences of irq set_irq_type in Kernel)
>>>>>>
>>>>>> 2) if i'm not wrong, the same is valid for irq_set_irq_wake() and irq_set_affinity()
>>>>>>
>>>>>> I'm not saying all these code is correct, but that what's now in kernel :(
>>>>>> I've tried to test Soren's patch with omap-gpio and immediately hit case [a] :.(
>>>>>
>>>>> All functions for which are part of the public API and for which it is legal
>>>>> to call them without calling request_irq() (or similar) first will need to
>>>>> have pm_get()/pm_put().
>>>>
>>>> Right. May be we can look at the various entry points to the chip
>>>> operators to get a feel for which public APIs need to be handled.
>>>
>>>
>>> Seems yes. But we need to be very careful with this, some of functions could be
>>> called recursively (nested), like:
>>> [d]
>>> static int pcf857x_irq_set_wake(struct irq_data *data, unsigned int on)
>>> {
>>> 	...
>>> 	error = irq_set_irq_wake(gpio->irq_parent, on);
>>>
>>>
>>> Personally, I have nothing against irq_pm_(get|put) :) and thought about similar things
>>> when tried to solve the same problem for omap-gpio driver.
>>> But :(, I have to fall back to irq_bus_lock/sync_unlock, because of [a,b,c] - all above
>>> APIs surrounded by chip_bus_lock/chip_bus_sync_unlock. ([d] - I've not hit it just because
>>> I was lucky).
>>
>> I had a quick peek at the omap-gpio driver and I see that internally you
>> are using the gpio ref-count to manage RPM and use the bus-lock hooks to
>> invoke RPM.
>>
>> This can definitely be complex when considering all the potential paths,
>> but I think that we need to a look at this from a chip-ops perspective
>> because only the chip knows if it is accessible or not. That said, what
>> we need to assess is:
>>
>> 1. Which chip-ops should ONLY be called after an IRQ has been allocated
>>    (eg, enable/disable, mask/unmask, type, etc). These chip-ops should
>>    not try to control the chip PM, but should possibly WARN if called
>>    when  the chip is not accessible.
>> 2. For chip-ops that may be called without allocating an IRQ (eg.
>>    bus_lock, irq_suspend, etc), can these be called from an atomic
>>    context? If they might be called from an atomic context then these
>>    are the chip-ops which will cause problems as we cannot guarantee
>>    that all IRQ chips can support irq-safe RPM.
> 
> They can't. chip_bus_lock() can sleep, so anything that locks the bus can't
> be called from atomic context.

Sorry, what can't? Yes I understand that we cannot call anything that
locks the bus from an atomic context.

> One easy way out might be to always call pm_get/pm_but from
> bus_lock,/bus_unlock. This way the chip is guaranteed to be powered up when
> accessed happens. In addition pm_get is called when the IRQ is request and
> pm_put is called when the IRQ is release, this is to ensure the chip stays
> powered when it is actively monitoring the IRQ lines.

Yes I had thought about that, but it is not quite that easy, because in
the case of request_irq() you don't want to pm_put() after the
bus_unlock(). However, the bus_lock/unlock() are good indicators of
different paths.

Cheers
Jon



--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lars-Peter Clausen Nov. 12, 2015, 1:47 p.m. UTC | #12
On 11/12/2015 02:35 PM, Jon Hunter wrote:
> 
> On 12/11/15 13:20, Lars-Peter Clausen wrote:
>> On 11/12/2015 11:59 AM, Jon Hunter wrote:
>>>
>>> On 11/11/15 15:41, Grygorii Strashko wrote:
>>>> On 11/11/2015 12:13 PM, Jon Hunter wrote:
>>>>>
>>>>> On 10/11/15 18:07, Lars-Peter Clausen wrote:
>>>>>> On 11/10/2015 05:47 PM, Grygorii Strashko wrote:
>>>>>> [...]
>>>>>>>> I was trying to simplify matters by placing the resume call in
>>>>>>>> __setup_irq() as opposed to requested_threaded_irq(). However, the would
>>>>>>>> mean the resume is inside the bus_lock and may be I should not assume
>>>>>>>> that I can sleep here.
>>>>>>>>
>>>>>>>>> Can you folks please agree on something which is correct and complete?
>>>>>>>>
>>>>>>>> Soren I am happy to defer to your patch and drop this. My only comment
>>>>>>>> would be what about the request_percpu_irq() path in your patch?
>>>>>>>>
>>>>>>>
>>>>>>> I have the same comment here as I asked Soren:
>>>>>>> 1) There are no restrictions to call irq set_irq_type() whenever,
>>>>>>> as result HW can be accessed before request_x_irq()/__setup_irq().
>>>>>>> And this is used quite widely now :(
>>>>>>>
>>>>>>
>>>>>> Changing the configuration of a resource that is not owned seems to be
>>>>>> fairly broken. In the worst case this will overwrite the configuration that
>>>>>> was set by owner of the resource.
>>>>>>
>>>>>> Especially those that call irq_set_irq_type() directly before request_irq(),
>>>>>> given that you supply the trigger type to request_irq() which will make sure
>>>>>> that there are no conflicts and the configure.
>>>>>>
>>>>>> This is a bit like calling gpio_set_direction() before you call
>>>>>> gpio_request(), which will also have PM issues.
>>>>>
>>>>> Yes, I agree that this does sound a bit odd, but ...
>>>>>
>>>>>>> For example, during OF boot:
>>>>>>>
>>>>>>> [a]  irq_create_of_mapping()
>>>>>>>     - irq_create_fwspec_mapping()
>>>>>>>       - irq_set_irq_type()
>>>>>
>>>>> The above means that if someone calls of_irq_get() (or
>>>>> platform_get_irq()), before request_irq(), then this will call
>>>>> irq_create_of_mapping() and hence, call irq_set_irq_type. So should
>>>>> irq_create_fwspec_mapping() be setting the type in the first place? I
>>>>> can see it is convenient to do it here.
>>>>
>>>> In general there is another option - save OF-flags and pass them to
>>>> __setup_irq() where they can be processed.
>>>
>>> Right, we could look at doing something like this.
>>>
>>>>>>> or
>>>> [b]
>>>>>>> 	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_HIGH);
>>>>>>> 	irq_set_chained_handler(irq, mx31ads_expio_irq_handler);
>>>>
>>>> option: add "flag" parameter to irq_set_chained_handler
>>>>
>>>>>>>
>>>>>>> or
>>>> [c]
>>>>>>> 	irq_set_irq_type(alarm_irq, IRQ_TYPE_EDGE_BOTH);
>>>>>>> 	err = devm_request_irq(&pdev->dev, alarm_irq, fan_alarm_irq_handler,
>>>>>>> (there are ~200 occurrences of irq set_irq_type in Kernel)
>>>>>>>
>>>>>>> 2) if i'm not wrong, the same is valid for irq_set_irq_wake() and irq_set_affinity()
>>>>>>>
>>>>>>> I'm not saying all these code is correct, but that what's now in kernel :(
>>>>>>> I've tried to test Soren's patch with omap-gpio and immediately hit case [a] :.(
>>>>>>
>>>>>> All functions for which are part of the public API and for which it is legal
>>>>>> to call them without calling request_irq() (or similar) first will need to
>>>>>> have pm_get()/pm_put().
>>>>>
>>>>> Right. May be we can look at the various entry points to the chip
>>>>> operators to get a feel for which public APIs need to be handled.
>>>>
>>>>
>>>> Seems yes. But we need to be very careful with this, some of functions could be
>>>> called recursively (nested), like:
>>>> [d]
>>>> static int pcf857x_irq_set_wake(struct irq_data *data, unsigned int on)
>>>> {
>>>> 	...
>>>> 	error = irq_set_irq_wake(gpio->irq_parent, on);
>>>>
>>>>
>>>> Personally, I have nothing against irq_pm_(get|put) :) and thought about similar things
>>>> when tried to solve the same problem for omap-gpio driver.
>>>> But :(, I have to fall back to irq_bus_lock/sync_unlock, because of [a,b,c] - all above
>>>> APIs surrounded by chip_bus_lock/chip_bus_sync_unlock. ([d] - I've not hit it just because
>>>> I was lucky).
>>>
>>> I had a quick peek at the omap-gpio driver and I see that internally you
>>> are using the gpio ref-count to manage RPM and use the bus-lock hooks to
>>> invoke RPM.
>>>
>>> This can definitely be complex when considering all the potential paths,
>>> but I think that we need to a look at this from a chip-ops perspective
>>> because only the chip knows if it is accessible or not. That said, what
>>> we need to assess is:
>>>
>>> 1. Which chip-ops should ONLY be called after an IRQ has been allocated
>>>    (eg, enable/disable, mask/unmask, type, etc). These chip-ops should
>>>    not try to control the chip PM, but should possibly WARN if called
>>>    when  the chip is not accessible.
>>> 2. For chip-ops that may be called without allocating an IRQ (eg.
>>>    bus_lock, irq_suspend, etc), can these be called from an atomic
>>>    context? If they might be called from an atomic context then these
>>>    are the chip-ops which will cause problems as we cannot guarantee
>>>    that all IRQ chips can support irq-safe RPM.
>>
>> They can't. chip_bus_lock() can sleep, so anything that locks the bus can't
>> be called from atomic context.
> 
> Sorry, what can't? Yes I understand that we cannot call anything that
> locks the bus from an atomic context.

They can't be called from atomic context. The chip_bus_lock() function may
sleep and you can't access the device without previously locking the bus.
Since the device only needs to be powered up when it is accessed its safe to
assume that the places where you need pm_get()/pm_put() are only called from
non-atomic context.

> 
>> One easy way out might be to always call pm_get/pm_but from
>> bus_lock,/bus_unlock. This way the chip is guaranteed to be powered up when
>> accessed happens. In addition pm_get is called when the IRQ is request and
>> pm_put is called when the IRQ is release, this is to ensure the chip stays
>> powered when it is actively monitoring the IRQ lines.
> 
> Yes I had thought about that, but it is not quite that easy, because in
> the case of request_irq() you don't want to pm_put() after the
> bus_unlock(). However, the bus_lock/unlock() are good indicators of
> different paths.

You'd call pm_get() twice in request_irq() once from bus_lock() and once
independently, that way you still have a reference even after the bus_unlock().
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jon Hunter Nov. 12, 2015, 2:02 p.m. UTC | #13
On 12/11/15 13:47, Lars-Peter Clausen wrote:
> On 11/12/2015 02:35 PM, Jon Hunter wrote:
>>
>> On 12/11/15 13:20, Lars-Peter Clausen wrote:
>>> On 11/12/2015 11:59 AM, Jon Hunter wrote:
>>>>
>>>> On 11/11/15 15:41, Grygorii Strashko wrote:
>>>>> On 11/11/2015 12:13 PM, Jon Hunter wrote:
>>>>>>
>>>>>> On 10/11/15 18:07, Lars-Peter Clausen wrote:
>>>>>>> On 11/10/2015 05:47 PM, Grygorii Strashko wrote:
>>>>>>> [...]
>>>>>>>>> I was trying to simplify matters by placing the resume call in
>>>>>>>>> __setup_irq() as opposed to requested_threaded_irq(). However, the would
>>>>>>>>> mean the resume is inside the bus_lock and may be I should not assume
>>>>>>>>> that I can sleep here.
>>>>>>>>>
>>>>>>>>>> Can you folks please agree on something which is correct and complete?
>>>>>>>>>
>>>>>>>>> Soren I am happy to defer to your patch and drop this. My only comment
>>>>>>>>> would be what about the request_percpu_irq() path in your patch?
>>>>>>>>>
>>>>>>>>
>>>>>>>> I have the same comment here as I asked Soren:
>>>>>>>> 1) There are no restrictions to call irq set_irq_type() whenever,
>>>>>>>> as result HW can be accessed before request_x_irq()/__setup_irq().
>>>>>>>> And this is used quite widely now :(
>>>>>>>>
>>>>>>>
>>>>>>> Changing the configuration of a resource that is not owned seems to be
>>>>>>> fairly broken. In the worst case this will overwrite the configuration that
>>>>>>> was set by owner of the resource.
>>>>>>>
>>>>>>> Especially those that call irq_set_irq_type() directly before request_irq(),
>>>>>>> given that you supply the trigger type to request_irq() which will make sure
>>>>>>> that there are no conflicts and the configure.
>>>>>>>
>>>>>>> This is a bit like calling gpio_set_direction() before you call
>>>>>>> gpio_request(), which will also have PM issues.
>>>>>>
>>>>>> Yes, I agree that this does sound a bit odd, but ...
>>>>>>
>>>>>>>> For example, during OF boot:
>>>>>>>>
>>>>>>>> [a]  irq_create_of_mapping()
>>>>>>>>     - irq_create_fwspec_mapping()
>>>>>>>>       - irq_set_irq_type()
>>>>>>
>>>>>> The above means that if someone calls of_irq_get() (or
>>>>>> platform_get_irq()), before request_irq(), then this will call
>>>>>> irq_create_of_mapping() and hence, call irq_set_irq_type. So should
>>>>>> irq_create_fwspec_mapping() be setting the type in the first place? I
>>>>>> can see it is convenient to do it here.
>>>>>
>>>>> In general there is another option - save OF-flags and pass them to
>>>>> __setup_irq() where they can be processed.
>>>>
>>>> Right, we could look at doing something like this.
>>>>
>>>>>>>> or
>>>>> [b]
>>>>>>>> 	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_HIGH);
>>>>>>>> 	irq_set_chained_handler(irq, mx31ads_expio_irq_handler);
>>>>>
>>>>> option: add "flag" parameter to irq_set_chained_handler
>>>>>
>>>>>>>>
>>>>>>>> or
>>>>> [c]
>>>>>>>> 	irq_set_irq_type(alarm_irq, IRQ_TYPE_EDGE_BOTH);
>>>>>>>> 	err = devm_request_irq(&pdev->dev, alarm_irq, fan_alarm_irq_handler,
>>>>>>>> (there are ~200 occurrences of irq set_irq_type in Kernel)
>>>>>>>>
>>>>>>>> 2) if i'm not wrong, the same is valid for irq_set_irq_wake() and irq_set_affinity()
>>>>>>>>
>>>>>>>> I'm not saying all these code is correct, but that what's now in kernel :(
>>>>>>>> I've tried to test Soren's patch with omap-gpio and immediately hit case [a] :.(
>>>>>>>
>>>>>>> All functions for which are part of the public API and for which it is legal
>>>>>>> to call them without calling request_irq() (or similar) first will need to
>>>>>>> have pm_get()/pm_put().
>>>>>>
>>>>>> Right. May be we can look at the various entry points to the chip
>>>>>> operators to get a feel for which public APIs need to be handled.
>>>>>
>>>>>
>>>>> Seems yes. But we need to be very careful with this, some of functions could be
>>>>> called recursively (nested), like:
>>>>> [d]
>>>>> static int pcf857x_irq_set_wake(struct irq_data *data, unsigned int on)
>>>>> {
>>>>> 	...
>>>>> 	error = irq_set_irq_wake(gpio->irq_parent, on);
>>>>>
>>>>>
>>>>> Personally, I have nothing against irq_pm_(get|put) :) and thought about similar things
>>>>> when tried to solve the same problem for omap-gpio driver.
>>>>> But :(, I have to fall back to irq_bus_lock/sync_unlock, because of [a,b,c] - all above
>>>>> APIs surrounded by chip_bus_lock/chip_bus_sync_unlock. ([d] - I've not hit it just because
>>>>> I was lucky).
>>>>
>>>> I had a quick peek at the omap-gpio driver and I see that internally you
>>>> are using the gpio ref-count to manage RPM and use the bus-lock hooks to
>>>> invoke RPM.
>>>>
>>>> This can definitely be complex when considering all the potential paths,
>>>> but I think that we need to a look at this from a chip-ops perspective
>>>> because only the chip knows if it is accessible or not. That said, what
>>>> we need to assess is:
>>>>
>>>> 1. Which chip-ops should ONLY be called after an IRQ has been allocated
>>>>    (eg, enable/disable, mask/unmask, type, etc). These chip-ops should
>>>>    not try to control the chip PM, but should possibly WARN if called
>>>>    when  the chip is not accessible.
>>>> 2. For chip-ops that may be called without allocating an IRQ (eg.
>>>>    bus_lock, irq_suspend, etc), can these be called from an atomic
>>>>    context? If they might be called from an atomic context then these
>>>>    are the chip-ops which will cause problems as we cannot guarantee
>>>>    that all IRQ chips can support irq-safe RPM.
>>>
>>> They can't. chip_bus_lock() can sleep, so anything that locks the bus can't
>>> be called from atomic context.
>>
>> Sorry, what can't? Yes I understand that we cannot call anything that
>> locks the bus from an atomic context.
> 
> They can't be called from atomic context. The chip_bus_lock() function may
> sleep and you can't access the device without previously locking the bus.
> Since the device only needs to be powered up when it is accessed its safe to
> assume that the places where you need pm_get()/pm_put() are only called from
> non-atomic context.

Right, absolutely.

>>> One easy way out might be to always call pm_get/pm_but from
>>> bus_lock,/bus_unlock. This way the chip is guaranteed to be powered up when
>>> accessed happens. In addition pm_get is called when the IRQ is request and
>>> pm_put is called when the IRQ is release, this is to ensure the chip stays
>>> powered when it is actively monitoring the IRQ lines.
>>
>> Yes I had thought about that, but it is not quite that easy, because in
>> the case of request_irq() you don't want to pm_put() after the
>> bus_unlock(). However, the bus_lock/unlock() are good indicators of
>> different paths.
> 
> You'd call pm_get() twice in request_irq() once from bus_lock() and once
> independently, that way you still have a reference even after the bus_unlock().

Yes that is a possibility. However, there are places such as
show_interrupts() (kernel/irq/proc.c) and irq_gc_suspend() that do not
call bus_lock/unlock() which would need to be handled for PM. May be
these should also call bus_lock() as well?

Jon
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lars-Peter Clausen Nov. 12, 2015, 2:37 p.m. UTC | #14
On 11/12/2015 03:02 PM, Jon Hunter wrote:
[...]
>>>> One easy way out might be to always call pm_get/pm_but from
>>>> bus_lock,/bus_unlock. This way the chip is guaranteed to be powered up when
>>>> accessed happens. In addition pm_get is called when the IRQ is request and
>>>> pm_put is called when the IRQ is release, this is to ensure the chip stays
>>>> powered when it is actively monitoring the IRQ lines.
>>>
>>> Yes I had thought about that, but it is not quite that easy, because in
>>> the case of request_irq() you don't want to pm_put() after the
>>> bus_unlock(). However, the bus_lock/unlock() are good indicators of
>>> different paths.
>>
>> You'd call pm_get() twice in request_irq() once from bus_lock() and once
>> independently, that way you still have a reference even after the bus_unlock().
> 
> Yes that is a possibility. However, there are places such as
> show_interrupts() (kernel/irq/proc.c) and irq_gc_suspend() that do not
> call bus_lock/unlock() which would need to be handled for PM. May be
> these should also call bus_lock() as well?

show_interrupts() only accesses software state, not hardware state, or does it?

suspend/resume is a bit tricky. It's kind of driver specific if it needs to
actually access the hardware or whether the state is already shadowed in
software. Maybe we can make this an exception for now and let drivers handle
this on their own.
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jon Hunter Nov. 12, 2015, 3:38 p.m. UTC | #15
On 12/11/15 14:37, Lars-Peter Clausen wrote:
> On 11/12/2015 03:02 PM, Jon Hunter wrote:
> [...]
>>>>> One easy way out might be to always call pm_get/pm_but from
>>>>> bus_lock,/bus_unlock. This way the chip is guaranteed to be powered up when
>>>>> accessed happens. In addition pm_get is called when the IRQ is request and
>>>>> pm_put is called when the IRQ is release, this is to ensure the chip stays
>>>>> powered when it is actively monitoring the IRQ lines.
>>>>
>>>> Yes I had thought about that, but it is not quite that easy, because in
>>>> the case of request_irq() you don't want to pm_put() after the
>>>> bus_unlock(). However, the bus_lock/unlock() are good indicators of
>>>> different paths.
>>>
>>> You'd call pm_get() twice in request_irq() once from bus_lock() and once
>>> independently, that way you still have a reference even after the bus_unlock().
>>
>> Yes that is a possibility. However, there are places such as
>> show_interrupts() (kernel/irq/proc.c) and irq_gc_suspend() that do not
>> call bus_lock/unlock() which would need to be handled for PM. May be
>> these should also call bus_lock() as well?
> 
> show_interrupts() only accesses software state, not hardware state, or does it?

Good point. Today there only appears to be one user:

arch/powerpc/sysdev/fsl_msi.c:	.irq_print_chip = fsl_msi_print_chip,

This one is purely software. However, it would be easy to handle the
show_interrupts case if needed.

> suspend/resume is a bit tricky. It's kind of driver specific if it needs to
> actually access the hardware or whether the state is already shadowed in
> software. Maybe we can make this an exception for now and let drivers handle
> this on their own.

Yes I would agree with you on that.

Cheers
Jon

--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Soren Brinkmann Nov. 12, 2015, 5:04 p.m. UTC | #16
Hi Jon,

On Tue, 2015-11-10 at 03:58PM +0000, Jon Hunter wrote:
> Hi Thomas,
> 
> On 10/11/15 15:26, Thomas Gleixner wrote:
[...]
> > Can you folks please agree on something which is correct and complete?
> 
> Soren I am happy to defer to your patch and drop this. My only comment
> would be what about the request_percpu_irq() path in your patch?

Sorry, I couldn't spent any time on this the last days. And now it seems
you are already a lot closer to a solution than the initial patches were.
I'm happy to step back from this patch set, but I'd appreciate if you
kept me in the loop.

	Thanks,
	Sören
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kevin Hilman Nov. 12, 2015, 11:20 p.m. UTC | #17
Jon Hunter <jonathanh@nvidia.com> writes:

> Some IRQ chips may be located in a power domain outside of the CPU subsystem
> and hence will require device specific runtime power management. Ideally,
> rather than adding more functions to the irq_chip_ops function table, using
> existing chip functions such as irq_startup/shutdown or
> irq_request/release_resources() would be best. However, these existing chip
> functions are called in the context of a spinlock which is not ideal for
> power management operations that may take some time to power up a domain.
>
> Two possible solutions are:
> 1. Move existing chip operators such as irq_request/release_resources()
>    outside of the spinlock and use these helpers.
> 2. Add new chip operators that are called outside of any spinlocks while
>    setting up and freeing an IRQ.

> Not knowing whether we can safely move irq_request/release_resources() to
> outside the spinlock (but hopefully this will solicit some feedback), add
> new chip operators for runtime resuming and suspending of an IRQ chip.

I'm not quite seeing how this would connect to the actual hardware
power domain (presumabaly managed by genpd) and any other devices in
that domain (presumably managed by runtime PM.)

If all the RPM devices in the domain go idle, it will be powered off
independently of the status of the irqchip because the irqchip isn't
using RPM. 

Is there a longer-term plan to handle the irqchips as a "normal" device
and use RPM?  IMO, that approach would be helpful even for irqchips that
share power domains with CPUs, since there are efforts working towards
using genpd/RPM to manage CPUs/clusters.

Kevin
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jon Hunter Nov. 13, 2015, 9:01 a.m. UTC | #18
On 12/11/15 23:20, Kevin Hilman wrote:
> Jon Hunter <jonathanh@nvidia.com> writes:
> 
>> Some IRQ chips may be located in a power domain outside of the CPU subsystem
>> and hence will require device specific runtime power management. Ideally,
>> rather than adding more functions to the irq_chip_ops function table, using
>> existing chip functions such as irq_startup/shutdown or
>> irq_request/release_resources() would be best. However, these existing chip
>> functions are called in the context of a spinlock which is not ideal for
>> power management operations that may take some time to power up a domain.
>>
>> Two possible solutions are:
>> 1. Move existing chip operators such as irq_request/release_resources()
>>    outside of the spinlock and use these helpers.
>> 2. Add new chip operators that are called outside of any spinlocks while
>>    setting up and freeing an IRQ.
> 
>> Not knowing whether we can safely move irq_request/release_resources() to
>> outside the spinlock (but hopefully this will solicit some feedback), add
>> new chip operators for runtime resuming and suspending of an IRQ chip.
> 
> I'm not quite seeing how this would connect to the actual hardware
> power domain (presumabaly managed by genpd) and any other devices in
> that domain (presumably managed by runtime PM.)

So this patch is just providing some hooks that an irqchip can use to
perform any PM related operations. If you look at the 2nd patch in the
series you will see for the GIC that these helpers are used to call
pm_runtime_get/put() which would handle the power-domain.

> If all the RPM devices in the domain go idle, it will be powered off
> independently of the status of the irqchip because the irqchip isn't
> using RPM. 

That's dependent on how the irqchip uses these helpers. If these helpers
invoke RPM then that will not be the case.

> Is there a longer-term plan to handle the irqchips as a "normal" device
> and use RPM?  IMO, that approach would be helpful even for irqchips that
> share power domains with CPUs, since there are efforts working towards
> using genpd/RPM to manage CPUs/clusters.

That would ideal. However, the majority of irqchips today
create/register them with IRQCHIP_DECLARE() and not as "normal" devices.
Therefore, I was reluctant to add "struct device" to the irqchip
structure. However, if this is what you would prefer and Thomas is ok
with it, then that would be fine with me.

Cheers
Jon
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Grygorii Strashko Nov. 13, 2015, 6:07 p.m. UTC | #19
On 11/12/2015 03:34 PM, Lars-Peter Clausen wrote:
> On 11/12/2015 02:29 PM, Grygorii Strashko wrote:
>> On 11/12/2015 03:20 PM, Lars-Peter Clausen wrote:
>>> On 11/12/2015 11:59 AM, Jon Hunter wrote:
>>>>
>>>> On 11/11/15 15:41, Grygorii Strashko wrote:
>>>>> On 11/11/2015 12:13 PM, Jon Hunter wrote:
>>>>>>
>>>>>> On 10/11/15 18:07, Lars-Peter Clausen wrote:
>>>>>>> On 11/10/2015 05:47 PM, Grygorii Strashko wrote:
>>>>>>> [...]
>>>>>>>>> I was trying to simplify matters by placing the resume call in
>>>>>>>>> __setup_irq() as opposed to requested_threaded_irq(). However, the would
>>>>>>>>> mean the resume is inside the bus_lock and may be I should not assume
>>>>>>>>> that I can sleep here.
>>>>>>>>>
>>>>>>>>>> Can you folks please agree on something which is correct and complete?
>>>>>>>>>
>>>>>>>>> Soren I am happy to defer to your patch and drop this. My only comment
>>>>>>>>> would be what about the request_percpu_irq() path in your patch?
>>>>>>>>>
>>>>>>>>
>>>>>>>> I have the same comment here as I asked Soren:
>>>>>>>> 1) There are no restrictions to call irq set_irq_type() whenever,
>>>>>>>> as result HW can be accessed before request_x_irq()/__setup_irq().
>>>>>>>> And this is used quite widely now :(
>>>>>>>>
>>>>>>>
>>>>>>> Changing the configuration of a resource that is not owned seems to be
>>>>>>> fairly broken. In the worst case this will overwrite the configuration that
>>>>>>> was set by owner of the resource.
>>>>>>>
>>>>>>> Especially those that call irq_set_irq_type() directly before request_irq(),
>>>>>>> given that you supply the trigger type to request_irq() which will make sure
>>>>>>> that there are no conflicts and the configure.
>>>>>>>
>>>>>>> This is a bit like calling gpio_set_direction() before you call
>>>>>>> gpio_request(), which will also have PM issues.
>>>>>>
>>>>>> Yes, I agree that this does sound a bit odd, but ...
>>>>>>
>>>>>>>> For example, during OF boot:
>>>>>>>>
>>>>>>>> [a]  irq_create_of_mapping()
>>>>>>>>       - irq_create_fwspec_mapping()
>>>>>>>>         - irq_set_irq_type()
>>>>>>
>>>>>> The above means that if someone calls of_irq_get() (or
>>>>>> platform_get_irq()), before request_irq(), then this will call
>>>>>> irq_create_of_mapping() and hence, call irq_set_irq_type. So should
>>>>>> irq_create_fwspec_mapping() be setting the type in the first place? I
>>>>>> can see it is convenient to do it here.
>>>>>
>>>>> In general there is another option - save OF-flags and pass them to
>>>>> __setup_irq() where they can be processed.
>>>>
>>>> Right, we could look at doing something like this.
>>>>
>>>>>>>> or
>>>>> [b]
>>>>>>>> 	irq_set_irq_type(irq, IRQ_TYPE_LEVEL_HIGH);
>>>>>>>> 	irq_set_chained_handler(irq, mx31ads_expio_irq_handler);
>>>>>
>>>>> option: add "flag" parameter to irq_set_chained_handler
>>>>>
>>>>>>>>
>>>>>>>> or
>>>>> [c]
>>>>>>>> 	irq_set_irq_type(alarm_irq, IRQ_TYPE_EDGE_BOTH);
>>>>>>>> 	err = devm_request_irq(&pdev->dev, alarm_irq, fan_alarm_irq_handler,
>>>>>>>> (there are ~200 occurrences of irq set_irq_type in Kernel)
>>>>>>>>
>>>>>>>> 2) if i'm not wrong, the same is valid for irq_set_irq_wake() and irq_set_affinity()
>>>>>>>>
>>>>>>>> I'm not saying all these code is correct, but that what's now in kernel :(
>>>>>>>> I've tried to test Soren's patch with omap-gpio and immediately hit case [a] :.(
>>>>>>>
>>>>>>> All functions for which are part of the public API and for which it is legal
>>>>>>> to call them without calling request_irq() (or similar) first will need to
>>>>>>> have pm_get()/pm_put().
>>>>>>
>>>>>> Right. May be we can look at the various entry points to the chip
>>>>>> operators to get a feel for which public APIs need to be handled.
>>>>>
>>>>>
>>>>> Seems yes. But we need to be very careful with this, some of functions could be
>>>>> called recursively (nested), like:
>>>>> [d]
>>>>> static int pcf857x_irq_set_wake(struct irq_data *data, unsigned int on)
>>>>> {
>>>>> 	...
>>>>> 	error = irq_set_irq_wake(gpio->irq_parent, on);
>>>>>
>>>>>
>>>>> Personally, I have nothing against irq_pm_(get|put) :) and thought about similar things
>>>>> when tried to solve the same problem for omap-gpio driver.
>>>>> But :(, I have to fall back to irq_bus_lock/sync_unlock, because of [a,b,c] - all above
>>>>> APIs surrounded by chip_bus_lock/chip_bus_sync_unlock. ([d] - I've not hit it just because
>>>>> I was lucky).
>>>>
>>>> I had a quick peek at the omap-gpio driver and I see that internally you
>>>> are using the gpio ref-count to manage RPM and use the bus-lock hooks to
>>>> invoke RPM.
>>>>
>>>> This can definitely be complex when considering all the potential paths,
>>>> but I think that we need to a look at this from a chip-ops perspective
>>>> because only the chip knows if it is accessible or not. That said, what
>>>> we need to assess is:
>>>>
>>>> 1. Which chip-ops should ONLY be called after an IRQ has been allocated
>>>>      (eg, enable/disable, mask/unmask, type, etc). These chip-ops should
>>>>      not try to control the chip PM, but should possibly WARN if called
>>>>      when  the chip is not accessible.
>>>> 2. For chip-ops that may be called without allocating an IRQ (eg.
>>>>      bus_lock, irq_suspend, etc), can these be called from an atomic
>>>>      context? If they might be called from an atomic context then these
>>>>      are the chip-ops which will cause problems as we cannot guarantee
>>>>      that all IRQ chips can support irq-safe RPM.
>>>
>>> They can't. chip_bus_lock() can sleep, so anything that locks the bus can't
>>> be called from atomic context.
>>>
>>> One easy way out might be to always call pm_get/pm_but from
>>> bus_lock,/bus_unlock. This way the chip is guaranteed to be powered up when
>>> accessed happens. In addition pm_get is called when the IRQ is request and
>>> pm_put is called when the IRQ is release, this is to ensure the chip stays
>>> powered when it is actively monitoring the IRQ lines.
>>>
>>
>> In general, this is simplest possible solution. More over, if irqchip will have
>> dev field PM runtime could be used directly instead of get/put.
>>
>> but.. :( How about problem [d]?
>>
> 
> Can you explain why you thing this is a problem? I don't see the issue.
> 

oh, Sorry missed your e-mail.

static int pcf857x_irq_set_wake(struct irq_data *data, unsigned int on)
{
	...
	error = irq_set_irq_wake(gpio->irq_parent, on);
	|-irq_get_desc_buslock
          |- chip_bus_lock
             |- irq_pm_get
                |- agic/zynq - pm_runtime_get_sync()
		   |- might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe);
- same for put

As result, it will be mandatory to define irq_safe = true, but in this case it will be
impossible to use clk_prepare_enable()/unprepare() in PM runtime callbacks, for example.


Also, discussed approach will work for all only if it will be guaranteed that
irq_pm_get/put() will be called only once, like PM runtime API. Otherwise
it will be very hard to reuse them for chips which do not use PM runtime
for PM management - those chips will need to implement own counters to 
avoid re-enabling/disabling of an already active/inactive devices.

So, decision has to be made - will irqchip PM management be PM runtime based only or not?
 - if it will be PM runtime based only (agic/zynq):
   - there are should be device always
   - PM runtime API can be called where needed directly -> no new callbacks.

 - if not:
   - some additional sync/protection will need to be added to irqchip

In addition, we seems missed irq_set_chained_handler*() :( - It do not call
__setup_irq().
Thomas Gleixner Nov. 13, 2015, 8:01 p.m. UTC | #20
On Fri, 13 Nov 2015, Jon Hunter wrote:
> On 12/11/15 23:20, Kevin Hilman wrote:
> > If all the RPM devices in the domain go idle, it will be powered off
> > independently of the status of the irqchip because the irqchip isn't
> > using RPM. 
> 
> That's dependent on how the irqchip uses these helpers. If these helpers
> invoke RPM then that will not be the case.

You need a very proper description of how that domain is working. If
all devices are idle, it's not necessary correct to power down the
irqchip as is might serve other devices as well.

OTOH, if it can be powered down then all idle devices need to release
the irq they requested because request_irq() would hold a ref on the
power domain.

I have no idea how you can describe that proper.

> > Is there a longer-term plan to handle the irqchips as a "normal" device
> > and use RPM?  IMO, that approach would be helpful even for irqchips that
> > share power domains with CPUs, since there are efforts working towards
> > using genpd/RPM to manage CPUs/clusters.
> 
> That would ideal. However, the majority of irqchips today
> create/register them with IRQCHIP_DECLARE() and not as "normal" devices.
> Therefore, I was reluctant to add "struct device" to the irqchip
> structure. However, if this is what you would prefer and Thomas is ok
> with it, then that would be fine with me.

I have no objections against that, but how is the 'struct device'
going to be initialized?

Thanks,

	tglx
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jon Hunter Nov. 16, 2015, 9:46 a.m. UTC | #21
On 13/11/15 20:01, Thomas Gleixner wrote:
> On Fri, 13 Nov 2015, Jon Hunter wrote:
>> On 12/11/15 23:20, Kevin Hilman wrote:
>>> If all the RPM devices in the domain go idle, it will be powered off
>>> independently of the status of the irqchip because the irqchip isn't
>>> using RPM. 
>>
>> That's dependent on how the irqchip uses these helpers. If these helpers
>> invoke RPM then that will not be the case.
> 
> You need a very proper description of how that domain is working. If
> all devices are idle, it's not necessary correct to power down the
> irqchip as is might serve other devices as well.

Agreed. The irqchip should only be powered down if there are no
interrupts in-use/requested. Runtime-pm will keep a reference count for
all requested IRQs.

> OTOH, if it can be powered down then all idle devices need to release
> the irq they requested because request_irq() would hold a ref on the
> power domain.

Yes.

> I have no idea how you can describe that proper.

Do you mean properly describe the interaction between runtime-pm and the
irqchip?

>>> Is there a longer-term plan to handle the irqchips as a "normal" device
>>> and use RPM?  IMO, that approach would be helpful even for irqchips that
>>> share power domains with CPUs, since there are efforts working towards
>>> using genpd/RPM to manage CPUs/clusters.
>>
>> That would ideal. However, the majority of irqchips today
>> create/register them with IRQCHIP_DECLARE() and not as "normal" devices.
>> Therefore, I was reluctant to add "struct device" to the irqchip
>> structure. However, if this is what you would prefer and Thomas is ok
>> with it, then that would be fine with me.
> 
> I have no objections against that, but how is the 'struct device'
> going to be initialized?

It would be initialised by the irqchip driver. However, it would be
optional. The genirq core could simply check to see if the chip->dev
member is initialised and if so enable runtime-pm.

Cheers
Jon
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Geert Uytterhoeven Nov. 16, 2015, 9:49 a.m. UTC | #22
On Mon, Nov 16, 2015 at 10:46 AM, Jon Hunter <jonathanh@nvidia.com> wrote:
> On 13/11/15 20:01, Thomas Gleixner wrote:
>> On Fri, 13 Nov 2015, Jon Hunter wrote:
>>> On 12/11/15 23:20, Kevin Hilman wrote:
>>>> If all the RPM devices in the domain go idle, it will be powered off
>>>> independently of the status of the irqchip because the irqchip isn't
>>>> using RPM.
>>>
>>> That's dependent on how the irqchip uses these helpers. If these helpers
>>> invoke RPM then that will not be the case.
>>
>> You need a very proper description of how that domain is working. If
>> all devices are idle, it's not necessary correct to power down the
>> irqchip as is might serve other devices as well.
>
> Agreed. The irqchip should only be powered down if there are no
> interrupts in-use/requested. Runtime-pm will keep a reference count for
> all requested IRQs.

That means the irqchip won't be powered down automatically when the
last user is powered down, unless all users release their irqs during
suspend.

Handling it automatically needs more bookkeeping than a simple reference
count.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jon Hunter Nov. 16, 2015, 10:34 a.m. UTC | #23
On 16/11/15 09:49, Geert Uytterhoeven wrote:
> On Mon, Nov 16, 2015 at 10:46 AM, Jon Hunter <jonathanh@nvidia.com> wrote:
>> On 13/11/15 20:01, Thomas Gleixner wrote:
>>> On Fri, 13 Nov 2015, Jon Hunter wrote:
>>>> On 12/11/15 23:20, Kevin Hilman wrote:
>>>>> If all the RPM devices in the domain go idle, it will be powered off
>>>>> independently of the status of the irqchip because the irqchip isn't
>>>>> using RPM.
>>>>
>>>> That's dependent on how the irqchip uses these helpers. If these helpers
>>>> invoke RPM then that will not be the case.
>>>
>>> You need a very proper description of how that domain is working. If
>>> all devices are idle, it's not necessary correct to power down the
>>> irqchip as is might serve other devices as well.
>>
>> Agreed. The irqchip should only be powered down if there are no
>> interrupts in-use/requested. Runtime-pm will keep a reference count for
>> all requested IRQs.
> 
> That means the irqchip won't be powered down automatically when the
> last user is powered down, unless all users release their irqs during
> suspend.

Right.

> Handling it automatically needs more bookkeeping than a simple reference
> count.

So what would you suggest? Adding a pm_runtime_register_irq() API that
would register an IRQ with the device that you want RPM to handle? Not
sure if there is a better/easier way to handle this.

Cheers
Jon



--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Geert Uytterhoeven Nov. 16, 2015, 10:48 a.m. UTC | #24
Hi Jon,

On Mon, Nov 16, 2015 at 11:34 AM, Jon Hunter <jonathanh@nvidia.com> wrote:
> On 16/11/15 09:49, Geert Uytterhoeven wrote:
>> On Mon, Nov 16, 2015 at 10:46 AM, Jon Hunter <jonathanh@nvidia.com> wrote:
>>> On 13/11/15 20:01, Thomas Gleixner wrote:
>>>> On Fri, 13 Nov 2015, Jon Hunter wrote:
>>>>> On 12/11/15 23:20, Kevin Hilman wrote:
>>>>>> If all the RPM devices in the domain go idle, it will be powered off
>>>>>> independently of the status of the irqchip because the irqchip isn't
>>>>>> using RPM.
>>>>>
>>>>> That's dependent on how the irqchip uses these helpers. If these helpers
>>>>> invoke RPM then that will not be the case.
>>>>
>>>> You need a very proper description of how that domain is working. If
>>>> all devices are idle, it's not necessary correct to power down the
>>>> irqchip as is might serve other devices as well.
>>>
>>> Agreed. The irqchip should only be powered down if there are no
>>> interrupts in-use/requested. Runtime-pm will keep a reference count for
>>> all requested IRQs.
>>
>> That means the irqchip won't be powered down automatically when the
>> last user is powered down, unless all users release their irqs during
>> suspend.
>
> Right.
>
>> Handling it automatically needs more bookkeeping than a simple reference
>> count.
>
> So what would you suggest? Adding a pm_runtime_register_irq() API that
> would register an IRQ with the device that you want RPM to handle? Not
> sure if there is a better/easier way to handle this.

The irqchip needs to keep track how many times request_irq() has been
called, cfr. your suggestion above.

On the other side, the system needs to keep track how many times request_irq()
has been called for each irqchip, so it can subtract those numbers from the
irqchip's counters during suspend of the device, and re-add them during resume.
So we need at least a "struct device *" parameter for request_irq().
devm_request_irq() already has that, but not all drivers use that.

However, I think this should be looked at into the context of "[RFD]
Functional dependencies between devices".
https://lwn.net/Articles/662205/
https://lkml.org/lkml/2015/10/27/388

There can be other dependencies than interrupts between devices.
All functions using dependencies need a "struct device *" parameter to
record information.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jon Hunter Nov. 17, 2015, 11:57 a.m. UTC | #25
Hi Geert,

On 16/11/15 10:48, Geert Uytterhoeven wrote:
> On Mon, Nov 16, 2015 at 11:34 AM, Jon Hunter <jonathanh@nvidia.com> wrote:

[snip]

>>> Handling it automatically needs more bookkeeping than a simple reference
>>> count.
>>
>> So what would you suggest? Adding a pm_runtime_register_irq() API that
>> would register an IRQ with the device that you want RPM to handle? Not
>> sure if there is a better/easier way to handle this.
> 
> The irqchip needs to keep track how many times request_irq() has been
> called, cfr. your suggestion above.
> 
> On the other side, the system needs to keep track how many times request_irq()
> has been called for each irqchip, so it can subtract those numbers from the
> irqchip's counters during suspend of the device, and re-add them during resume.
> So we need at least a "struct device *" parameter for request_irq().
> devm_request_irq() already has that, but not all drivers use that.

Yes that would make sense. However, I am wondering if the
syscore suspend/resume operators could be used here to do something
like ...

	pm_runtime_disable(dev);
	if (!pm_runtime_status_suspended(dev))
		chip->irq_runtime_suspend(data);

> However, I think this should be looked at into the context of "[RFD]
> Functional dependencies between devices".
> https://lwn.net/Articles/662205/
> https://lkml.org/lkml/2015/10/27/388
> 
> There can be other dependencies than interrupts between devices.
> All functions using dependencies need a "struct device *" parameter to
> record information.

Yes I like the sound of that. That would be ideal. However, I am
guessing that that is a way off at the moment ...

Cheers
Jon
--
To unsubscribe from this list: send the line "unsubscribe linux-tegra" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 3c1c96786248..a29050cfb5f6 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -329,6 +329,8 @@  static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
  *			chip, when one or more interrupts are installed
  * @irq_resume:		function called from core code on resume once per chip,
  *			when one ore more interrupts are installed
+ * @irq_runtime_suspend:function called to runtime suspend a chip
+ * @irq_runtime_resume:	function called to runtime resume a chip
  * @irq_pm_shutdown:	function called from core code on shutdown once per chip
  * @irq_calc_mask:	Optional function to set irq_data.mask for special cases
  * @irq_print_chip:	optional to print special chip info in show_interrupts
@@ -369,6 +371,8 @@  struct irq_chip {
 
 	void		(*irq_suspend)(struct irq_data *data);
 	void		(*irq_resume)(struct irq_data *data);
+	int		(*irq_runtime_suspend)(struct irq_data *data);
+	int		(*irq_runtime_resume)(struct irq_data *data);
 	void		(*irq_pm_shutdown)(struct irq_data *data);
 
 	void		(*irq_calc_mask)(struct irq_data *data);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 05c2188271b8..187e49369c16 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -125,6 +125,23 @@  static inline void chip_bus_sync_unlock(struct irq_desc *desc)
 		desc->irq_data.chip->irq_bus_sync_unlock(&desc->irq_data);
 }
 
+/* Inline functions for support of irq chips that require runtime pm */
+static inline int chip_runtime_resume(struct irq_desc *desc)
+{
+	if (!desc->irq_data.chip->irq_runtime_resume)
+		return 0;
+
+	return desc->irq_data.chip->irq_runtime_resume(&desc->irq_data);
+}
+
+static inline int chip_runtime_suspend(struct irq_desc *desc)
+{
+	if (!desc->irq_data.chip->irq_runtime_suspend)
+		return 0;
+
+	return desc->irq_data.chip->irq_runtime_suspend(&desc->irq_data);
+}
+
 #define _IRQ_DESC_CHECK		(1 << 0)
 #define _IRQ_DESC_PERCPU	(1 << 1)
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0eebaeef317b..66e33df73140 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1116,6 +1116,10 @@  __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	if (!try_module_get(desc->owner))
 		return -ENODEV;
 
+	ret = chip_runtime_resume(desc);
+	if (ret < 0)
+		return ret;
+
 	new->irq = irq;
 
 	/*
@@ -1393,6 +1397,7 @@  out_thread:
 		put_task_struct(t);
 	}
 out_mput:
+	chip_runtime_suspend(desc);
 	module_put(desc->owner);
 	return ret;
 }
@@ -1506,6 +1511,7 @@  static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 		}
 	}
 
+	chip_runtime_suspend(desc);
 	module_put(desc->owner);
 	kfree(action->secondary);
 	return action;
@@ -1792,6 +1798,7 @@  static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_
 
 	unregister_handler_proc(irq, action);
 
+	chip_runtime_suspend(desc);
 	module_put(desc->owner);
 	return action;