diff mbox

[1/6] iommu/arm-smmu: add support for specifying clocks

Message ID 1407891099-24641-2-git-send-email-mitchelh@codeaurora.org
State Superseded, archived
Headers show

Commit Message

Mitchel Humpherys Aug. 13, 2014, 12:51 a.m. UTC
On some platforms with tight power constraints it is polite to only
leave your clocks on for as long as you absolutely need them. Currently
we assume that all clocks necessary for SMMU register access are always
on.

Add some optional device tree properties to specify any clocks that are
necessary for SMMU register access and turn them on and off as needed.

If no clocks are specified in the device tree things continue to work
the way they always have: we assume all necessary clocks are always
turned on.

Signed-off-by: Mitchel Humpherys <mitchelh@codeaurora.org>
---
 .../devicetree/bindings/iommu/arm,smmu.txt         |  11 ++
 drivers/iommu/arm-smmu.c                           | 127 +++++++++++++++++++--
 2 files changed, 129 insertions(+), 9 deletions(-)

Comments

Mitchel Humpherys Aug. 13, 2014, 9:07 p.m. UTC | #1
Well hopefully this isn't too Nick Krouse-esque, but I have some
comments on my own patch below. I sat on these for a few days but have
noticed a few things after testing on another platform...

On Tue, Aug 12 2014 at 05:51:34 PM, Mitchel Humpherys <mitchelh@codeaurora.org> wrote:
> On some platforms with tight power constraints it is polite to only
> leave your clocks on for as long as you absolutely need them. Currently
> we assume that all clocks necessary for SMMU register access are always
> on.
>
> Add some optional device tree properties to specify any clocks that are
> necessary for SMMU register access and turn them on and off as needed.
>
> If no clocks are specified in the device tree things continue to work
> the way they always have: we assume all necessary clocks are always
> turned on.
>
> Signed-off-by: Mitchel Humpherys <mitchelh@codeaurora.org>
> ---
>  .../devicetree/bindings/iommu/arm,smmu.txt         |  11 ++
>  drivers/iommu/arm-smmu.c                           | 127 +++++++++++++++++++--
>  2 files changed, 129 insertions(+), 9 deletions(-)

[...]

> -static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
> +static int arm_smmu_init_clocks(struct arm_smmu_device *smmu)
> +{
> +	const char *cname;
> +	struct property *prop;
> +	int i;
> +	struct device *dev = smmu->dev;
> +
> +	smmu->num_clocks = of_property_count_strings(dev->of_node,
> +						"clock-names");
> +
> +	if (!smmu->num_clocks)
> +		return 0;
> +
> +	smmu->clocks = devm_kzalloc(
> +		dev, sizeof(*smmu->clocks) * smmu->num_clocks,
> +		GFP_KERNEL);
> +
> +	if (!smmu->clocks) {
> +		dev_err(dev,
> +			"Failed to allocate memory for clocks\n");
> +		return -ENODEV;
> +	}
> +
> +	i = 0;
> +	of_property_for_each_string(dev->of_node, "clock-names",
> +				prop, cname) {
> +		struct clk *c = devm_clk_get(dev, cname);
> +		if (IS_ERR(c)) {
> +			dev_err(dev, "Couldn't get clock: %s",
> +				cname);
> +			return -ENODEV;
> +		}
> +
> +		if (clk_get_rate(c) == 0) {
> +			long rate = clk_round_rate(c, 1000);
> +			clk_set_rate(c, rate);
> +		}
> +
> +		smmu->clocks[i] = c;
> +
> +		++i;
> +	}
> +	return 0;
> +}
> +
> +int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)

The `static' was dropped unintentionally here.


-Mitch
Will Deacon Aug. 19, 2014, 12:58 p.m. UTC | #2
On Wed, Aug 13, 2014 at 01:51:34AM +0100, Mitchel Humpherys wrote:
> On some platforms with tight power constraints it is polite to only
> leave your clocks on for as long as you absolutely need them. Currently
> we assume that all clocks necessary for SMMU register access are always
> on.
> 
> Add some optional device tree properties to specify any clocks that are
> necessary for SMMU register access and turn them on and off as needed.
> 
> If no clocks are specified in the device tree things continue to work
> the way they always have: we assume all necessary clocks are always
> turned on.

How does this interact with an SMMU in bypass mode?

[...]

> +static int arm_smmu_enable_clocks(struct arm_smmu_device *smmu)
> +{
> +	int i, ret = 0;
> +
> +	for (i = 0; i < smmu->num_clocks; ++i) {
> +		ret = clk_prepare_enable(smmu->clocks[i]);
> +		if (ret) {
> +			dev_err(smmu->dev, "Couldn't enable clock #%d\n", i);
> +			while (i--)
> +				clk_disable_unprepare(smmu->clocks[i]);
> +			break;
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +static void arm_smmu_disable_clocks(struct arm_smmu_device *smmu)
> +{
> +	int i;
> +
> +	for (i = 0; i < smmu->num_clocks; ++i)
> +		clk_disable_unprepare(smmu->clocks[i]);
> +}

What stops theses from racing with each other when there are multiple
clocks? I also assume that the clk API ignores calls to clk_enable_prepare
for a clk that's already enabled? I couldn't find that code...

>  /* Wait for any pending TLB invalidations to complete */
>  static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
>  {
> @@ -644,11 +672,15 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
>  	struct arm_smmu_device *smmu = smmu_domain->smmu;
>  	void __iomem *cb_base;
>  
> +	arm_smmu_enable_clocks(smmu);

How can I get a context interrupt from an SMMU without its clocks enabled?

[...]

> +int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
>  {
>  	unsigned long size;
>  	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
> @@ -2027,10 +2124,16 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
>  	}
>  	dev_notice(dev, "registered %d master devices\n", i);
>  
> -	err = arm_smmu_device_cfg_probe(smmu);
> +	err = arm_smmu_init_clocks(smmu);
>  	if (err)
>  		goto out_put_masters;
>  
> +	arm_smmu_enable_clocks(smmu);
> +
> +	err = arm_smmu_device_cfg_probe(smmu);
> +	if (err)
> +		goto out_disable_clocks;
> +
>  	parse_driver_options(smmu);
>  
>  	if (smmu->version > 1 &&
> @@ -2039,7 +2142,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
>  			"found only %d context interrupt(s) but %d required\n",
>  			smmu->num_context_irqs, smmu->num_context_banks);
>  		err = -ENODEV;
> -		goto out_put_masters;
> +		goto out_disable_clocks;
>  	}
>  
>  	for (i = 0; i < smmu->num_global_irqs; ++i) {
> @@ -2061,12 +2164,16 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
>  	spin_unlock(&arm_smmu_devices_lock);
>  
>  	arm_smmu_device_reset(smmu);
> +	arm_smmu_disable_clocks(smmu);

I wonder if this is really the right thing to do. Rather than the
fine-grained clock enable/disable you have, why don't we just enable in
domain_init and disable in domain_destroy, with refcounting for the clocks?

Will
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Olav Haugan Aug. 19, 2014, 7:03 p.m. UTC | #3
Hi Will,

On 8/19/2014 5:58 AM, Will Deacon wrote:
> On Wed, Aug 13, 2014 at 01:51:34AM +0100, Mitchel Humpherys wrote:
>> On some platforms with tight power constraints it is polite to only
>> leave your clocks on for as long as you absolutely need them. Currently
>> we assume that all clocks necessary for SMMU register access are always
>> on.
>>
>> Add some optional device tree properties to specify any clocks that are
>> necessary for SMMU register access and turn them on and off as needed.
>>
>> If no clocks are specified in the device tree things continue to work
>> the way they always have: we assume all necessary clocks are always
>> turned on.
> 
> How does this interact with an SMMU in bypass mode?

Do you mean if you have a platform that requires clock and power
management but we leave the SMMU in bypass (i.e. no one calls into the
SMMU driver) how are the clock/power managed?

Clients of the SMMU driver are required to vote for clocks and power
when they know they need to use the SMMU. However, the clock and power
needed to be on for the SMMU to service bus masters aren't necessarily
the same as the ones needed to read/write registers...See below.

> 
> [...]
> 
>> +static int arm_smmu_enable_clocks(struct arm_smmu_device *smmu)
>> +{
>> +	int i, ret = 0;
>> +
>> +	for (i = 0; i < smmu->num_clocks; ++i) {
>> +		ret = clk_prepare_enable(smmu->clocks[i]);
>> +		if (ret) {
>> +			dev_err(smmu->dev, "Couldn't enable clock #%d\n", i);
>> +			while (i--)
>> +				clk_disable_unprepare(smmu->clocks[i]);
>> +			break;
>> +		}
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>> +static void arm_smmu_disable_clocks(struct arm_smmu_device *smmu)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < smmu->num_clocks; ++i)
>> +		clk_disable_unprepare(smmu->clocks[i]);
>> +}
> 
> What stops theses from racing with each other when there are multiple
> clocks? I also assume that the clk API ignores calls to clk_enable_prepare
> for a clk that's already enabled? I couldn't find that code...

All the clock APIs are reference counted yes. Not sure what you mean by
racing with each other? When you call to enable a clock the call does
not return until the clock is already ON (or OFF).

>>  /* Wait for any pending TLB invalidations to complete */
>>  static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
>>  {
>> @@ -644,11 +672,15 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
>>  	struct arm_smmu_device *smmu = smmu_domain->smmu;
>>  	void __iomem *cb_base;
>>  
>> +	arm_smmu_enable_clocks(smmu);
> 
> How can I get a context interrupt from an SMMU without its clocks enabled?

Good question. At least in our SoC we usually have at least 1 "core"
clock and 1 "programming" clock. Both clocks are needed to read/write
registers but only 1 of the clocks is needed for the SMMU to service bus
master requests.

> 
> [...]
> 
>> +int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
>>  {
>>  	unsigned long size;
>>  	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
>> @@ -2027,10 +2124,16 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
>>  	}
>>  	dev_notice(dev, "registered %d master devices\n", i);
>>  
>> -	err = arm_smmu_device_cfg_probe(smmu);
>> +	err = arm_smmu_init_clocks(smmu);
>>  	if (err)
>>  		goto out_put_masters;
>>  
>> +	arm_smmu_enable_clocks(smmu);
>> +
>> +	err = arm_smmu_device_cfg_probe(smmu);
>> +	if (err)
>> +		goto out_disable_clocks;
>> +
>>  	parse_driver_options(smmu);
>>  
>>  	if (smmu->version > 1 &&
>> @@ -2039,7 +2142,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
>>  			"found only %d context interrupt(s) but %d required\n",
>>  			smmu->num_context_irqs, smmu->num_context_banks);
>>  		err = -ENODEV;
>> -		goto out_put_masters;
>> +		goto out_disable_clocks;
>>  	}
>>  
>>  	for (i = 0; i < smmu->num_global_irqs; ++i) {
>> @@ -2061,12 +2164,16 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
>>  	spin_unlock(&arm_smmu_devices_lock);
>>  
>>  	arm_smmu_device_reset(smmu);
>> +	arm_smmu_disable_clocks(smmu);
> 
> I wonder if this is really the right thing to do. Rather than the
> fine-grained clock enable/disable you have, why don't we just enable in
> domain_init and disable in domain_destroy, with refcounting for the clocks?
> 

So the whole point of all of this is that we try to save power. As Mitch
wrote in the commit text we want to only leave the clock and power on
for as short period of time as possible.

Thanks,

Olav
Mitchel Humpherys Aug. 19, 2014, 7:28 p.m. UTC | #4
On Tue, Aug 19 2014 at 05:58:34 AM, Will Deacon <will.deacon@arm.com> wrote:
> I also assume that the clk API ignores calls to clk_enable_prepare
> for a clk that's already enabled? I couldn't find that code...

That's clk_prepare_enable, not clk_enable_prepare. It's in
<linux/clk.h>.


-Mitch
Will Deacon Aug. 26, 2014, 2:27 p.m. UTC | #5
[adding Mike]

On Tue, Aug 19, 2014 at 08:03:09PM +0100, Olav Haugan wrote:
> Hi Will,

Hi Olav,

> On 8/19/2014 5:58 AM, Will Deacon wrote:
> > On Wed, Aug 13, 2014 at 01:51:34AM +0100, Mitchel Humpherys wrote:
> >> On some platforms with tight power constraints it is polite to only
> >> leave your clocks on for as long as you absolutely need them. Currently
> >> we assume that all clocks necessary for SMMU register access are always
> >> on.
> >>
> >> Add some optional device tree properties to specify any clocks that are
> >> necessary for SMMU register access and turn them on and off as needed.
> >>
> >> If no clocks are specified in the device tree things continue to work
> >> the way they always have: we assume all necessary clocks are always
> >> turned on.
> > 
> > How does this interact with an SMMU in bypass mode?
> 
> Do you mean if you have a platform that requires clock and power
> management but we leave the SMMU in bypass (i.e. no one calls into the
> SMMU driver) how are the clock/power managed?
> 
> Clients of the SMMU driver are required to vote for clocks and power
> when they know they need to use the SMMU. However, the clock and power
> needed to be on for the SMMU to service bus masters aren't necessarily
> the same as the ones needed to read/write registers...See below.

The case I'm thinking of is where a device masters through the IOMMU, but
doesn't make use of any translations. In this case, its transactions will
bypass the SMMU and I want to ensure that continues to happen, regardless of
the power state of the SMMU.

> >> +static int arm_smmu_enable_clocks(struct arm_smmu_device *smmu)
> >> +{
> >> +	int i, ret = 0;
> >> +
> >> +	for (i = 0; i < smmu->num_clocks; ++i) {
> >> +		ret = clk_prepare_enable(smmu->clocks[i]);
> >> +		if (ret) {
> >> +			dev_err(smmu->dev, "Couldn't enable clock #%d\n", i);
> >> +			while (i--)
> >> +				clk_disable_unprepare(smmu->clocks[i]);
> >> +			break;
> >> +		}
> >> +	}
> >> +
> >> +	return ret;
> >> +}
> >> +
> >> +static void arm_smmu_disable_clocks(struct arm_smmu_device *smmu)
> >> +{
> >> +	int i;
> >> +
> >> +	for (i = 0; i < smmu->num_clocks; ++i)
> >> +		clk_disable_unprepare(smmu->clocks[i]);
> >> +}
> > 
> > What stops theses from racing with each other when there are multiple
> > clocks? I also assume that the clk API ignores calls to clk_enable_prepare
> > for a clk that's already enabled? I couldn't find that code...
> 
> All the clock APIs are reference counted yes. Not sure what you mean by
> racing with each other? When you call to enable a clock the call does
> not return until the clock is already ON (or OFF).

I was thinking of an interrupt handler racing with normal code, but actually
you balance the clk enable/disable in the interrupt handlers. However, it's
not safe to call these clk functions from irq context anyway, since
clk_prepare may sleep.

> >> +int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
> >>  {
> >>  	unsigned long size;
> >>  	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
> >> @@ -2027,10 +2124,16 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
> >>  	}
> >>  	dev_notice(dev, "registered %d master devices\n", i);
> >>  
> >> -	err = arm_smmu_device_cfg_probe(smmu);
> >> +	err = arm_smmu_init_clocks(smmu);
> >>  	if (err)
> >>  		goto out_put_masters;
> >>  
> >> +	arm_smmu_enable_clocks(smmu);
> >> +
> >> +	err = arm_smmu_device_cfg_probe(smmu);
> >> +	if (err)
> >> +		goto out_disable_clocks;
> >> +
> >>  	parse_driver_options(smmu);
> >>  
> >>  	if (smmu->version > 1 &&
> >> @@ -2039,7 +2142,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
> >>  			"found only %d context interrupt(s) but %d required\n",
> >>  			smmu->num_context_irqs, smmu->num_context_banks);
> >>  		err = -ENODEV;
> >> -		goto out_put_masters;
> >> +		goto out_disable_clocks;
> >>  	}
> >>  
> >>  	for (i = 0; i < smmu->num_global_irqs; ++i) {
> >> @@ -2061,12 +2164,16 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
> >>  	spin_unlock(&arm_smmu_devices_lock);
> >>  
> >>  	arm_smmu_device_reset(smmu);
> >> +	arm_smmu_disable_clocks(smmu);
> > 
> > I wonder if this is really the right thing to do. Rather than the
> > fine-grained clock enable/disable you have, why don't we just enable in
> > domain_init and disable in domain_destroy, with refcounting for the clocks?
> > 
> 
> So the whole point of all of this is that we try to save power. As Mitch
> wrote in the commit text we want to only leave the clock and power on
> for as short period of time as possible.

Understood, but if the clocks are going up and down like yo-yos, then it's
not obvious that you end up saving any power at all. Have you tried
measuring the power consumption with different granularities for the clocks?

The code you're proposing seems to take the approach of `we're going to
access registers so enable the clocks, access the registers then disable the
clocks', which is simple but may not be particularly effective.

Will
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mitchel Humpherys Sept. 10, 2014, 1:29 a.m. UTC | #6
On Tue, Aug 26 2014 at 07:27:58 AM, Will Deacon <will.deacon@arm.com> wrote:
> [adding Mike]
>
> On Tue, Aug 19, 2014 at 08:03:09PM +0100, Olav Haugan wrote:
>> Hi Will,
>
> Hi Olav,
>
>> On 8/19/2014 5:58 AM, Will Deacon wrote:
>> > On Wed, Aug 13, 2014 at 01:51:34AM +0100, Mitchel Humpherys wrote:
>> >> On some platforms with tight power constraints it is polite to only
>> >> leave your clocks on for as long as you absolutely need them. Currently
>> >> we assume that all clocks necessary for SMMU register access are always
>> >> on.
>> >>
>> >> Add some optional device tree properties to specify any clocks that are
>> >> necessary for SMMU register access and turn them on and off as needed.
>> >>
>> >> If no clocks are specified in the device tree things continue to work
>> >> the way they always have: we assume all necessary clocks are always
>> >> turned on.
>> > 
>> > How does this interact with an SMMU in bypass mode?
>> 
>> Do you mean if you have a platform that requires clock and power
>> management but we leave the SMMU in bypass (i.e. no one calls into the
>> SMMU driver) how are the clock/power managed?
>> 
>> Clients of the SMMU driver are required to vote for clocks and power
>> when they know they need to use the SMMU. However, the clock and power
>> needed to be on for the SMMU to service bus masters aren't necessarily
>> the same as the ones needed to read/write registers...See below.
>
> The case I'm thinking of is where a device masters through the IOMMU, but
> doesn't make use of any translations. In this case, its transactions will
> bypass the SMMU and I want to ensure that continues to happen, regardless of
> the power state of the SMMU.

Then I assume the driver for such a device wouldn't be attaching to (or
detaching from) the IOMMU, so we won't be touching it at all either
way. Or am I missing something?

>
>> >> +static int arm_smmu_enable_clocks(struct arm_smmu_device *smmu)
>> >> +{
>> >> +	int i, ret = 0;
>> >> +
>> >> +	for (i = 0; i < smmu->num_clocks; ++i) {
>> >> +		ret = clk_prepare_enable(smmu->clocks[i]);
>> >> +		if (ret) {
>> >> +			dev_err(smmu->dev, "Couldn't enable clock #%d\n", i);
>> >> +			while (i--)
>> >> +				clk_disable_unprepare(smmu->clocks[i]);
>> >> +			break;
>> >> +		}
>> >> +	}
>> >> +
>> >> +	return ret;
>> >> +}
>> >> +
>> >> +static void arm_smmu_disable_clocks(struct arm_smmu_device *smmu)
>> >> +{
>> >> +	int i;
>> >> +
>> >> +	for (i = 0; i < smmu->num_clocks; ++i)
>> >> +		clk_disable_unprepare(smmu->clocks[i]);
>> >> +}
>> > 
>> > What stops theses from racing with each other when there are multiple
>> > clocks? I also assume that the clk API ignores calls to clk_enable_prepare
>> > for a clk that's already enabled? I couldn't find that code...
>> 
>> All the clock APIs are reference counted yes. Not sure what you mean by
>> racing with each other? When you call to enable a clock the call does
>> not return until the clock is already ON (or OFF).
>
> I was thinking of an interrupt handler racing with normal code, but actually
> you balance the clk enable/disable in the interrupt handlers. However, it's
> not safe to call these clk functions from irq context anyway, since
> clk_prepare may sleep.

Ah yes. You okay with moving to a threaded IRQ?

>
>> >> +int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
>> >>  {
>> >>  	unsigned long size;
>> >>  	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
>> >> @@ -2027,10 +2124,16 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
>> >>  	}
>> >>  	dev_notice(dev, "registered %d master devices\n", i);
>> >>  
>> >> -	err = arm_smmu_device_cfg_probe(smmu);
>> >> +	err = arm_smmu_init_clocks(smmu);
>> >>  	if (err)
>> >>  		goto out_put_masters;
>> >>  
>> >> +	arm_smmu_enable_clocks(smmu);
>> >> +
>> >> +	err = arm_smmu_device_cfg_probe(smmu);
>> >> +	if (err)
>> >> +		goto out_disable_clocks;
>> >> +
>> >>  	parse_driver_options(smmu);
>> >>  
>> >>  	if (smmu->version > 1 &&
>> >> @@ -2039,7 +2142,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
>> >>  			"found only %d context interrupt(s) but %d required\n",
>> >>  			smmu->num_context_irqs, smmu->num_context_banks);
>> >>  		err = -ENODEV;
>> >> -		goto out_put_masters;
>> >> +		goto out_disable_clocks;
>> >>  	}
>> >>  
>> >>  	for (i = 0; i < smmu->num_global_irqs; ++i) {
>> >> @@ -2061,12 +2164,16 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
>> >>  	spin_unlock(&arm_smmu_devices_lock);
>> >>  
>> >>  	arm_smmu_device_reset(smmu);
>> >> +	arm_smmu_disable_clocks(smmu);
>> > 
>> > I wonder if this is really the right thing to do. Rather than the
>> > fine-grained clock enable/disable you have, why don't we just enable in
>> > domain_init and disable in domain_destroy, with refcounting for the clocks?
>> > 
>> 
>> So the whole point of all of this is that we try to save power. As Mitch
>> wrote in the commit text we want to only leave the clock and power on
>> for as short period of time as possible.
>
> Understood, but if the clocks are going up and down like yo-yos, then it's
> not obvious that you end up saving any power at all. Have you tried
> measuring the power consumption with different granularities for the
> clocks?

This has been profiled extensively and for some use cases it's a huge
win. Unfortunately we don't have any numbers for public sharing :( but
you can imagine a use case where some multimedia framework maps a bunch
of buffers into an SMMU at the beginning of some interactive user
session and doesn't unmap them until the (human) user decides they are
done. This could be a long time, all the while these clocks could be
off, saving power.

> The code you're proposing seems to take the approach of `we're going to
> access registers so enable the clocks, access the registers then disable the
> clocks', which is simple but may not be particularly effective.

Yes, that's a good summary of the approach here. It has been effective
in saving power for us in the past...


-Mitch
Will Deacon Sept. 10, 2014, 6:27 p.m. UTC | #7
On Wed, Sep 10, 2014 at 02:29:42AM +0100, Mitchel Humpherys wrote:
> On Tue, Aug 26 2014 at 07:27:58 AM, Will Deacon <will.deacon@arm.com> wrote:
> > On Tue, Aug 19, 2014 at 08:03:09PM +0100, Olav Haugan wrote:
> >> Clients of the SMMU driver are required to vote for clocks and power
> >> when they know they need to use the SMMU. However, the clock and power
> >> needed to be on for the SMMU to service bus masters aren't necessarily
> >> the same as the ones needed to read/write registers...See below.
> >
> > The case I'm thinking of is where a device masters through the IOMMU, but
> > doesn't make use of any translations. In this case, its transactions will
> > bypass the SMMU and I want to ensure that continues to happen, regardless of
> > the power state of the SMMU.
> 
> Then I assume the driver for such a device wouldn't be attaching to (or
> detaching from) the IOMMU, so we won't be touching it at all either
> way. Or am I missing something?

As long as its only the register file that gets powered down, then there's
no issue. However, that's making assumptions about what these clocks are
controlling. Is there a way for the driver to know which aspects of the
device are controlled by which clock?

> >> > What stops theses from racing with each other when there are multiple
> >> > clocks? I also assume that the clk API ignores calls to clk_enable_prepare
> >> > for a clk that's already enabled? I couldn't find that code...
> >> 
> >> All the clock APIs are reference counted yes. Not sure what you mean by
> >> racing with each other? When you call to enable a clock the call does
> >> not return until the clock is already ON (or OFF).
> >
> > I was thinking of an interrupt handler racing with normal code, but actually
> > you balance the clk enable/disable in the interrupt handlers. However, it's
> > not safe to call these clk functions from irq context anyway, since
> > clk_prepare may sleep.
> 
> Ah yes. You okay with moving to a threaded IRQ?

A threaded IRQ already makes sense for context interrupts (if anybody has a
platform that can do stalls properly), but it seems a bit weird for the
global fault handler. Is there no way to fix the race instead?

> >> >> @@ -2061,12 +2164,16 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
> >> >>  	spin_unlock(&arm_smmu_devices_lock);
> >> >>  
> >> >>  	arm_smmu_device_reset(smmu);
> >> >> +	arm_smmu_disable_clocks(smmu);
> >> > 
> >> > I wonder if this is really the right thing to do. Rather than the
> >> > fine-grained clock enable/disable you have, why don't we just enable in
> >> > domain_init and disable in domain_destroy, with refcounting for the clocks?
> >> > 
> >> 
> >> So the whole point of all of this is that we try to save power. As Mitch
> >> wrote in the commit text we want to only leave the clock and power on
> >> for as short period of time as possible.
> >
> > Understood, but if the clocks are going up and down like yo-yos, then it's
> > not obvious that you end up saving any power at all. Have you tried
> > measuring the power consumption with different granularities for the
> > clocks?
> 
> This has been profiled extensively and for some use cases it's a huge
> win. Unfortunately we don't have any numbers for public sharing :( but
> you can imagine a use case where some multimedia framework maps a bunch
> of buffers into an SMMU at the beginning of some interactive user
> session and doesn't unmap them until the (human) user decides they are
> done. This could be a long time, all the while these clocks could be
> off, saving power.

Ok, I can see that. I wonder whether we could wrap all of the iommu_ops
functions with the clock enable/disable code, instead of putting it directly
into the drivers?

> > The code you're proposing seems to take the approach of `we're going to
> > access registers so enable the clocks, access the registers then disable the
> > clocks', which is simple but may not be particularly effective.
> 
> Yes, that's a good summary of the approach here. It has been effective
> in saving power for us in the past...

Mike, do you have any experience with this sort of stuff?

Will
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mitchel Humpherys Sept. 10, 2014, 7:09 p.m. UTC | #8
On Wed, Sep 10 2014 at 11:27:39 AM, Will Deacon <will.deacon@arm.com> wrote:
> On Wed, Sep 10, 2014 at 02:29:42AM +0100, Mitchel Humpherys wrote:
>> On Tue, Aug 26 2014 at 07:27:58 AM, Will Deacon <will.deacon@arm.com> wrote:
>> > On Tue, Aug 19, 2014 at 08:03:09PM +0100, Olav Haugan wrote:
>> >> Clients of the SMMU driver are required to vote for clocks and power
>> >> when they know they need to use the SMMU. However, the clock and power
>> >> needed to be on for the SMMU to service bus masters aren't necessarily
>> >> the same as the ones needed to read/write registers...See below.
>> >
>> > The case I'm thinking of is where a device masters through the IOMMU, but
>> > doesn't make use of any translations. In this case, its transactions will
>> > bypass the SMMU and I want to ensure that continues to happen, regardless of
>> > the power state of the SMMU.
>> 
>> Then I assume the driver for such a device wouldn't be attaching to (or
>> detaching from) the IOMMU, so we won't be touching it at all either
>> way. Or am I missing something?
>
> As long as its only the register file that gets powered down, then there's
> no issue. However, that's making assumptions about what these clocks are
> controlling. Is there a way for the driver to know which aspects of the
> device are controlled by which clock?

Yes, folks should only be putting "config" clocks here. In our system,
at least, the clocks for configuring the SMMU are different than those
for using it. Maybe I should make a note about what "kinds" of clocks
can be specified here in the bindings (i.e. only those that can be
safely disabled while still allowing translations to occur)?

>
>> >> > What stops theses from racing with each other when there are multiple
>> >> > clocks? I also assume that the clk API ignores calls to clk_enable_prepare
>> >> > for a clk that's already enabled? I couldn't find that code...
>> >> 
>> >> All the clock APIs are reference counted yes. Not sure what you mean by
>> >> racing with each other? When you call to enable a clock the call does
>> >> not return until the clock is already ON (or OFF).
>> >
>> > I was thinking of an interrupt handler racing with normal code, but actually
>> > you balance the clk enable/disable in the interrupt handlers. However, it's
>> > not safe to call these clk functions from irq context anyway, since
>> > clk_prepare may sleep.
>> 
>> Ah yes. You okay with moving to a threaded IRQ?
>
> A threaded IRQ already makes sense for context interrupts (if anybody has a
> platform that can do stalls properly), but it seems a bit weird for the
> global fault handler. Is there no way to fix the race instead?

Are you referring to the scenario where someone might be disabling
clocks at the same time? This isn't a problem since the clocks are
refcounted. I believe the main problem here is calling clk_enable from
atomic context since it might sleep.

For my own edification, why would it be weird to move to a threaded IRQ
here? We're not really doing any important work here (just printing an
informational message) so moving to a threaded IRQ actually seems like
the courteous thing to do...

>
>> >> >> @@ -2061,12 +2164,16 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
>> >> >>  	spin_unlock(&arm_smmu_devices_lock);
>> >> >>  
>> >> >>  	arm_smmu_device_reset(smmu);
>> >> >> +	arm_smmu_disable_clocks(smmu);
>> >> > 
>> >> > I wonder if this is really the right thing to do. Rather than the
>> >> > fine-grained clock enable/disable you have, why don't we just enable in
>> >> > domain_init and disable in domain_destroy, with refcounting for the clocks?
>> >> > 
>> >> 
>> >> So the whole point of all of this is that we try to save power. As Mitch
>> >> wrote in the commit text we want to only leave the clock and power on
>> >> for as short period of time as possible.
>> >
>> > Understood, but if the clocks are going up and down like yo-yos, then it's
>> > not obvious that you end up saving any power at all. Have you tried
>> > measuring the power consumption with different granularities for the
>> > clocks?
>> 
>> This has been profiled extensively and for some use cases it's a huge
>> win. Unfortunately we don't have any numbers for public sharing :( but
>> you can imagine a use case where some multimedia framework maps a bunch
>> of buffers into an SMMU at the beginning of some interactive user
>> session and doesn't unmap them until the (human) user decides they are
>> done. This could be a long time, all the while these clocks could be
>> off, saving power.
>
> Ok, I can see that. I wonder whether we could wrap all of the iommu_ops
> functions with the clock enable/disable code, instead of putting it directly
> into the drivers?

Interesting idea... I'm up for it if the IOMMU core and driver
maintainers are okay with it.

>
>> > The code you're proposing seems to take the approach of `we're going to
>> > access registers so enable the clocks, access the registers then disable the
>> > clocks', which is simple but may not be particularly effective.
>> 
>> Yes, that's a good summary of the approach here. It has been effective
>> in saving power for us in the past...
>
> Mike, do you have any experience with this sort of stuff?
>
> Will



-Mitch
Mitchel Humpherys Sept. 15, 2014, 6:38 p.m. UTC | #9
On Wed, Sep 10 2014 at 12:09:06 PM, Mitchel Humpherys <mitchelh@codeaurora.org> wrote:
> On Wed, Sep 10 2014 at 11:27:39 AM, Will Deacon <will.deacon@arm.com> wrote:
>> On Wed, Sep 10, 2014 at 02:29:42AM +0100, Mitchel Humpherys wrote:
>>> On Tue, Aug 26 2014 at 07:27:58 AM, Will Deacon <will.deacon@arm.com> wrote:
>>> > On Tue, Aug 19, 2014 at 08:03:09PM +0100, Olav Haugan wrote:
>>> >> Clients of the SMMU driver are required to vote for clocks and power
>>> >> when they know they need to use the SMMU. However, the clock and power
>>> >> needed to be on for the SMMU to service bus masters aren't necessarily
>>> >> the same as the ones needed to read/write registers...See below.
>>> >
>>> > The case I'm thinking of is where a device masters through the IOMMU, but
>>> > doesn't make use of any translations. In this case, its transactions will
>>> > bypass the SMMU and I want to ensure that continues to happen, regardless of
>>> > the power state of the SMMU.
>>> 
>>> Then I assume the driver for such a device wouldn't be attaching to (or
>>> detaching from) the IOMMU, so we won't be touching it at all either
>>> way. Or am I missing something?
>>
>> As long as its only the register file that gets powered down, then there's
>> no issue. However, that's making assumptions about what these clocks are
>> controlling. Is there a way for the driver to know which aspects of the
>> device are controlled by which clock?
>
> Yes, folks should only be putting "config" clocks here. In our system,
> at least, the clocks for configuring the SMMU are different than those
> for using it. Maybe I should make a note about what "kinds" of clocks
> can be specified here in the bindings (i.e. only those that can be
> safely disabled while still allowing translations to occur)?

Let me amend this statement slightly.  Folks should be putting all
clocks necessary to program SMMU registers here.  On our system, this
actually does include the "core" clocks in addition to the "config"
clocks.  Clients won't vote for "config" clocks since they have no
business programming SMMU registers, so those will get shut down when we
remove our vote for them.  Clients *should* hold their votes for "core"
clocks for as long as they want to use the SMMU.  Also, for the bypass
case, clients should be voting for clocks and power for the SMMU
themselves.

In light of all this I guess there isn't really anything to say in the
DT bindings.


-Mitch
diff mbox

Patch

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index 2d0f7cd867..ceae3fe207 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -48,6 +48,17 @@  conditions.
                   aliases of secure registers have to be used during
                   SMMU configuration.
 
+- clocks        : List of clocks to be used during SMMU register access. See
+                  Documentation/devicetree/bindings/clock/clock-bindings.txt
+                  for information about the format. For each clock specified
+                  here, there must be a corresponding entery in clock-names
+                  (see below).
+
+- clock-names   : List of clock names corresponding to the clocks specified in
+                  the "clocks" property (above). See
+                  Documentation/devicetree/bindings/clock/clock-bindings.txt
+                  for more info.
+
 Example:
 
         smmu {
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 9fd8754db0..e123d75db3 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -399,6 +399,9 @@  struct arm_smmu_device {
 
 	struct list_head		list;
 	struct rb_root			masters;
+
+	int num_clocks;
+	struct clk **clocks;
 };
 
 struct arm_smmu_cfg {
@@ -589,6 +592,31 @@  static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
 	clear_bit(idx, map);
 }
 
+static int arm_smmu_enable_clocks(struct arm_smmu_device *smmu)
+{
+	int i, ret = 0;
+
+	for (i = 0; i < smmu->num_clocks; ++i) {
+		ret = clk_prepare_enable(smmu->clocks[i]);
+		if (ret) {
+			dev_err(smmu->dev, "Couldn't enable clock #%d\n", i);
+			while (i--)
+				clk_disable_unprepare(smmu->clocks[i]);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static void arm_smmu_disable_clocks(struct arm_smmu_device *smmu)
+{
+	int i;
+
+	for (i = 0; i < smmu->num_clocks; ++i)
+		clk_disable_unprepare(smmu->clocks[i]);
+}
+
 /* Wait for any pending TLB invalidations to complete */
 static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
 {
@@ -644,11 +672,15 @@  static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	void __iomem *cb_base;
 
+	arm_smmu_enable_clocks(smmu);
+
 	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
 	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
 
-	if (!(fsr & FSR_FAULT))
+	if (!(fsr & FSR_FAULT)) {
+		arm_smmu_disable_clocks(smmu);
 		return IRQ_NONE;
+	}
 
 	if (fsr & FSR_IGN)
 		dev_err_ratelimited(smmu->dev,
@@ -683,6 +715,8 @@  static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
 	if (fsr & FSR_SS)
 		writel_relaxed(resume, cb_base + ARM_SMMU_CB_RESUME);
 
+	arm_smmu_disable_clocks(smmu);
+
 	return ret;
 }
 
@@ -692,13 +726,17 @@  static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
 	struct arm_smmu_device *smmu = dev;
 	void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
 
+	arm_smmu_enable_clocks(smmu);
+
 	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
 	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
 	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
 	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
 
-	if (!gfsr)
+	if (!gfsr) {
+		arm_smmu_disable_clocks(smmu);
 		return IRQ_NONE;
+	}
 
 	dev_err_ratelimited(smmu->dev,
 		"Unexpected global fault, this could be serious\n");
@@ -707,6 +745,7 @@  static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
 		gfsr, gfsynr0, gfsynr1, gfsynr2);
 
 	writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
+	arm_smmu_disable_clocks(smmu);
 	return IRQ_HANDLED;
 }
 
@@ -991,10 +1030,12 @@  static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
 	if (!smmu)
 		return;
 
+	arm_smmu_enable_clocks(smmu_domain->smmu);
 	/* Disable the context bank and nuke the TLB before freeing it. */
 	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
 	writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
 	arm_smmu_tlb_inv_context(smmu_domain);
+	arm_smmu_disable_clocks(smmu_domain->smmu);
 
 	if (cfg->irptndx != INVALID_IRPTNDX) {
 		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
@@ -1222,6 +1263,7 @@  static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
 	 * We *must* clear the S2CR first, because freeing the SMR means
 	 * that it can be re-allocated immediately.
 	 */
+	arm_smmu_enable_clocks(smmu);
 	for (i = 0; i < cfg->num_streamids; ++i) {
 		u32 idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
 
@@ -1230,6 +1272,7 @@  static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
 	}
 
 	arm_smmu_master_free_smrs(smmu, cfg);
+	arm_smmu_disable_clocks(smmu);
 }
 
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
@@ -1250,6 +1293,8 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		return -EEXIST;
 	}
 
+	arm_smmu_enable_clocks(smmu);
+
 	/*
 	 * Sanity check the domain. We don't support domains across
 	 * different SMMUs.
@@ -1259,7 +1304,7 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		/* Now that we have a master, we can finalise the domain */
 		ret = arm_smmu_init_domain_context(domain, smmu);
 		if (IS_ERR_VALUE(ret))
-			return ret;
+			goto disable_clocks;
 
 		dom_smmu = smmu_domain->smmu;
 	}
@@ -1268,17 +1313,22 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		dev_err(dev,
 			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
 			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
-		return -EINVAL;
+		ret = -EINVAL;
+		goto disable_clocks;
 	}
 
 	/* Looks ok, so add the device to the domain */
 	cfg = find_smmu_master_cfg(dev);
-	if (!cfg)
-		return -ENODEV;
+	if (!cfg) {
+		ret = -ENODEV;
+		goto disable_clocks;
+	}
 
 	ret = arm_smmu_domain_add_master(smmu_domain, cfg);
 	if (!ret)
 		dev->archdata.iommu = domain;
+disable_clocks:
+	arm_smmu_disable_clocks(smmu);
 	return ret;
 }
 
@@ -1544,7 +1594,9 @@  static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 	struct arm_smmu_domain *smmu_domain = domain->priv;
 
 	ret = arm_smmu_handle_mapping(smmu_domain, iova, 0, size, 0);
+	arm_smmu_enable_clocks(smmu_domain->smmu);
 	arm_smmu_tlb_inv_context(smmu_domain);
+	arm_smmu_disable_clocks(smmu_domain->smmu);
 	return ret ? 0 : size;
 }
 
@@ -1792,7 +1844,52 @@  static int arm_smmu_id_size_to_bits(int size)
 	}
 }
 
-static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
+static int arm_smmu_init_clocks(struct arm_smmu_device *smmu)
+{
+	const char *cname;
+	struct property *prop;
+	int i;
+	struct device *dev = smmu->dev;
+
+	smmu->num_clocks = of_property_count_strings(dev->of_node,
+						"clock-names");
+
+	if (!smmu->num_clocks)
+		return 0;
+
+	smmu->clocks = devm_kzalloc(
+		dev, sizeof(*smmu->clocks) * smmu->num_clocks,
+		GFP_KERNEL);
+
+	if (!smmu->clocks) {
+		dev_err(dev,
+			"Failed to allocate memory for clocks\n");
+		return -ENODEV;
+	}
+
+	i = 0;
+	of_property_for_each_string(dev->of_node, "clock-names",
+				prop, cname) {
+		struct clk *c = devm_clk_get(dev, cname);
+		if (IS_ERR(c)) {
+			dev_err(dev, "Couldn't get clock: %s",
+				cname);
+			return -ENODEV;
+		}
+
+		if (clk_get_rate(c) == 0) {
+			long rate = clk_round_rate(c, 1000);
+			clk_set_rate(c, rate);
+		}
+
+		smmu->clocks[i] = c;
+
+		++i;
+	}
+	return 0;
+}
+
+int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
 {
 	unsigned long size;
 	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
@@ -2027,10 +2124,16 @@  static int arm_smmu_device_dt_probe(struct platform_device *pdev)
 	}
 	dev_notice(dev, "registered %d master devices\n", i);
 
-	err = arm_smmu_device_cfg_probe(smmu);
+	err = arm_smmu_init_clocks(smmu);
 	if (err)
 		goto out_put_masters;
 
+	arm_smmu_enable_clocks(smmu);
+
+	err = arm_smmu_device_cfg_probe(smmu);
+	if (err)
+		goto out_disable_clocks;
+
 	parse_driver_options(smmu);
 
 	if (smmu->version > 1 &&
@@ -2039,7 +2142,7 @@  static int arm_smmu_device_dt_probe(struct platform_device *pdev)
 			"found only %d context interrupt(s) but %d required\n",
 			smmu->num_context_irqs, smmu->num_context_banks);
 		err = -ENODEV;
-		goto out_put_masters;
+		goto out_disable_clocks;
 	}
 
 	for (i = 0; i < smmu->num_global_irqs; ++i) {
@@ -2061,12 +2164,16 @@  static int arm_smmu_device_dt_probe(struct platform_device *pdev)
 	spin_unlock(&arm_smmu_devices_lock);
 
 	arm_smmu_device_reset(smmu);
+	arm_smmu_disable_clocks(smmu);
 	return 0;
 
 out_free_irqs:
 	while (i--)
 		free_irq(smmu->irqs[i], smmu);
 
+out_disable_clocks:
+	arm_smmu_disable_clocks(smmu);
+
 out_put_masters:
 	for (node = rb_first(&smmu->masters); node; node = rb_next(node)) {
 		struct arm_smmu_master *master
@@ -2110,7 +2217,9 @@  static int arm_smmu_device_remove(struct platform_device *pdev)
 		free_irq(smmu->irqs[i], smmu);
 
 	/* Turn the thing off */
+	arm_smmu_enable_clocks(smmu);
 	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+	arm_smmu_disable_clocks(smmu);
 	return 0;
 }