diff mbox series

[v2,47/48] ARM: tegra: ventana: Support CPU voltage scaling and thermal throttling

Message ID 20201217180638.22748-48-digetx@gmail.com
State New
Headers show
Series [v2,01/48] dt-bindings: memory: tegra20: emc: Replace core regulator with power domain | expand

Commit Message

Dmitry Osipenko Dec. 17, 2020, 6:06 p.m. UTC
Enable CPU voltage scaling and thermal throttling on Tegra20 Ventana board.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
---
 arch/arm/boot/dts/tegra20-ventana.dts | 40 ++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

Comments

Daniel Lezcano Dec. 17, 2020, 6:28 p.m. UTC | #1
On 17/12/2020 19:06, Dmitry Osipenko wrote:
> Enable CPU voltage scaling and thermal throttling on Tegra20 Ventana board.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  arch/arm/boot/dts/tegra20-ventana.dts | 40 ++++++++++++++++++++++++++-
>  1 file changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm/boot/dts/tegra20-ventana.dts b/arch/arm/boot/dts/tegra20-ventana.dts
> index 14ace2ef749c..c2d9f38960bc 100644
> --- a/arch/arm/boot/dts/tegra20-ventana.dts
> +++ b/arch/arm/boot/dts/tegra20-ventana.dts
> @@ -2,8 +2,10 @@
>  /dts-v1/;
>  
>  #include <dt-bindings/input/input.h>
> +#include <dt-bindings/thermal/thermal.h>
>  #include "tegra20.dtsi"
>  #include "tegra20-cpu-opp.dtsi"
> +#include "tegra20-cpu-opp-microvolt.dtsi"
>  
>  / {
>  	model = "NVIDIA Tegra20 Ventana evaluation board";
> @@ -527,9 +529,10 @@ ldo_rtc {
>  			};
>  		};
>  
> -		temperature-sensor@4c {
> +		nct1008: temperature-sensor@4c {
>  			compatible = "onnn,nct1008";
>  			reg = <0x4c>;
> +			#thermal-sensor-cells = <1>;
>  		};
>  	};
>  
> @@ -615,10 +618,13 @@ clk32k_in: clock@0 {
>  
>  	cpus {
>  		cpu0: cpu@0 {
> +			cpu-supply = <&vdd_cpu>;
>  			operating-points-v2 = <&cpu0_opp_table>;
> +			#cooling-cells = <2>;
>  		};
>  
>  		cpu@1 {
> +			cpu-supply = <&vdd_cpu>;
>  			operating-points-v2 = <&cpu0_opp_table>;
>  		};
>  	};
> @@ -717,4 +723,36 @@ sound {
>  			 <&tegra_car TEGRA20_CLK_CDEV1>;
>  		clock-names = "pll_a", "pll_a_out0", "mclk";
>  	};
> +
> +	thermal-zones {
> +		cpu-thermal {
> +			polling-delay-passive = <1000>; /* milliseconds */
> +			polling-delay = <5000>; /* milliseconds */
> +
> +			thermal-sensors = <&nct1008 1>;
> +
> +			trips {
> +				trip0: cpu-alert0 {
> +					/* start throttling at 50C */
> +					temperature = <50000>;
> +					hysteresis = <200>;

Did you mean <2000> ?

> +					type = "passive";
> +				};
> +
> +				trip1: cpu-crit {
> +					/* shut down at 60C */
> +					temperature = <60000>;
> +					hysteresis = <2000>;

I think you can drop the hysteresis here, when the critical temperature
is reached, there is an emergency shutdown.

50°C and 60°C sound very low values, no ?

> +					type = "critical";
> +				};
> +			};
> +
> +			cooling-maps {
> +				map0 {
> +					trip = <&trip0>;
> +					cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;

You should add all CPUs here.

> +				};
> +			};
> +		};
> +	};
>  };
>
Dmitry Osipenko Dec. 17, 2020, 7:01 p.m. UTC | #2
17.12.2020 21:28, Daniel Lezcano пишет:
> On 17/12/2020 19:06, Dmitry Osipenko wrote:
>> Enable CPU voltage scaling and thermal throttling on Tegra20 Ventana board.
>>
>> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
>> ---
>>  arch/arm/boot/dts/tegra20-ventana.dts | 40 ++++++++++++++++++++++++++-
>>  1 file changed, 39 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/arm/boot/dts/tegra20-ventana.dts b/arch/arm/boot/dts/tegra20-ventana.dts
>> index 14ace2ef749c..c2d9f38960bc 100644
>> --- a/arch/arm/boot/dts/tegra20-ventana.dts
>> +++ b/arch/arm/boot/dts/tegra20-ventana.dts
>> @@ -2,8 +2,10 @@
>>  /dts-v1/;
>>  
>>  #include <dt-bindings/input/input.h>
>> +#include <dt-bindings/thermal/thermal.h>
>>  #include "tegra20.dtsi"
>>  #include "tegra20-cpu-opp.dtsi"
>> +#include "tegra20-cpu-opp-microvolt.dtsi"
>>  
>>  / {
>>  	model = "NVIDIA Tegra20 Ventana evaluation board";
>> @@ -527,9 +529,10 @@ ldo_rtc {
>>  			};
>>  		};
>>  
>> -		temperature-sensor@4c {
>> +		nct1008: temperature-sensor@4c {
>>  			compatible = "onnn,nct1008";
>>  			reg = <0x4c>;
>> +			#thermal-sensor-cells = <1>;
>>  		};
>>  	};
>>  
>> @@ -615,10 +618,13 @@ clk32k_in: clock@0 {
>>  
>>  	cpus {
>>  		cpu0: cpu@0 {
>> +			cpu-supply = <&vdd_cpu>;
>>  			operating-points-v2 = <&cpu0_opp_table>;
>> +			#cooling-cells = <2>;
>>  		};
>>  
>>  		cpu@1 {
>> +			cpu-supply = <&vdd_cpu>;
>>  			operating-points-v2 = <&cpu0_opp_table>;
>>  		};
>>  	};
>> @@ -717,4 +723,36 @@ sound {
>>  			 <&tegra_car TEGRA20_CLK_CDEV1>;
>>  		clock-names = "pll_a", "pll_a_out0", "mclk";
>>  	};
>> +
>> +	thermal-zones {
>> +		cpu-thermal {
>> +			polling-delay-passive = <1000>; /* milliseconds */
>> +			polling-delay = <5000>; /* milliseconds */
>> +
>> +			thermal-sensors = <&nct1008 1>;
>> +
>> +			trips {
>> +				trip0: cpu-alert0 {
>> +					/* start throttling at 50C */
>> +					temperature = <50000>;
>> +					hysteresis = <200>;
> 
> Did you mean <2000> ?

The <200> is correct.

Please see this commit for example:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=2b22393b27670b45a2c870bce3df6579efd9a86a

>> +					type = "passive";
>> +				};
>> +
>> +				trip1: cpu-crit {
>> +					/* shut down at 60C */
>> +					temperature = <60000>;
>> +					hysteresis = <2000>;
> 
> I think you can drop the hysteresis here, when the critical temperature
> is reached, there is an emergency shutdown.

Yes, perhaps you're right. Hysteresis doesn't make sense for a critical
trip. I'll improve it in the next revision, thanks.

> 50°C and 60°C sound very low values, no ?

Tegra20 CPU is very cold in comparison to later generations, it should
stay under 50°C even at a full load.

>> +					type = "critical";
>> +				};
>> +			};
>> +
>> +			cooling-maps {
>> +				map0 {
>> +					trip = <&trip0>;
>> +					cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
> 
> You should add all CPUs here.


All CPU cores are coupled on Tegra in regards to CPUFreq, hence I think
it won't make any difference if secondary CPU cores will be added here,
isn't it?
Daniel Lezcano Dec. 17, 2020, 7:36 p.m. UTC | #3
On 17/12/2020 20:01, Dmitry Osipenko wrote:
> 17.12.2020 21:28, Daniel Lezcano пишет:
>> On 17/12/2020 19:06, Dmitry Osipenko wrote:
>>> Enable CPU voltage scaling and thermal throttling on Tegra20 Ventana board.
>>>
>>> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
>>> ---
>>>  arch/arm/boot/dts/tegra20-ventana.dts | 40 ++++++++++++++++++++++++++-
>>>  1 file changed, 39 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/arch/arm/boot/dts/tegra20-ventana.dts b/arch/arm/boot/dts/tegra20-ventana.dts
>>> index 14ace2ef749c..c2d9f38960bc 100644
>>> --- a/arch/arm/boot/dts/tegra20-ventana.dts
>>> +++ b/arch/arm/boot/dts/tegra20-ventana.dts
>>> @@ -2,8 +2,10 @@
>>>  /dts-v1/;
>>>  
>>>  #include <dt-bindings/input/input.h>
>>> +#include <dt-bindings/thermal/thermal.h>
>>>  #include "tegra20.dtsi"
>>>  #include "tegra20-cpu-opp.dtsi"
>>> +#include "tegra20-cpu-opp-microvolt.dtsi"
>>>  
>>>  / {
>>>  	model = "NVIDIA Tegra20 Ventana evaluation board";
>>> @@ -527,9 +529,10 @@ ldo_rtc {
>>>  			};
>>>  		};
>>>  
>>> -		temperature-sensor@4c {
>>> +		nct1008: temperature-sensor@4c {
>>>  			compatible = "onnn,nct1008";
>>>  			reg = <0x4c>;
>>> +			#thermal-sensor-cells = <1>;
>>>  		};
>>>  	};
>>>  
>>> @@ -615,10 +618,13 @@ clk32k_in: clock@0 {
>>>  
>>>  	cpus {
>>>  		cpu0: cpu@0 {
>>> +			cpu-supply = <&vdd_cpu>;
>>>  			operating-points-v2 = <&cpu0_opp_table>;
>>> +			#cooling-cells = <2>;
>>>  		};
>>>  
>>>  		cpu@1 {
>>> +			cpu-supply = <&vdd_cpu>;
>>>  			operating-points-v2 = <&cpu0_opp_table>;
>>>  		};
>>>  	};
>>> @@ -717,4 +723,36 @@ sound {
>>>  			 <&tegra_car TEGRA20_CLK_CDEV1>;
>>>  		clock-names = "pll_a", "pll_a_out0", "mclk";
>>>  	};
>>> +
>>> +	thermal-zones {
>>> +		cpu-thermal {
>>> +			polling-delay-passive = <1000>; /* milliseconds */
>>> +			polling-delay = <5000>; /* milliseconds */
>>> +
>>> +			thermal-sensors = <&nct1008 1>;
>>> +
>>> +			trips {
>>> +				trip0: cpu-alert0 {
>>> +					/* start throttling at 50C */
>>> +					temperature = <50000>;
>>> +					hysteresis = <200>;
>>
>> Did you mean <2000> ?
> 
> The <200> is correct.
> 
> Please see this commit for example:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=2b22393b27670b45a2c870bce3df6579efd9a86a

Ok, thanks for the pointer.

>>> +					type = "passive";
>>> +				};
>>> +
>>> +				trip1: cpu-crit {
>>> +					/* shut down at 60C */
>>> +					temperature = <60000>;
>>> +					hysteresis = <2000>;
>>
>> I think you can drop the hysteresis here, when the critical temperature
>> is reached, there is an emergency shutdown.
> 
> Yes, perhaps you're right. Hysteresis doesn't make sense for a critical
> trip. I'll improve it in the next revision, thanks.
> 
>> 50°C and 60°C sound very low values, no ?
> 
> Tegra20 CPU is very cold in comparison to later generations, it should
> stay under 50°C even at a full load.

Interesting, thanks.

>>> +					type = "critical";
>>> +				};
>>> +			};
>>> +
>>> +			cooling-maps {
>>> +				map0 {
>>> +					trip = <&trip0>;
>>> +					cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
>>
>> You should add all CPUs here.
> 
> 
> All CPU cores are coupled on Tegra in regards to CPUFreq, hence I think
> it won't make any difference if secondary CPU cores will be added here,
> isn't it?

The explanation is in the description of commit ef4734500407ce4d
Dmitry Osipenko Dec. 17, 2020, 8:28 p.m. UTC | #4
17.12.2020 22:36, Daniel Lezcano пишет:
>>>> +					type = "critical";
>>>> +				};
>>>> +			};
>>>> +
>>>> +			cooling-maps {
>>>> +				map0 {
>>>> +					trip = <&trip0>;
>>>> +					cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
>>> You should add all CPUs here.
>>
>> All CPU cores are coupled on Tegra in regards to CPUFreq, hence I think
>> it won't make any difference if secondary CPU cores will be added here,
>> isn't it?
> The explanation is in the description of commit ef4734500407ce4d

I think that really only makes sense if CPU cores have independent clock
rate management. IIRC, I actually made some research about this in the
past and intentionally removed the secondary cores from the
cooling-device since they didn't make any difference for a coupled CPU
cores.

That commit also says:

"But as soon as this CPU ordering changes and any other CPU is used to
bring up the cooling device, we will start seeing failures."

I don't quite understand to what "failures" that commit referrers. I
tried to change the cpu0 to cpu1 in the cooling-device and don't see any
failures. Could you please clarify this?

In general it should be fine to add all the cores to the cooling-device
and I'll do it in v3, but I want to make it clear why this is needed.
Daniel Lezcano Dec. 17, 2020, 9:19 p.m. UTC | #5
On 17/12/2020 21:28, Dmitry Osipenko wrote:
> 17.12.2020 22:36, Daniel Lezcano пишет:
>>>>> +					type = "critical";
>>>>> +				};
>>>>> +			};
>>>>> +
>>>>> +			cooling-maps {
>>>>> +				map0 {
>>>>> +					trip = <&trip0>;
>>>>> +					cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
>>>> You should add all CPUs here.
>>>
>>> All CPU cores are coupled on Tegra in regards to CPUFreq, hence I think
>>> it won't make any difference if secondary CPU cores will be added here,
>>> isn't it?
>> The explanation is in the description of commit ef4734500407ce4d
> 
> I think that really only makes sense if CPU cores have independent clock
> rate management. 

ATM I did not see any ARM platform having a clock line per CPU but I may
be wrong.

> IIRC, I actually made some research about this in the
> past and intentionally removed the secondary cores from the
> cooling-device since they didn't make any difference for a coupled CPU
> cores.
> 
> That commit also says:
> 
> "But as soon as this CPU ordering changes and any other CPU is used to
> bring up the cooling device, we will start seeing failures."
> 
> I don't quite understand to what "failures" that commit referrers. I
> tried to change the cpu0 to cpu1 in the cooling-device and don't see any
> failures. Could you please clarify this?
> 
> In general it should be fine to add all the cores to the cooling-device
> and I'll do it in v3, but I want to make it clear why this is needed.

AFAIR, if CPU0 is unplugged the cooling device can not rebind to CPU1.
And if CPU0 is plugged in again, the cooling device fails to initialize.

And, if the CPUs are mapped with the physical CPU0 to Linux numbering
CPU1, the cooling device mapping will fail.
Dmitry Osipenko Dec. 17, 2020, 9:56 p.m. UTC | #6
18.12.2020 00:19, Daniel Lezcano пишет:
> On 17/12/2020 21:28, Dmitry Osipenko wrote:
>> 17.12.2020 22:36, Daniel Lezcano пишет:
>>>>>> +					type = "critical";
>>>>>> +				};
>>>>>> +			};
>>>>>> +
>>>>>> +			cooling-maps {
>>>>>> +				map0 {
>>>>>> +					trip = <&trip0>;
>>>>>> +					cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
>>>>> You should add all CPUs here.
>>>>
>>>> All CPU cores are coupled on Tegra in regards to CPUFreq, hence I think
>>>> it won't make any difference if secondary CPU cores will be added here,
>>>> isn't it?
>>> The explanation is in the description of commit ef4734500407ce4d
>>
>> I think that really only makes sense if CPU cores have independent clock
>> rate management. 
> 
> ATM I did not see any ARM platform having a clock line per CPU but I may
> be wrong.
> 
>> IIRC, I actually made some research about this in the
>> past and intentionally removed the secondary cores from the
>> cooling-device since they didn't make any difference for a coupled CPU
>> cores.
>>
>> That commit also says:
>>
>> "But as soon as this CPU ordering changes and any other CPU is used to
>> bring up the cooling device, we will start seeing failures."
>>
>> I don't quite understand to what "failures" that commit referrers. I
>> tried to change the cpu0 to cpu1 in the cooling-device and don't see any
>> failures. Could you please clarify this?
>>
>> In general it should be fine to add all the cores to the cooling-device
>> and I'll do it in v3, but I want to make it clear why this is needed.
> 
> AFAIR, if CPU0 is unplugged the cooling device can not rebind to CPU1.
> And if CPU0 is plugged in again, the cooling device fails to initialize.
> 
> And, if the CPUs are mapped with the physical CPU0 to Linux numbering
> CPU1, the cooling device mapping will fail.

Alright, thank you.
diff mbox series

Patch

diff --git a/arch/arm/boot/dts/tegra20-ventana.dts b/arch/arm/boot/dts/tegra20-ventana.dts
index 14ace2ef749c..c2d9f38960bc 100644
--- a/arch/arm/boot/dts/tegra20-ventana.dts
+++ b/arch/arm/boot/dts/tegra20-ventana.dts
@@ -2,8 +2,10 @@ 
 /dts-v1/;
 
 #include <dt-bindings/input/input.h>
+#include <dt-bindings/thermal/thermal.h>
 #include "tegra20.dtsi"
 #include "tegra20-cpu-opp.dtsi"
+#include "tegra20-cpu-opp-microvolt.dtsi"
 
 / {
 	model = "NVIDIA Tegra20 Ventana evaluation board";
@@ -527,9 +529,10 @@  ldo_rtc {
 			};
 		};
 
-		temperature-sensor@4c {
+		nct1008: temperature-sensor@4c {
 			compatible = "onnn,nct1008";
 			reg = <0x4c>;
+			#thermal-sensor-cells = <1>;
 		};
 	};
 
@@ -615,10 +618,13 @@  clk32k_in: clock@0 {
 
 	cpus {
 		cpu0: cpu@0 {
+			cpu-supply = <&vdd_cpu>;
 			operating-points-v2 = <&cpu0_opp_table>;
+			#cooling-cells = <2>;
 		};
 
 		cpu@1 {
+			cpu-supply = <&vdd_cpu>;
 			operating-points-v2 = <&cpu0_opp_table>;
 		};
 	};
@@ -717,4 +723,36 @@  sound {
 			 <&tegra_car TEGRA20_CLK_CDEV1>;
 		clock-names = "pll_a", "pll_a_out0", "mclk";
 	};
+
+	thermal-zones {
+		cpu-thermal {
+			polling-delay-passive = <1000>; /* milliseconds */
+			polling-delay = <5000>; /* milliseconds */
+
+			thermal-sensors = <&nct1008 1>;
+
+			trips {
+				trip0: cpu-alert0 {
+					/* start throttling at 50C */
+					temperature = <50000>;
+					hysteresis = <200>;
+					type = "passive";
+				};
+
+				trip1: cpu-crit {
+					/* shut down at 60C */
+					temperature = <60000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+				map0 {
+					trip = <&trip0>;
+					cooling-device = <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+				};
+			};
+		};
+	};
 };