
[1/3] ARM: tegra: add EMC clock scaling API

Message ID 1355516086-11116-2-git-send-email-dev@lynxeye.de
State Changes Requested, archived

Commit Message

Lucas Stach Dec. 14, 2012, 8:14 p.m. UTC
This allows memory clients to collaboratively control the EMC
performance.
Each client may set its performance demands. The EMC driver then tries
to find a DRAM performance level which is able to satisfy all those
demands.

Signed-off-by: Lucas Stach <dev@lynxeye.de>
---
 arch/arm/mach-tegra/tegra2_emc.c       | 84 ++++++++++++++++++++++++++++++++--
 include/memory/tegra_emc_performance.h | 79 ++++++++++++++++++++++++++++++++
 2 files changed, 160 insertions(+), 3 deletions(-)
 create mode 100644 include/memory/tegra_emc_performance.h

Comments

Mark Zhang Dec. 17, 2012, 11:08 a.m. UTC | #1
If my understanding is right, this is a temporary solution, because
this kind of requirement should be addressed by the DVFS subsystem.

Anyway, check the comments below.

On 12/15/2012 04:14 AM, Lucas Stach wrote:
> This allows memory clients to collaboratively control the EMC
> performance.
> Each client may set its performance demands. The EMC driver then tries
> to find a DRAM performance level which is able to satisfy all those
> demands.
> 
> Signed-off-by: Lucas Stach <dev@lynxeye.de>
> ---
>  arch/arm/mach-tegra/tegra2_emc.c       | 84 ++++++++++++++++++++++++++++++++--
>  include/memory/tegra_emc_performance.h | 79 ++++++++++++++++++++++++++++++++
>  2 files changed, 160 insertions(+), 3 deletions(-)
>  create mode 100644 include/memory/tegra_emc_performance.h
> 
> diff --git a/arch/arm/mach-tegra/tegra2_emc.c b/arch/arm/mach-tegra/tegra2_emc.c
> index 837c7b9..d2cf7a1 100644
> --- a/arch/arm/mach-tegra/tegra2_emc.c
> +++ b/arch/arm/mach-tegra/tegra2_emc.c
> @@ -15,6 +15,7 @@
>   *
>   */
>  
> +#include <asm/div64.h>
>  #include <linux/kernel.h>
>  #include <linux/device.h>
>  #include <linux/clk.h>
> @@ -24,6 +25,8 @@
>  #include <linux/of.h>
>  #include <linux/platform_device.h>
>  #include <linux/platform_data/tegra_emc.h>
> +#include <linux/types.h>
> +#include <memory/tegra_emc_performance.h>

Why don't you put this file into the directory where "tegra_emc.h"
resides? I think that would be easier to find and understand.

>  
>  #include "tegra2_emc.h"
>  #include "fuse.h"
> @@ -35,8 +38,16 @@ static bool emc_enable;
>  #endif
>  module_param(emc_enable, bool, 0644);
>  
> +struct emc_superclient_constraint {
> +	int bandwidth;
> +	enum tegra_emc_perf_level perf_level;
> +};
> +
> +static struct emc_superclient_constraint constraints[TEGRA_EMC_SC_NUM];
> +
>  static struct platform_device *emc_pdev;
>  static void __iomem *emc_regbase;
> +static struct clk *emc_clk;

Instead of using global variables, it's better to save this in a
driver-private structure, although the global way is easier to write. :)
I think an EMC driver-private structure will be needed anyway when we
start the upstream work on DVFS, so we could introduce it right now.
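For example, something along these lines (a rough sketch; the structure
name is made up):

struct tegra_emc {
	void __iomem *regs;
	struct clk *clk;
	struct emc_superclient_constraint constraints[TEGRA_EMC_SC_NUM];
};

The probe function could then allocate this with devm_kzalloc() and
store it with platform_set_drvdata().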

>  
>  static inline void emc_writel(u32 val, unsigned long addr)
>  {
> @@ -176,6 +187,64 @@ int tegra_emc_set_rate(unsigned long rate)
>  	return 0;
>  }
>  
> +static void tegra_emc_scale_clock(void)
> +{
> +	u64 bandwidth_floor = 0;
> +	enum tegra_emc_perf_level highest_level = TEGRA_EMC_PL_LOW;
> +	unsigned long clock_rate;
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(constraints); i++) {
> +		bandwidth_floor += constraints[i].bandwidth;

I'm confused here. Why do we add all the bandwidth up? Shouldn't we
loop over all the bandwidth requests in the "constraints" array, find
the largest one, compare it with the EMC table, and then finally write
the registers and set the EMC clock?

> +		if (constraints[i].perf_level > highest_level)
> +			highest_level = constraints[i].perf_level;
> +	}
> +
> +	/*
> +	 * Add some headroom to the bandwidth, we use a conservative 60%
> +	 * DRAM bandwidth efficiency factor
> +	 */
> +	bandwidth_floor *= 5;
> +	do_div(bandwidth_floor, 3);
> +
> +	clock_rate = bandwidth_floor >> 2; /* 4byte/clock */
> +
> +	/* boost clock according to perf level */
> +	switch (highest_level) {
> +	case TEGRA_EMC_PL_MID:
> +		clock_rate += 300000000;
> +		break;
> +	case TEGRA_EMC_PL_HIGH:
> +		clock_rate += 600000000;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	/* cap clock_rate at 600MHz, enough to select highest rate table */
> +	clock_rate = min(600000000UL, clock_rate);
> +
> +	clk_set_rate(emc_clk, clock_rate);
> +}
> +
> +void tegra_emc_request_bandwidth(enum tegra_emc_superclient client,
> +				 int bandwidth)
> +{
> +	constraints[client].bandwidth = bandwidth;
> +
> +	tegra_emc_scale_clock();
> +}
> +EXPORT_SYMBOL_GPL(tegra_emc_request_bandwidth);
> +
> +void tegra_emc_request_perf_level(enum tegra_emc_superclient client,
> +				  enum tegra_emc_perf_level level)
> +{
> +	constraints[client].perf_level = level;
> +
> +	tegra_emc_scale_clock();
> +}
> +EXPORT_SYMBOL_GPL(tegra_emc_request_perf_level);
> +
>  #ifdef CONFIG_OF
>  static struct device_node *tegra_emc_ramcode_devnode(struct device_node *np)
>  {
> @@ -270,19 +339,17 @@ static struct tegra_emc_pdata *tegra_emc_dt_parse_pdata(
>  
>  static struct tegra_emc_pdata __devinit *tegra_emc_fill_pdata(struct platform_device *pdev)
>  {
> -	struct clk *c = clk_get_sys(NULL, "emc");
>  	struct tegra_emc_pdata *pdata;
>  	unsigned long khz;
>  	int i;
>  
>  	WARN_ON(pdev->dev.platform_data);
> -	BUG_ON(IS_ERR_OR_NULL(c));
>  
>  	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
>  	pdata->tables = devm_kzalloc(&pdev->dev, sizeof(*pdata->tables),
>  				     GFP_KERNEL);
>  
> -	pdata->tables[0].rate = clk_get_rate(c) / 2 / 1000;
> +	pdata->tables[0].rate = clk_get_rate(emc_clk) / 2 / 1000;
>  
>  	for (i = 0; i < TEGRA_EMC_NUM_REGS; i++)
>  		pdata->tables[0].regs[i] = emc_readl(emc_reg_addr[i]);
> @@ -306,6 +373,9 @@ static int __devinit tegra_emc_probe(struct platform_device *pdev)
>  		return -ENODEV;
>  	}
>  
> +	emc_clk = clk_get_sys(NULL, "emc");
> +	BUG_ON(IS_ERR_OR_NULL(emc_clk));
> +
>  	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>  	if (!res) {
>  		dev_err(&pdev->dev, "missing register base\n");
> @@ -333,6 +403,13 @@ static int __devinit tegra_emc_probe(struct platform_device *pdev)
>  	return 0;
>  }
>  
> +static int tegra_emc_remove(struct platform_device *pdev)
> +{
> +	clk_put(emc_clk);
> +
> +	return 0;
> +}
> +
>  static struct of_device_id tegra_emc_of_match[] __devinitdata = {
>  	{ .compatible = "nvidia,tegra20-emc", },
>  	{ },
> @@ -345,6 +422,7 @@ static struct platform_driver tegra_emc_driver = {
>  		.of_match_table = tegra_emc_of_match,
>  	},
>  	.probe          = tegra_emc_probe,
> +	.remove         = tegra_emc_remove,
>  };
>  
>  static int __init tegra_emc_init(void)
> diff --git a/include/memory/tegra_emc_performance.h b/include/memory/tegra_emc_performance.h
> new file mode 100644
> index 0000000..6c23099
> --- /dev/null
> +++ b/include/memory/tegra_emc_performance.h
> @@ -0,0 +1,79 @@
> +/*
> + * Copyright (C) 2012 Lucas Stach
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#ifndef __TEGRA_EMC_PERFORMANCE_H
> +#define __TEGRA_EMC_PERFORMANCE_H
> +
> +enum tegra_emc_superclient {
> +	TEGRA_EMC_SC_AFI = 0,	/* Tegra30 only */
> +	TEGRA_EMC_SC_AVP,
> +	TEGRA_EMC_SC_DC,
> +	TEGRA_EMC_SC_DCB,
> +	TEGRA_EMC_SC_EPP,
> +	TEGRA_EMC_SC_G2,
> +	TEGRA_EMC_SC_HC,
> +	TEGRA_EMC_SC_HDA,	/* Tegra30 only */
> +	TEGRA_EMC_SC_ISP,
> +	TEGRA_EMC_SC_MPCORE,
> +	TEGRA_EMC_SC_MPCORELP,	/* Tegra30 only */
> +	TEGRA_EMC_SC_MPE,
> +	TEGRA_EMC_SC_NV,
> +	TEGRA_EMC_SC_NV2,	/* Tegra30 only */
> +	TEGRA_EMC_SC_PPCS,
> +	TEGRA_EMC_SC_SATA,	/* Tegra30 only */
> +	TEGRA_EMC_SC_VDE,
> +	TEGRA_EMC_SC_VI,
> +
> +	TEGRA_EMC_SC_NUM
> +};
> +
> +enum tegra_emc_perf_level {
> +	TEGRA_EMC_PL_LOW = 0,	/* no boost */
> +	TEGRA_EMC_PL_MID,	/* half frequency boost */
> +	TEGRA_EMC_PL_HIGH	/* max frequency boost */
> +};
> +
> +#ifdef CONFIG_ARCH_TEGRA_2x_SOC
> +/**
> + * tegra_emc_request_bandwidth - request bandwidth for isochronous clients
> + * @client: the EMC superclient which requests bandwidth
> + * @bandwidth: needed bandwidth in bytes/s
> + *
> + * Use this function to request bandwidth for isochronous clients, which
> + * need a specific bandwidth to operate properly. EMC will make sure to not
> + * drop below this limit while scaling DRAM frequency.
> + * Superclients have to add up the required bandwidth for their subunits
> + * themselves.
> + */
> +void tegra_emc_request_bandwidth(enum tegra_emc_superclient client,
> +				 int bandwidth);
> +
> +/**
> + * tegra_emc_request_perf_level - request DRAM frequency boost
> + * @client: the EMC superclient which requests boost
> + * @level: the requested boost level
> + *
> + * Use this function to boost DRAM frequency for clients which don't need a
> + * specific bandwidth but want to scale up DRAM bandwidth to implement
> + * race-to-idle.
> + */
> +void tegra_emc_request_perf_level(enum tegra_emc_superclient client,
> +				  enum tegra_emc_perf_level level);
> +#else
> +/* FIXME: Tegra30 has no EMC driver yet, stub out performance functions */
> +static inline void tegra_emc_request_bandwidth(enum tegra_emc_superclient client,
> +					       int bandwidth)
> +{
> +}
> +static inline void tegra_emc_request_perf_level(enum tegra_emc_superclient client,
> +						enum tegra_emc_perf_level level)
> +{
> +}
> +#endif /* CONFIG_ARCH_TEGRA_2x_SOC */
> +
> +#endif /* __TEGRA_EMC_PERFORMANCE_H */
> 
Mark Zhang Dec. 17, 2012, 11:18 a.m. UTC | #2
Sorry, I forgot one comment: change the type of the variable
"bandwidth" (e.g. in the function "tegra_emc_request_bandwidth") from
"int" to "unsigned int" or "unsigned long". It's quite possible that
"bandwidth" exceeds the range of type int, which caps out at about
2.1 GB/s. Although you use it in the correct way, it's better to
declare it with the right type.

Mark
On 12/17/2012 07:08 PM, Mark Zhang wrote:
> If my understanding is right, this is a temporary solution, because
> this kind of requirement should be addressed by the DVFS subsystem.
> 
> Anyway, check the comments below.
> 
> On 12/15/2012 04:14 AM, Lucas Stach wrote:
>> This allows memory clients to collaboratively control the EMC
>> performance.
>> Each client may set its performance demands. The EMC driver then tries
>> to find a DRAM performance level which is able to satisfy all those
>> demands.
>>
>> Signed-off-by: Lucas Stach <dev@lynxeye.de>
>> ---
>>  arch/arm/mach-tegra/tegra2_emc.c       | 84 ++++++++++++++++++++++++++++++++--
>>  include/memory/tegra_emc_performance.h | 79 ++++++++++++++++++++++++++++++++
>>  2 files changed, 160 insertions(+), 3 deletions(-)
>>  create mode 100644 include/memory/tegra_emc_performance.h
>>
>> diff --git a/arch/arm/mach-tegra/tegra2_emc.c b/arch/arm/mach-tegra/tegra2_emc.c
>> index 837c7b9..d2cf7a1 100644
>> --- a/arch/arm/mach-tegra/tegra2_emc.c
>> +++ b/arch/arm/mach-tegra/tegra2_emc.c
>> @@ -15,6 +15,7 @@
>>   *
>>   */
>>  
>> +#include <asm/div64.h>
>>  #include <linux/kernel.h>
>>  #include <linux/device.h>
>>  #include <linux/clk.h>
>> @@ -24,6 +25,8 @@
>>  #include <linux/of.h>
>>  #include <linux/platform_device.h>
>>  #include <linux/platform_data/tegra_emc.h>
>> +#include <linux/types.h>
>> +#include <memory/tegra_emc_performance.h>
> 
> Why don't you put this file into the directory where "tegra_emc.h"
> resides? I think that would be easier to find and understand.
> 
>>  
>>  #include "tegra2_emc.h"
>>  #include "fuse.h"
>> @@ -35,8 +38,16 @@ static bool emc_enable;
>>  #endif
>>  module_param(emc_enable, bool, 0644);
>>  
>> +struct emc_superclient_constraint {
>> +	int bandwidth;
>> +	enum tegra_emc_perf_level perf_level;
>> +};
>> +
>> +static struct emc_superclient_constraint constraints[TEGRA_EMC_SC_NUM];
>> +
>>  static struct platform_device *emc_pdev;
>>  static void __iomem *emc_regbase;
>> +static struct clk *emc_clk;
> 
> Instead of using global variables, it's better to save this in a
> driver-private structure, although the global way is easier to write. :)
> I think an EMC driver-private structure will be needed anyway when we
> start the upstream work on DVFS, so we could introduce it right now.
> 
>>  
>>  static inline void emc_writel(u32 val, unsigned long addr)
>>  {
>> @@ -176,6 +187,64 @@ int tegra_emc_set_rate(unsigned long rate)
>>  	return 0;
>>  }
>>  
>> +static void tegra_emc_scale_clock(void)
>> +{
>> +	u64 bandwidth_floor = 0;
>> +	enum tegra_emc_perf_level highest_level = TEGRA_EMC_PL_LOW;
>> +	unsigned long clock_rate;
>> +	int i;
>> +
>> +	for (i = 0; i < ARRAY_SIZE(constraints); i++) {
>> +		bandwidth_floor += constraints[i].bandwidth;
> 
> I'm confused here. Why do we add all the bandwidth up? Shouldn't we
> loop over all the bandwidth requests in the "constraints" array, find
> the largest one, compare it with the EMC table, and then finally write
> the registers and set the EMC clock?
> 
>> +		if (constraints[i].perf_level > highest_level)
>> +			highest_level = constraints[i].perf_level;
>> +	}
>> +
>> +	/*
>> +	 * Add some headroom to the bandwidth, we use a conservative 60%
>> +	 * DRAM bandwidth efficiency factor
>> +	 */
>> +	bandwidth_floor *= 5;
>> +	do_div(bandwidth_floor, 3);
>> +
>> +	clock_rate = bandwidth_floor >> 2; /* 4byte/clock */
>> +
>> +	/* boost clock according to perf level */
>> +	switch (highest_level) {
>> +	case TEGRA_EMC_PL_MID:
>> +		clock_rate += 300000000;
>> +		break;
>> +	case TEGRA_EMC_PL_HIGH:
>> +		clock_rate += 600000000;
>> +		break;
>> +	default:
>> +		break;
>> +	}
>> +
>> +	/* cap clock_rate at 600MHz, enough to select highest rate table */
>> +	clock_rate = min(600000000UL, clock_rate);
>> +
>> +	clk_set_rate(emc_clk, clock_rate);
>> +}
>> +
>> +void tegra_emc_request_bandwidth(enum tegra_emc_superclient client,
>> +				 int bandwidth)
>> +{
>> +	constraints[client].bandwidth = bandwidth;
>> +
>> +	tegra_emc_scale_clock();
>> +}
>> +EXPORT_SYMBOL_GPL(tegra_emc_request_bandwidth);
>> +
>> +void tegra_emc_request_perf_level(enum tegra_emc_superclient client,
>> +				  enum tegra_emc_perf_level level)
>> +{
>> +	constraints[client].perf_level = level;
>> +
>> +	tegra_emc_scale_clock();
>> +}
>> +EXPORT_SYMBOL_GPL(tegra_emc_request_perf_level);
>> +
>>  #ifdef CONFIG_OF
>>  static struct device_node *tegra_emc_ramcode_devnode(struct device_node *np)
>>  {
>> @@ -270,19 +339,17 @@ static struct tegra_emc_pdata *tegra_emc_dt_parse_pdata(
>>  
>>  static struct tegra_emc_pdata __devinit *tegra_emc_fill_pdata(struct platform_device *pdev)
>>  {
>> -	struct clk *c = clk_get_sys(NULL, "emc");
>>  	struct tegra_emc_pdata *pdata;
>>  	unsigned long khz;
>>  	int i;
>>  
>>  	WARN_ON(pdev->dev.platform_data);
>> -	BUG_ON(IS_ERR_OR_NULL(c));
>>  
>>  	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
>>  	pdata->tables = devm_kzalloc(&pdev->dev, sizeof(*pdata->tables),
>>  				     GFP_KERNEL);
>>  
>> -	pdata->tables[0].rate = clk_get_rate(c) / 2 / 1000;
>> +	pdata->tables[0].rate = clk_get_rate(emc_clk) / 2 / 1000;
>>  
>>  	for (i = 0; i < TEGRA_EMC_NUM_REGS; i++)
>>  		pdata->tables[0].regs[i] = emc_readl(emc_reg_addr[i]);
>> @@ -306,6 +373,9 @@ static int __devinit tegra_emc_probe(struct platform_device *pdev)
>>  		return -ENODEV;
>>  	}
>>  
>> +	emc_clk = clk_get_sys(NULL, "emc");
>> +	BUG_ON(IS_ERR_OR_NULL(emc_clk));
>> +
>>  	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>>  	if (!res) {
>>  		dev_err(&pdev->dev, "missing register base\n");
>> @@ -333,6 +403,13 @@ static int __devinit tegra_emc_probe(struct platform_device *pdev)
>>  	return 0;
>>  }
>>  
>> +static int tegra_emc_remove(struct platform_device *pdev)
>> +{
>> +	clk_put(emc_clk);
>> +
>> +	return 0;
>> +}
>> +
>>  static struct of_device_id tegra_emc_of_match[] __devinitdata = {
>>  	{ .compatible = "nvidia,tegra20-emc", },
>>  	{ },
>> @@ -345,6 +422,7 @@ static struct platform_driver tegra_emc_driver = {
>>  		.of_match_table = tegra_emc_of_match,
>>  	},
>>  	.probe          = tegra_emc_probe,
>> +	.remove         = tegra_emc_remove,
>>  };
>>  
>>  static int __init tegra_emc_init(void)
>> diff --git a/include/memory/tegra_emc_performance.h b/include/memory/tegra_emc_performance.h
>> new file mode 100644
>> index 0000000..6c23099
>> --- /dev/null
>> +++ b/include/memory/tegra_emc_performance.h
>> @@ -0,0 +1,79 @@
>> +/*
>> + * Copyright (C) 2012 Lucas Stach
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#ifndef __TEGRA_EMC_PERFORMANCE_H
>> +#define __TEGRA_EMC_PERFORMANCE_H
>> +
>> +enum tegra_emc_superclient {
>> +	TEGRA_EMC_SC_AFI = 0,	/* Tegra30 only */
>> +	TEGRA_EMC_SC_AVP,
>> +	TEGRA_EMC_SC_DC,
>> +	TEGRA_EMC_SC_DCB,
>> +	TEGRA_EMC_SC_EPP,
>> +	TEGRA_EMC_SC_G2,
>> +	TEGRA_EMC_SC_HC,
>> +	TEGRA_EMC_SC_HDA,	/* Tegra30 only */
>> +	TEGRA_EMC_SC_ISP,
>> +	TEGRA_EMC_SC_MPCORE,
>> +	TEGRA_EMC_SC_MPCORELP,	/* Tegra30 only */
>> +	TEGRA_EMC_SC_MPE,
>> +	TEGRA_EMC_SC_NV,
>> +	TEGRA_EMC_SC_NV2,	/* Tegra30 only */
>> +	TEGRA_EMC_SC_PPCS,
>> +	TEGRA_EMC_SC_SATA,	/* Tegra30 only */
>> +	TEGRA_EMC_SC_VDE,
>> +	TEGRA_EMC_SC_VI,
>> +
>> +	TEGRA_EMC_SC_NUM
>> +};
>> +
>> +enum tegra_emc_perf_level {
>> +	TEGRA_EMC_PL_LOW = 0,	/* no boost */
>> +	TEGRA_EMC_PL_MID,	/* half frequency boost */
>> +	TEGRA_EMC_PL_HIGH	/* max frequency boost */
>> +};
>> +
>> +#ifdef CONFIG_ARCH_TEGRA_2x_SOC
>> +/**
>> + * tegra_emc_request_bandwidth - request bandwidth for isochronous clients
>> + * @client: the EMC superclient which requests bandwidth
>> + * @bandwidth: needed bandwidth in bytes/s
>> + *
>> + * Use this function to request bandwidth for isochronous clients, which
>> + * need a specific bandwidth to operate properly. EMC will make sure to not
>> + * drop below this limit while scaling DRAM frequency.
>> + * Superclients have to add up the required bandwidth for their subunits
>> + * themselves.
>> + */
>> +void tegra_emc_request_bandwidth(enum tegra_emc_superclient client,
>> +				 int bandwidth);
>> +
>> +/**
>> + * tegra_emc_request_perf_level - request DRAM frequency boost
>> + * @client: the EMC superclient which requests boost
>> + * @level: the requested boost level
>> + *
>> + * Use this function to boost DRAM frequency for clients which don't need a
>> + * specific bandwidth but want to scale up DRAM bandwidth to implement
>> + * race-to-idle.
>> + */
>> +void tegra_emc_request_perf_level(enum tegra_emc_superclient client,
>> +				  enum tegra_emc_perf_level level);
>> +#else
>> +/* FIXME: Tegra30 has no EMC driver yet, stub out performance functions */
>> +static inline void tegra_emc_request_bandwidth(enum tegra_emc_superclient client,
>> +					       int bandwidth)
>> +{
>> +}
>> +static inline void tegra_emc_request_perf_level(enum tegra_emc_superclient client,
>> +						enum tegra_emc_perf_level level)
>> +{
>> +}
>> +#endif /* CONFIG_ARCH_TEGRA_2x_SOC */
>> +
>> +#endif /* __TEGRA_EMC_PERFORMANCE_H */
>>
Lucas Stach Dec. 17, 2012, 11:22 a.m. UTC | #3
Hi Mark,

On Monday, December 17, 2012 at 19:08 +0800, Mark Zhang wrote:
> If my understanding is right, this is a temporary solution, because
> this kind of requirement should be addressed by the DVFS subsystem.
> 
Yes, this can go away once proper DVFS is added. But maybe it can also
serve as an example of how things could be done there.

> Anyway, check the comments below.
> 
> On 12/15/2012 04:14 AM, Lucas Stach wrote:
> > This allows memory clients to collaboratively control the EMC
> > performance.
> > Each client may set its performance demands. The EMC driver then tries
> > to find a DRAM performance level which is able to satisfy all those
> > demands.
> > 
> > Signed-off-by: Lucas Stach <dev@lynxeye.de>
> > ---
> >  arch/arm/mach-tegra/tegra2_emc.c       | 84 ++++++++++++++++++++++++++++++++--
> >  include/memory/tegra_emc_performance.h | 79 ++++++++++++++++++++++++++++++++
> >  2 files changed, 160 insertions(+), 3 deletions(-)
> >  create mode 100644 include/memory/tegra_emc_performance.h
> > 
> > diff --git a/arch/arm/mach-tegra/tegra2_emc.c b/arch/arm/mach-tegra/tegra2_emc.c
> > index 837c7b9..d2cf7a1 100644
> > --- a/arch/arm/mach-tegra/tegra2_emc.c
> > +++ b/arch/arm/mach-tegra/tegra2_emc.c
> > @@ -15,6 +15,7 @@
> >   *
> >   */
> >  
> > +#include <asm/div64.h>
> >  #include <linux/kernel.h>
> >  #include <linux/device.h>
> >  #include <linux/clk.h>
> > @@ -24,6 +25,8 @@
> >  #include <linux/of.h>
> >  #include <linux/platform_device.h>
> >  #include <linux/platform_data/tegra_emc.h>
> > +#include <linux/types.h>
> > +#include <memory/tegra_emc_performance.h>
> 
> Why don't you put this file into the directory where "tegra_emc.h"
> resides? I think that would be easier to find and understand.
> 
Because my understanding is that mach-tegra is not the right place for
include files that are used by normal drivers and not just the platform
code.

> >  
> >  #include "tegra2_emc.h"
> >  #include "fuse.h"
> > @@ -35,8 +38,16 @@ static bool emc_enable;
> >  #endif
> >  module_param(emc_enable, bool, 0644);
> >  
> > +struct emc_superclient_constraint {
> > +	int bandwidth;
> > +	enum tegra_emc_perf_level perf_level;
> > +};
> > +
> > +static struct emc_superclient_constraint constraints[TEGRA_EMC_SC_NUM];
> > +
> >  static struct platform_device *emc_pdev;
> >  static void __iomem *emc_regbase;
> > +static struct clk *emc_clk;
> 
> Instead of using global variables, it's better to save this in a
> driver-private structure, although the global way is easier to write. :)
> I think an EMC driver-private structure will be needed anyway when we
> start the upstream work on DVFS, so we could introduce it right now.
> 
I can certainly do this. It will increase the churn of the patch a bit,
but I think your concern is valid and I'll address it in V2.

> >  
> >  static inline void emc_writel(u32 val, unsigned long addr)
> >  {
> > @@ -176,6 +187,64 @@ int tegra_emc_set_rate(unsigned long rate)
> >  	return 0;
> >  }
> >  
> > +static void tegra_emc_scale_clock(void)
> > +{
> > +	u64 bandwidth_floor = 0;
> > +	enum tegra_emc_perf_level highest_level = TEGRA_EMC_PL_LOW;
> > +	unsigned long clock_rate;
> > +	int i;
> > +
> > +	for (i = 0; i < ARRAY_SIZE(constraints); i++) {
> > +		bandwidth_floor += constraints[i].bandwidth;
> 
> I'm confused here. Why do we add all the bandwidth up? Shouldn't we
> loop over all the bandwidth requests in the "constraints" array, find
> the largest one, compare it with the EMC table, and then finally write
> the registers and set the EMC clock?
> 
No, bandwidth constraints add up. Imagine two display clients scanning
out at the same time. Both on their own may be satisfied with 83MHz DRAM
clock, but if they are running at the same time you have to account for
that and maybe bump DRAM freq to 133MHz.
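To put rough numbers on it, this is what tegra_emc_scale_clock() would
compute for such a case (the per-head figures are made up for
illustration):

#include <asm/div64.h>
#include <linux/types.h>

/* two display heads, each needing ~315 MB/s (1280x1024@60Hz,
 * 4 bytes/pixel); the requests add up to 630 MB/s */
static unsigned long example_emc_rate(void)
{
	u64 bandwidth_floor = 315000000ULL + 315000000ULL;

	/* the same 60% DRAM efficiency headroom as in the patch */
	bandwidth_floor *= 5;
	do_div(bandwidth_floor, 3);	/* ~1050 MB/s */

	return bandwidth_floor >> 2;	/* 4 bytes/clock -> ~262 MHz */
}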

Regards,
Lucas


Lucas Stach Dec. 17, 2012, 11:23 a.m. UTC | #4
On Monday, December 17, 2012 at 19:18 +0800, Mark Zhang wrote:
> Sorry, I forgot one comment: change the type of the variable
> "bandwidth" (e.g. in the function "tegra_emc_request_bandwidth") from
> "int" to "unsigned int" or "unsigned long". It's quite possible that
> "bandwidth" exceeds the range of type int, which caps out at about
> 2.1 GB/s. Although you use it in the correct way, it's better to
> declare it with the right type.
> 
I wouldn't call more than 2GB/s bandwidth for a single client likely,
but yes I'll fix it up.




Stephen Warren Dec. 17, 2012, 9:23 p.m. UTC | #5
On 12/14/2012 01:14 PM, Lucas Stach wrote:
> This allows memory clients to collaboratively control the EMC
> performance.
> Each client may set its performance demands. The EMC driver then tries
> to find a DRAM performance level which is able to satisfy all those
> demands.

> diff --git a/arch/arm/mach-tegra/tegra2_emc.c b/arch/arm/mach-tegra/tegra2_emc.c

> +struct emc_superclient_constraint {

Why "superclient" not just "client"?

> +static struct emc_superclient_constraint constraints[TEGRA_EMC_SC_NUM];

This way, the EMC driver must know all the possible clients. Wouldn't it
be better to maintain some kind of dynamic list/... so drivers can just
add/remove to/from this list. I'm not familiar with other
perf/constraints APIs already in the kernel, but I /think/ that's how
they tend to work. This would also mean not having to update the EMC
driver for new Tegra SoCs if all that changed was the set of clients
attached to the EMC.
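Something like the following is what I have in mind, as a very rough
sketch (the structure and function names are made up):

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <memory/tegra_emc_performance.h>

/* hypothetical dynamic client list instead of the fixed array */
struct emc_constraint {
	struct list_head node;
	unsigned long bandwidth;
	enum tegra_emc_perf_level perf_level;
};

static LIST_HEAD(emc_constraints);
static DEFINE_MUTEX(emc_constraints_lock);

struct emc_constraint *tegra_emc_register_client(void)
{
	struct emc_constraint *c = kzalloc(sizeof(*c), GFP_KERNEL);

	if (!c)
		return NULL;

	mutex_lock(&emc_constraints_lock);
	list_add_tail(&c->node, &emc_constraints);
	mutex_unlock(&emc_constraints_lock);

	return c;
}

tegra_emc_scale_clock() would then walk the list under the same lock
instead of iterating over the fixed array.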

> +static void tegra_emc_scale_clock(void)

> +	clock_rate = bandwidth_floor >> 2; /* 4byte/clock */

That assumes a 32-bit SDRAM interface. I'm sure that won't always be
true. Perhaps we should invent a #define for this so it stands out
slightly more if/when this needs to change later.
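E.g. something like this (sketch only; ilog2() comes from
<linux/log2.h>):

#define TEGRA_EMC_BUS_WIDTH_BYTES	4	/* 32-bit SDRAM interface */

	clock_rate = bandwidth_floor >> ilog2(TEGRA_EMC_BUS_WIDTH_BYTES);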

> +	switch (highest_level) {
> +	case TEGRA_EMC_PL_MID:
> +		clock_rate += 300000000;
> +		break;
> +	case TEGRA_EMC_PL_HIGH:
> +		clock_rate += 600000000;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	/* cap clock_rate at 600MHz, enough to select highest rate table */
> +	clock_rate = min(600000000UL, clock_rate);

I think we should parameterize the 600MHz max clock rate, and perhaps
even get it from some kind of clk_get_max_rate() API; this 600MHz value
is highly unlikely to be invariant across SoC versions.

> +void tegra_emc_request_bandwidth(enum tegra_emc_superclient client,
> +				 int bandwidth)
> +{
> +	constraints[client].bandwidth = bandwidth;
> +
> +	tegra_emc_scale_clock();

Locking?

> +void tegra_emc_request_perf_level(enum tegra_emc_superclient client,
> +				  enum tegra_emc_perf_level level)
> +{
> +	constraints[client].perf_level = level;
> +
> +	tegra_emc_scale_clock();

Locking?

>  static struct tegra_emc_pdata __devinit *tegra_emc_fill_pdata(struct platform_device *pdev)

> +	emc_clk = clk_get_sys(NULL, "emc");

Is there a devm_clk_get_sys()? That would avoid the need to add
tegra_emc_remove().

Can we use (devm_)clk_get() instead, so we don't have to hard-code clock
names? That will help when clocks are in DT.
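I'm thinking of something along these lines in tegra_emc_probe()
(untested sketch):

	emc_clk = devm_clk_get(&pdev->dev, NULL);
	if (IS_ERR(emc_clk)) {
		dev_err(&pdev->dev, "could not get EMC clock\n");
		return PTR_ERR(emc_clk);
	}

That would also make the clk_put() in tegra_emc_remove() unnecessary.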

> diff --git a/include/memory/tegra_emc_performance.h b/include/memory/tegra_emc_performance.h

tegra_emc.h seems a better/shorter name.

BTW, did you check our downstream Android kernel to see what we already
have for EMC scaling constraints? It might provide some useful
ideas/background.
Lucas Stach Dec. 17, 2012, 9:43 p.m. UTC | #6
On Monday, December 17, 2012 at 14:23 -0700, Stephen Warren wrote:
> On 12/14/2012 01:14 PM, Lucas Stach wrote:
> > This allows memory clients to collaboratively control the EMC
> > performance.
> > Each client may set its performance demands. The EMC driver then tries
> > to find a DRAM performance level which is able to satisfy all those
> > demands.
> 
> > diff --git a/arch/arm/mach-tegra/tegra2_emc.c b/arch/arm/mach-tegra/tegra2_emc.c
> 
> > +struct emc_superclient_constraint {
> 
> Why "superclient" not just "client"?
> 
Because I would like to keep this API at the superclient level. For
example, the display engines have 5 memory clients each, which interact
in different ways when computing the required bandwidth. I would like to
keep the complexity of calculating the bandwidth requirements in the
device-specific drivers and not add all this to the EMC API.

Superclient is the term used in the TRM to describe the collection of
memory clients used by one engine.
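As a made-up illustration of that split, a display driver would do
something like this (all names except the EMC API are hypothetical):

#include <linux/types.h>
#include <memory/tegra_emc_performance.h>

/* hypothetical per-window state of a display controller */
struct example_dc_win {
	bool enabled;
	unsigned int width, height, bytes_per_pixel;
};

static void example_dc_update_emc(struct example_dc_win *wins,
				  int num_wins, unsigned int refresh)
{
	unsigned long bw = 0;
	int i;

	/* the superclient (DC) sums up its subunit (window) needs */
	for (i = 0; i < num_wins; i++)
		if (wins[i].enabled)
			bw += wins[i].width * wins[i].height *
			      wins[i].bytes_per_pixel * refresh;

	tegra_emc_request_bandwidth(TEGRA_EMC_SC_DC, bw);
}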

> > +static struct emc_superclient_constraint constraints[TEGRA_EMC_SC_NUM];
> 
> This way, the EMC driver must know all the possible clients. Wouldn't it
> be better to maintain some kind of dynamic list/... so drivers can just
> add/remove to/from this list. I'm not familiar with other
> perf/constraints APIs already in the kernel, but I /think/ that's how
> they tend to work. This would also mean not having to update the EMC
> driver for new Tegra SoCs if all that changed was the set of clients
> attached to the EMC.
> 
This will make things a fair bit more complex, but I'll happily make
this change if you are okay with the general direction of the approach.

> > +static void tegra_emc_scale_clock(void)
> 
> > +	clock_rate = bandwidth_floor >> 2; /* 4byte/clock */
> 
> That assumes a 32-bit SDRAM interface. I'm sure that won't always be
> true. Perhaps we should invent a #define for this so it stands out
> slightly more if/when this needs to change later.
> 
Hm, maybe we can add a DT property within the EMC node for this.
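Something like this, with a made-up property name (sketch only):

	u32 dram_bus_width_bytes = 4;	/* default to a 32-bit interface */

	/* hypothetical property; not part of the current binding */
	of_property_read_u32(pdev->dev.of_node,
			     "nvidia,dram-bus-width-bytes",
			     &dram_bus_width_bytes);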

> > +	switch (highest_level) {
> > +	case TEGRA_EMC_PL_MID:
> > +		clock_rate += 300000000;
> > +		break;
> > +	case TEGRA_EMC_PL_HIGH:
> > +		clock_rate += 600000000;
> > +		break;
> > +	default:
> > +		break;
> > +	}
> > +
> > +	/* cap clock_rate at 600MHz, enough to select highest rate table */
> > +	clock_rate = min(600000000UL, clock_rate);
> 
> I think we should parameterize the 600MHz max clock rate, and perhaps
> even get it from some kind of clk_get_max_rate() API; this 600MHz value
> is highly unlikely to be invariant across SoC versions.
> 
Yeah, I think I'll just look at the provided tables and take the highest
table rate for the high boost and half of that for the mid boost.
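Roughly like this, assuming the pdata layout the driver already uses
(num_tables entries with table rates in kHz):

static unsigned long tegra_emc_max_table_rate(struct tegra_emc_pdata *pdata)
{
	unsigned long max_khz = 0;
	int i;

	for (i = 0; i < pdata->num_tables; i++)
		max_khz = max(max_khz, (unsigned long)pdata->tables[i].rate);

	return max_khz * 1000;	/* table rates are stored in kHz */
}

TEGRA_EMC_PL_HIGH would then add this maximum and TEGRA_EMC_PL_MID half
of it, instead of the hard-coded 600MHz/300MHz.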

> > +void tegra_emc_request_bandwidth(enum tegra_emc_superclient client,
> > +				 int bandwidth)
> > +{
> > +	constraints[client].bandwidth = bandwidth;
> > +
> > +	tegra_emc_scale_clock();
> 
> Locking?
> 
Right.

> > +void tegra_emc_request_perf_level(enum tegra_emc_superclient client,
> > +				  enum tegra_emc_perf_level level)
> > +{
> > +	constraints[client].perf_level = level;
> > +
> > +	tegra_emc_scale_clock();
> 
> Locking?
> 
> >  static struct tegra_emc_pdata __devinit *tegra_emc_fill_pdata(struct platform_device *pdev)
> 
> > +	emc_clk = clk_get_sys(NULL, "emc");
> 
> Is there a devm_clk_get_sys()? That would avoid the need to add
> tegra_emc_remove().
> 
> Can we use (devm_)clk_get() instead, so we don't have to hard-code clock
> names? That will help when clocks are in DT.
> 
Ok, I'll look into this.

> > diff --git a/include/memory/tegra_emc_performance.h b/include/memory/tegra_emc_performance.h
> 
> tegra_emc.h seems a better/shorter name.
> 
> BTW, did you check our downstream Android kernel to see what we already
> have for EMC scaling constraints? It might provide some useful
> ideas/background.
> 
Only very briefly. It seemed that Tegra 20 isn't really implemented
downstream. And I think the idea of this EMC clock scaling is easy
enough to not require too much thought. Most of the complexity has to
live within the drivers which make use of this API, and for that we
might want to look at what has been done downstream.


Stephen Warren Dec. 17, 2012, 9:57 p.m. UTC | #7
On 12/17/2012 02:43 PM, Lucas Stach wrote:
> On Monday, December 17, 2012 at 14:23 -0700, Stephen Warren wrote:
>> On 12/14/2012 01:14 PM, Lucas Stach wrote:
>>> This allows memory clients to collaboratively control the EMC
>>> performance.
>>> Each client may set its performance demands. The EMC driver then tries
>>> to find a DRAM performance level which is able to satisfy all those
>>> demands.

>>> +static void tegra_emc_scale_clock(void)
>>
>>> +	clock_rate = bandwidth_floor >> 2; /* 4byte/clock */
>>
>> That assumes a 32-bit SDRAM interface. I'm sure that won't always be
>> true. Perhaps we should invent a #define for this so it stands out
>> slightly more if/when this needs to change later.
>>
> Hm, maybe we can add a DT property within the EMC node for this.

I expect you can derive the memory bus width from a combination of the
HW version and current register settings.
Mark Zhang Dec. 18, 2012, 8:09 a.m. UTC | #8
On 12/17/2012 07:22 PM, Lucas Stach wrote:
> Hi Mark,
> 
> On Monday, December 17, 2012 at 19:08 +0800, Mark Zhang wrote:
>> If my understanding is right, this is a temporary solution, because
>> this kind of requirement should be addressed by the DVFS subsystem.
>>
> Yes, this can go away once proper DVFS is added. But maybe it can also
> serve as an example of how things could be done there.
[...]
> 
>>>  #include <linux/platform_device.h>
>>>  #include <linux/platform_data/tegra_emc.h>
>>> +#include <linux/types.h>
>>> +#include <memory/tegra_emc_performance.h>
>>
>> Why don't you put this file into the directory where "tegra_emc.h"
>> resides? I think that would be easier to find and understand.
>>
> Because my understanding is that mach-tegra is not the right place for
> include files that are used by normal drivers and not just the platform
> code.
> 

The Tegra EMC driver is not a "normal" driver, it's highly Tegra
related. "tegra_emc_performance.h" is also Tegra related, so I think
it's not good to put it into a generic directory like "include/memory"
here.

>>>  
>>>  #include "tegra2_emc.h"
>>>  #include "fuse.h"
>>> @@ -35,8 +38,16 @@ static bool emc_enable;
>>>  #endif
>>>  module_param(emc_enable, bool, 0644);
>>>  
>>> +struct emc_superclient_constraint {
>>> +	int bandwidth;
>>> +	enum tegra_emc_perf_level perf_level;
>>> +};
>>> +
>>> +static struct emc_superclient_constraint constraints[TEGRA_EMC_SC_NUM];
>>> +
>>>  static struct platform_device *emc_pdev;
>>>  static void __iomem *emc_regbase;
>>> +static struct clk *emc_clk;
>>
>> Instead of using global variables, it's better to save this in a
>> driver-private structure, although the global way is easier to write. :)
>> I think an EMC driver-private structure will be needed anyway when we
>> start the upstream work on DVFS, so we could introduce it right now.
>>
> I can certainly do this. It will increase the churn of the patch a bit,
> but I think your concern is valid and I'll address it in V2.
> 
>>>  
>>>  static inline void emc_writel(u32 val, unsigned long addr)
>>>  {
>>> @@ -176,6 +187,64 @@ int tegra_emc_set_rate(unsigned long rate)
>>>  	return 0;
>>>  }
>>>  
>>> +static void tegra_emc_scale_clock(void)
>>> +{
>>> +	u64 bandwidth_floor = 0;
>>> +	enum tegra_emc_perf_level highest_level = TEGRA_EMC_PL_LOW;
>>> +	unsigned long clock_rate;
>>> +	int i;
>>> +
>>> +	for (i = 0; i < ARRAY_SIZE(constraints); i++) {
>>> +		bandwidth_floor += constraints[i].bandwidth;
>>
>> I'm confused here. Why do we add all the bandwidth up? Shouldn't we
>> loop over all the bandwidth requests in the "constraints" array, find
>> the largest one, compare it with the EMC table, and then finally write
>> the registers and set the EMC clock?
>>
> No, bandwidth constraints add up. Imagine two display clients scanning
> out at the same time. Both on their own may be satisfied with 83MHz DRAM
> clock, but if they are running at the same time you have to account for
> that and maybe bump DRAM freq to 133MHz.

Ah, yes, this is correct for the DC. I skimmed the downstream kernel
code and found that there is different logic for different clients. For
the DCs, their bandwidth requests should be added up. For other
clients, e.g. USB, taking the maximum bandwidth request is OK.

So we may do more work on this part later, to write a more accurate
algorithm to get the final EMC rate. For now, it's OK.

> 
> Regards,
> Lucas
> 
> 
Lucas Stach Dec. 18, 2012, 9:11 a.m. UTC | #9
On Tuesday, December 18, 2012 at 16:09 +0800, Mark Zhang wrote:
[...]
> >> Why don't you put this file into the directory where "tegra_emc.h"
> >> resides? I think that would be easier to find and understand.
> >>
> > Because my understanding is that mach-tegra is not the right place for
> > include files that are used by normal drivers and not just the platform
> > code.
> > 
> 
> The Tegra EMC driver is not a "normal" driver, it's highly Tegra
> related. "tegra_emc_performance.h" is also Tegra related, so I think
> it's not good to put it into a generic directory like "include/memory"
> here.
> 
In fact it is a normal device driver. There is nothing specific to the
platform in there. Citing from [1]: "Device drivers for things, even if
they are only found on the ARM architecture, or even only on your
machine, do not go in these directories, but in the relevant parts of
the main tree; usually linux/drivers/, but sometimes linux/fs
or /linux/net if there are filesystems, partition types or network
protocols specific to your device."

So honestly I don't even know why the EMC driver is located in
mach-tegra. This may have been historically the right place, as EMC was
intertwined with CPUfreq, which is in fact platform specific. But that
point is moot now.

[...]
> >>
> >> I'm confused here. Why do we add all the bandwidth up? Shouldn't we
> >> loop over all the bandwidth requests in the "constraints" array, find
> >> the largest one, compare it with the EMC table, and then finally write
> >> the registers and set the EMC clock?
> >>
> > No, bandwidth constraints add up. Imagine two display clients scanning
> > out at the same time. Both on their own may be satisfied with 83MHz DRAM
> > clock, but if they are running at the same time you have to account for
> > that and maybe bump DRAM freq to 133MHz.
> 
> Ah, yes, this is correct for the DC. I skimmed the downstream kernel
> code and found that there is different logic for different clients. For
> the DCs, their bandwidth requests should be added up. For other
> clients, e.g. USB, taking the maximum bandwidth request is OK.
> 
> So we may do more work on this part later, to write a more accurate
> algorithm to get the final EMC rate. For now, it's OK.
> 
I don't see how there could be any difference at the EMC level. You
always want the EMC to deliver enough bandwidth for all registered
clients. You certainly don't want to risk display corruption just
because USB also demands its share. This might be a bad example, as the
high priority of the DC transactions may kick out USB, but still.

There might be different ways in which clients compute their needed
bandwidth; maybe USB only requests bandwidth for one high-speed port
even if three ports are connected. But that's the client's decision,
not the EMC's.

Regards,
Lucas

[1] http://www.linux-arm.org/pub/LinuxKernel/WebHome/aleph-porting.pdf

Mark Zhang Dec. 18, 2012, 11:50 a.m. UTC | #10
On 12/18/2012 05:11 PM, Lucas Stach wrote:
> On Tuesday, December 18, 2012 at 16:09 +0800, Mark Zhang wrote:
> [...]
>>>> Why don't you put this file into the directory where "tegra_emc.h"
>>>> resides? I think that would be easier to find and understand.
>>>>
>>> Because my understanding is that mach-tegra is not the right place for
>>> include files that are used by normal drivers and not just the platform
>>> code.
>>>
>>
>> The Tegra EMC driver is not a "normal" driver, it's highly Tegra
>> related. "tegra_emc_performance.h" is also Tegra related, so I think
>> it's not good to put it into a generic directory like "include/memory"
>> here.
>>
> In fact it is a normal device driver. There is nothing specific to the
> platform in there. Citing from [1]: "Device drivers for things, even if
> they are only found on the ARM architecture, or even only on your
> machine, do not go in these directories, but in the relevant parts of
> the main tree; usually linux/drivers/, but sometimes linux/fs
> or /linux/net if there are filesystems, partition types or network
> protocols specific to your device."
> 
> So honestly I don't even know why the EMC driver is located in
> mach-tegra. This may have been historically the right place, as EMC was
> intertwined with CPUfreq, which is in fact platform specific. But that
> point is moot now.
> 

Okay, learned. Thanks for the explanation.

> [...]
>>>>
>>>> I'm confused here. Why do we add all the bandwidth up? Shouldn't we
>>>> loop over all the bandwidth requests in the "constraints" array, find
>>>> the largest one, compare it with the EMC table, and then finally write
>>>> the registers and set the EMC clock?
>>>>
>>> No, bandwidth constraints add up. Imagine two display clients scanning
>>> out at the same time. Both on their own may be satisfied with 83MHz DRAM
>>> clock, but if they are running at the same time you have to account for
>>> that and maybe bump DRAM freq to 133MHz.
>>
>> Ah, yes, this is correct for the DC. I skimmed the downstream kernel
>> code and found that there is different logic for different clients. For
>> the DCs, their bandwidth requests should be added up. For other
>> clients, e.g. USB, taking the maximum bandwidth request is OK.
>> 
>> So we may do more work on this part later, to write a more accurate
>> algorithm to get the final EMC rate. For now, it's OK.
>>
> I don't see how there could be any difference at the EMC level. You
> always want the EMC to deliver enough bandwidth for all registered
> clients. You certainly don't want to risk display corruption just
> because USB also demands its share. This might be a bad example, as the
> high priority of the DC transactions may kick out USB, but still.
> 
> There might be different ways in which clients compute their needed
> bandwidth; maybe USB only requests bandwidth for one high-speed port
> even if three ports are connected. But that's the client's decision,
> not the EMC's.

Making the EMC deliver enough bandwidth is right, but just adding all
requests up without doing anything else will make the EMC always run at
the highest rate, because the EMC has too many clients. That defeats
the point of DVFS/power saving.

So the EMC should do some arbitration work, and this can't be done on
the EMC client side either, because the clients don't know each other's
requirements.

Maybe I didn't explain it very clearly above; you may refer to
"tegra3_clk_shared_bus_update" in tegra30_clocks.c in the downstream
kernel.

> 
> Regards,
> Lucas
> 
> [1] http://www.linux-arm.org/pub/LinuxKernel/WebHome/aleph-porting.pdf
> 
Lucas Stach Dec. 18, 2012, 12:02 p.m. UTC | #11
On Tuesday, December 18, 2012 at 19:50 +0800, Mark Zhang wrote:
[...]
> >>>>
> >>>> I'm confused here. Why do we add all the bandwidth up? Shouldn't we
> >>>> loop over all the bandwidth requests in the "constraints" array, find
> >>>> the largest one, compare it with the EMC table, and then finally write
> >>>> the registers and set the EMC clock?
> >>>>
> >>> No, bandwidth constraints add up. Imagine two display clients scanning
> >>> out at the same time. Both on their own may be satisfied with 83MHz DRAM
> >>> clock, but if they are running at the same time you have to account for
> >>> that and maybe bump DRAM freq to 133MHz.
> >>
> >> Ah, yes, this is correct for the DC. I skimmed the downstream kernel
> >> code and found that there is different logic for different clients. For
> >> the DCs, their bandwidth requests should be added up. For other
> >> clients, e.g. USB, taking the maximum bandwidth request is OK.
> >> 
> >> So we may do more work on this part later, to write a more accurate
> >> algorithm to get the final EMC rate. For now, it's OK.
> >>
> > I don't see how there could be any difference at the EMC level. You
> > always want the EMC to deliver enough bandwidth for all registered
> > clients. You certainly don't want to risk display corruption just
> > because USB also demands its share. This might be a bad example, as the
> > high priority of the DC transactions may kick out USB, but still.
> > 
> > There might be different ways in which clients compute their needed
> > bandwidth; maybe USB only requests bandwidth for one high-speed port
> > even if three ports are connected. But that's the client's decision,
> > not the EMC's.
> 
> Making the EMC deliver enough bandwidth is right, but just adding all
> requests up without doing anything else will make the EMC always run at
> the highest rate, because the EMC has too many clients. That defeats
> the point of DVFS/power saving.
> 
Clients should only request bandwidth from the EMC if they are going to
use it. It isn't too clear from the current patchset, but clients are
responsible for setting their bandwidth request back to zero once they
don't need the bandwidth any more, to give the EMC a chance to scale
back down again.

For the DC we are not using this right now, as we never turn off a DC
at all. But I have some DPMS patches in the making that will show how
this is supposed to work: request bandwidth for the current mode, and
clear out the request once you stop scanout.

As explained in the comment at the function prototype, you don't
request bandwidth because you may need it to do something fast, but
because you need it and your engine will malfunction if the EMC fails
to provide this much bandwidth. That's why it is called the bandwidth
floor internally.
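For illustration, such a user would look roughly like this (hypothetical
functions, example numbers):

#include <memory/tegra_emc_performance.h>

static void example_dc_start_scanout(void)
{
	/* 1920x1080@60Hz at 4 bytes/pixel -> ~498 MB/s floor */
	tegra_emc_request_bandwidth(TEGRA_EMC_SC_DC, 497664000);
}

static void example_dc_stop_scanout(void)
{
	/* drop the floor to zero so the EMC can scale back down */
	tegra_emc_request_bandwidth(TEGRA_EMC_SC_DC, 0);
}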

Regards,
Lucas


Mark Zhang Dec. 19, 2012, 3:21 a.m. UTC | #12
On 12/18/2012 08:02 PM, Lucas Stach wrote:
> On Tuesday, December 18, 2012 at 19:50 +0800, Mark Zhang wrote:
> [...]
>>>>>>
>>>>>> I'm confused here. Why do we add all the bandwidth up? Shouldn't we
>>>>>> loop over all the bandwidth requests in the "constraints" array, find
>>>>>> the largest one, compare it with the EMC table, and then finally write
>>>>>> the registers and set the EMC clock?
>>>>>>
>>>>> No, bandwidth constraints add up. Imagine two display clients scanning
>>>>> out at the same time. Both on their own may be satisfied with 83MHz DRAM
>>>>> clock, but if they are running at the same time you have to account for
>>>>> that and maybe bump DRAM freq to 133MHz.
>>>>
>>>> Ah, yes, this is correct for the DC. I skimmed the downstream kernel
>>>> code and found that there is different logic for different clients. For
>>>> the DCs, their bandwidth requests should be added up. For other
>>>> clients, e.g. USB, taking the maximum bandwidth request is OK.
>>>> 
>>>> So we may do more work on this part later, to write a more accurate
>>>> algorithm to get the final EMC rate. For now, it's OK.
>>>>
>>> I don't see how there could be any difference at the EMC level. You
>>> always want the EMC to deliver enough bandwidth for all registered
>>> clients. You certainly don't want to risk display corruption just
>>> because USB also demands its share. This might be a bad example, as the
>>> high priority of the DC transactions may kick out USB, but still.
>>> 
>>> There might be different ways in which clients compute their needed
>>> bandwidth; maybe USB only requests bandwidth for one high-speed port
>>> even if three ports are connected. But that's the client's decision,
>>> not the EMC's.
>>
>> Making the EMC deliver enough bandwidth is right, but just adding all
>> requests up without doing anything else will make the EMC always run at
>> the highest rate, because the EMC has too many clients. That defeats
>> the point of DVFS/power saving.
>>
> Clients should only request bandwidth from the EMC if they are going to
> use it. It isn't too clear from the current patchset, but clients are
> responsible for setting their bandwidth request back to zero once they
> don't need the bandwidth any more, to give the EMC a chance to scale
> back down again.
> 
> For the DC we are not using this right now, as we never turn off a DC
> at all. But I have some DPMS patches in the making that will show how
> this is supposed to work: request bandwidth for the current mode, and
> clear out the request once you stop scanout.
> 

Correct. But I'm still not convinced that we don't need any strategy
when calculating the EMC clock rate. Just adding all bandwidth requests
up seems rough. I'll try to ask the EMC scaling gurus for suggestions.

Mark
> As explained in the comment at the function prototype, you don't
> request bandwidth because you may need it to do something fast, but
> because you need it and your engine will malfunction if the EMC fails
> to provide this much bandwidth. That's why it is called the bandwidth
> floor internally.
> 
> Regards,
> Lucas
> 
> 

Patch

diff --git a/arch/arm/mach-tegra/tegra2_emc.c b/arch/arm/mach-tegra/tegra2_emc.c
index 837c7b9..d2cf7a1 100644
--- a/arch/arm/mach-tegra/tegra2_emc.c
+++ b/arch/arm/mach-tegra/tegra2_emc.c
@@ -15,6 +15,7 @@ 
  *
  */
 
+#include <asm/div64.h>
 #include <linux/kernel.h>
 #include <linux/device.h>
 #include <linux/clk.h>
@@ -24,6 +25,8 @@ 
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/tegra_emc.h>
+#include <linux/types.h>
+#include <memory/tegra_emc_performance.h>
 
 #include "tegra2_emc.h"
 #include "fuse.h"
@@ -35,8 +38,16 @@  static bool emc_enable;
 #endif
 module_param(emc_enable, bool, 0644);
 
+struct emc_superclient_constraint {
+	int bandwidth;
+	enum tegra_emc_perf_level perf_level;
+};
+
+static struct emc_superclient_constraint constraints[TEGRA_EMC_SC_NUM];
+
 static struct platform_device *emc_pdev;
 static void __iomem *emc_regbase;
+static struct clk *emc_clk;
 
 static inline void emc_writel(u32 val, unsigned long addr)
 {
@@ -176,6 +187,64 @@  int tegra_emc_set_rate(unsigned long rate)
 	return 0;
 }
 
+static void tegra_emc_scale_clock(void)
+{
+	u64 bandwidth_floor = 0;
+	enum tegra_emc_perf_level highest_level = TEGRA_EMC_PL_LOW;
+	unsigned long clock_rate;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(constraints); i++) {
+		bandwidth_floor += constraints[i].bandwidth;
+		if (constraints[i].perf_level > highest_level)
+			highest_level = constraints[i].perf_level;
+	}
+
+	/*
+	 * Add some headroom to the bandwidth, we use a conservative 60%
+	 * DRAM bandwidth efficiency factor
+	 */
+	bandwidth_floor *= 5;
+	do_div(bandwidth_floor, 3);
+
+	clock_rate = bandwidth_floor >> 2; /* 4byte/clock */
+
+	/* boost clock according to perf level */
+	switch (highest_level) {
+	case TEGRA_EMC_PL_MID:
+		clock_rate += 300000000;
+		break;
+	case TEGRA_EMC_PL_HIGH:
+		clock_rate += 600000000;
+		break;
+	default:
+		break;
+	}
+
+	/* cap clock_rate at 600MHz, enough to select highest rate table */
+	clock_rate = min(600000000UL, clock_rate);
+
+	clk_set_rate(emc_clk, clock_rate);
+}
+
+void tegra_emc_request_bandwidth(enum tegra_emc_superclient client,
+				 int bandwidth)
+{
+	constraints[client].bandwidth = bandwidth;
+
+	tegra_emc_scale_clock();
+}
+EXPORT_SYMBOL_GPL(tegra_emc_request_bandwidth);
+
+void tegra_emc_request_perf_level(enum tegra_emc_superclient client,
+				  enum tegra_emc_perf_level level)
+{
+	constraints[client].perf_level = level;
+
+	tegra_emc_scale_clock();
+}
+EXPORT_SYMBOL_GPL(tegra_emc_request_perf_level);
+
 #ifdef CONFIG_OF
 static struct device_node *tegra_emc_ramcode_devnode(struct device_node *np)
 {
@@ -270,19 +339,17 @@  static struct tegra_emc_pdata *tegra_emc_dt_parse_pdata(
 
 static struct tegra_emc_pdata __devinit *tegra_emc_fill_pdata(struct platform_device *pdev)
 {
-	struct clk *c = clk_get_sys(NULL, "emc");
 	struct tegra_emc_pdata *pdata;
 	unsigned long khz;
 	int i;
 
 	WARN_ON(pdev->dev.platform_data);
-	BUG_ON(IS_ERR_OR_NULL(c));
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 	pdata->tables = devm_kzalloc(&pdev->dev, sizeof(*pdata->tables),
 				     GFP_KERNEL);
 
-	pdata->tables[0].rate = clk_get_rate(c) / 2 / 1000;
+	pdata->tables[0].rate = clk_get_rate(emc_clk) / 2 / 1000;
 
 	for (i = 0; i < TEGRA_EMC_NUM_REGS; i++)
 		pdata->tables[0].regs[i] = emc_readl(emc_reg_addr[i]);
@@ -306,6 +373,9 @@  static int __devinit tegra_emc_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
+	emc_clk = clk_get_sys(NULL, "emc");
+	BUG_ON(IS_ERR_OR_NULL(emc_clk));
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
 		dev_err(&pdev->dev, "missing register base\n");
@@ -333,6 +403,13 @@  static int __devinit tegra_emc_probe(struct platform_device *pdev)
 	return 0;
 }
 
+static int tegra_emc_remove(struct platform_device *pdev)
+{
+	clk_put(emc_clk);
+
+	return 0;
+}
+
 static struct of_device_id tegra_emc_of_match[] __devinitdata = {
 	{ .compatible = "nvidia,tegra20-emc", },
 	{ },
@@ -345,6 +422,7 @@  static struct platform_driver tegra_emc_driver = {
 		.of_match_table = tegra_emc_of_match,
 	},
 	.probe          = tegra_emc_probe,
+	.remove         = tegra_emc_remove,
 };
 
 static int __init tegra_emc_init(void)
diff --git a/include/memory/tegra_emc_performance.h b/include/memory/tegra_emc_performance.h
new file mode 100644
index 0000000..6c23099
--- /dev/null
+++ b/include/memory/tegra_emc_performance.h
@@ -0,0 +1,79 @@ 
+/*
+ * Copyright (C) 2012 Lucas Stach
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __TEGRA_EMC_PERFORMANCE_H
+#define __TEGRA_EMC_PERFORMANCE_H
+
+enum tegra_emc_superclient {
+	TEGRA_EMC_SC_AFI = 0,	/* Tegra30 only */
+	TEGRA_EMC_SC_AVP,
+	TEGRA_EMC_SC_DC,
+	TEGRA_EMC_SC_DCB,
+	TEGRA_EMC_SC_EPP,
+	TEGRA_EMC_SC_G2,
+	TEGRA_EMC_SC_HC,
+	TEGRA_EMC_SC_HDA,	/* Tegra30 only */
+	TEGRA_EMC_SC_ISP,
+	TEGRA_EMC_SC_MPCORE,
+	TEGRA_EMC_SC_MPCORELP,	/* Tegra30 only */
+	TEGRA_EMC_SC_MPE,
+	TEGRA_EMC_SC_NV,
+	TEGRA_EMC_SC_NV2,	/* Tegra30 only */
+	TEGRA_EMC_SC_PPCS,
+	TEGRA_EMC_SC_SATA,	/* Tegra30 only */
+	TEGRA_EMC_SC_VDE,
+	TEGRA_EMC_SC_VI,
+
+	TEGRA_EMC_SC_NUM
+};
+
+enum tegra_emc_perf_level {
+	TEGRA_EMC_PL_LOW = 0,	/* no boost */
+	TEGRA_EMC_PL_MID,	/* half frequency boost */
+	TEGRA_EMC_PL_HIGH	/* max frequency boost */
+};
+
+#ifdef CONFIG_ARCH_TEGRA_2x_SOC
+/**
+ * tegra_emc_request_bandwidth - request bandwidth for isochronous clients
+ * @client: the EMC superclient which requests bandwidth
+ * @bandwidth: needed bandwidth in bytes/s
+ *
+ * Use this function to request bandwidth for isochronous clients, which
+ * need a specific bandwidth to operate properly. EMC will make sure to not
+ * drop below this limit while scaling DRAM frequency.
+ * Superclients have to add up the required bandwidth for their subunits
+ * themselves.
+ */
+void tegra_emc_request_bandwidth(enum tegra_emc_superclient client,
+				 int bandwidth);
+
+/**
+ * tegra_emc_request_perf_level - request DRAM frequency boost
+ * @client: the EMC superclient which requests boost
+ * @level: the requested boost level
+ *
+ * Use this function to boost DRAM frequency for clients which don't need a
+ * specific bandwidth but want to scale up DRAM bandwidth to implement
+ * race-to-idle.
+ */
+void tegra_emc_request_perf_level(enum tegra_emc_superclient client,
+				  enum tegra_emc_perf_level level);
+#else
+/* FIXME: Tegra30 has no EMC driver yet, stub out performance functions */
+static inline void tegra_emc_request_bandwidth(enum tegra_emc_superclient client,
+					       int bandwidth)
+{
+}
+static inline void tegra_emc_request_perf_level(enum tegra_emc_superclient client,
+						enum tegra_emc_perf_level level)
+{
+}
+#endif /* CONFIG_ARCH_TEGRA_2x_SOC */
+
+#endif /* __TEGRA_EMC_PERFORMANCE_H */