diff mbox series

[04/16] mtd: spi-nor: aspeed: Add read training

Message ID 20191004115919.20788-5-clg@kaod.org
State New, archived
Delegated to: Ambarus Tudor
Headers show
Series mtd: spi-nor: aspeed: AST2600 support and extensions | expand

Commit Message

Cédric Le Goater Oct. 4, 2019, 11:59 a.m. UTC
The read training algorithm first reads a golden buffer at low speed
and then performs reads with different clocks and delay cycles
settings to find the fastest configuration for the chip. The current
implementation is based on the OpenPOWER pflash tool.

For the moment, read training is only activated for SPI controllers as
U-Boot should have done the read training for the FMC controller using
the DMA interface. We also don't limit yet the max frequency, so it's
safer not to be too optimistic on the capabilities of the boot flash.

It can be deactivated at boot time with the kernel parameter :

	aspeed_smc.optimize_read=0

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 drivers/mtd/spi-nor/aspeed-smc.c | 204 +++++++++++++++++++++++++++++++
 1 file changed, 204 insertions(+)

Comments

Boris Brezillon Oct. 11, 2019, 12:28 p.m. UTC | #1
On Fri,  4 Oct 2019 13:59:07 +0200
Cédric Le Goater <clg@kaod.org> wrote:

> +#define ASPEED_SMC_HCLK_DIV(i) \
> +	(aspeed_smc_hclk_divs[(i) - 1] << CONTROL_CLOCK_FREQ_SEL_SHIFT)
> +
> +static u32 aspeed_smc_default_read(struct aspeed_smc_chip *chip)
> +{
> +	/*
> +	 * Keep the 4Byte address mode on the AST2400 SPI controller.
> +	 * Other controllers set the 4Byte mode in the CE Control
> +	 * Register
> +	 */
> +	u32 ctl_mask = chip->controller->info == &spi_2400_info ?
> +		 CONTROL_IO_ADDRESS_4B : 0;
> +
> +	return (chip->ctl_val[smc_read] & ctl_mask) |
> +		(0x00 << 28) | /* Single bit */
> +		(0x00 << 24) | /* CE# max */
> +		(0x03 << 16) | /* use normal reads */
> +		(0x00 <<  8) | /* HCLK/16 */
> +		(0x00 <<  6) | /* no dummy cycle */
> +		(0x00);        /* normal mode */

IIUC, you're using a SPINOR_OP_READ operation to read the golden
buffer, and if I'm right, you start reading at offset 0 of the dirmap
window (offset 0 in the flash), so basically the first block in the NOR.
What happens if this block is erased? In that case your golden buf will
contain only 0xff values, and the read calibration is likely to be
useless (how can you determine if timings are good when IO pins always
stay high). Don't we have a command that return non-ff/non-0 data while
still being predictable and immutable? Do you expect users to always
flash a pattern that helps calibrating those delays?

> +}
> +
> +static int aspeed_smc_optimize_read(struct aspeed_smc_chip *chip,
> +				    u32 max_freq)
> +{
> +	u8 *golden_buf, *test_buf;
> +	int i, rc, best_div = -1;
> +	u32 save_read_val = chip->ctl_val[smc_read];
> +	u32 ahb_freq = chip->controller->clk_frequency;
> +
> +	dev_dbg(chip->nor.dev, "AHB frequency: %d MHz", ahb_freq / 1000000);
> +
> +	test_buf = kmalloc(CALIBRATE_BUF_SIZE * 2, GFP_KERNEL);
> +	golden_buf = test_buf + CALIBRATE_BUF_SIZE;
> +
> +	/* We start with the dumbest setting (keep 4Byte bit) and read
> +	 * some data
> +	 */
> +	chip->ctl_val[smc_read] = aspeed_smc_default_read(chip);
> +
> +	writel(chip->ctl_val[smc_read], chip->ctl);
> +
> +	memcpy_fromio(golden_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
> +
> +	/* Establish our read mode with freq field set to 0 (HCLK/16) */
> +	chip->ctl_val[smc_read] = save_read_val & 0xfffff0ff;
> +
> +	/* Check if calibration data is suitable */
> +	if (!aspeed_smc_check_calib_data(golden_buf, CALIBRATE_BUF_SIZE)) {
> +		dev_info(chip->nor.dev,
> +			 "Calibration area too uniform, using low speed");
> +		writel(chip->ctl_val[smc_read], chip->ctl);
> +		kfree(test_buf);
> +		return 0;
> +	}
> +
> +	/* Now we iterate the HCLK dividers until we find our breaking point */
> +	for (i = ARRAY_SIZE(aspeed_smc_hclk_divs); i > 0; i--) {
> +		u32 tv, freq;
> +
> +		/* Compare timing to max */
> +		freq = ahb_freq / i;
> +		if (freq > max_freq)
> +			continue;
> +
> +		/* Set the timing */
> +		tv = chip->ctl_val[smc_read] | ASPEED_SMC_HCLK_DIV(i);
> +		writel(tv, chip->ctl);
> +		dev_dbg(chip->nor.dev, "Trying HCLK/%d...", i);
> +		rc = aspeed_smc_calibrate_reads(chip, i, golden_buf, test_buf);
> +		if (rc == 0)
> +			best_div = i;
> +	}
> +	kfree(test_buf);
> +
> +	/* Nothing found ? */
> +	if (best_div < 0) {
> +		dev_warn(chip->nor.dev, "No good frequency, using dumb slow");
> +	} else {
> +		dev_dbg(chip->nor.dev, "Found good read timings at HCLK/%d",
> +			best_div);
> +		chip->ctl_val[smc_read] |= ASPEED_SMC_HCLK_DIV(best_div);
> +	}
> +
> +	writel(chip->ctl_val[smc_read], chip->ctl);
> +	return 0;
> +}
Raghavendra, Vignesh Oct. 11, 2019, 1:13 p.m. UTC | #2
Hi Cedric,

On 11/10/19 5:58 PM, Boris Brezillon wrote:
> On Fri,  4 Oct 2019 13:59:07 +0200
> Cédric Le Goater <clg@kaod.org> wrote:
> 
>> +#define ASPEED_SMC_HCLK_DIV(i) \
>> +	(aspeed_smc_hclk_divs[(i) - 1] << CONTROL_CLOCK_FREQ_SEL_SHIFT)
>> +
>> +static u32 aspeed_smc_default_read(struct aspeed_smc_chip *chip)
>> +{
>> +	/*
>> +	 * Keep the 4Byte address mode on the AST2400 SPI controller.
>> +	 * Other controllers set the 4Byte mode in the CE Control
>> +	 * Register
>> +	 */
>> +	u32 ctl_mask = chip->controller->info == &spi_2400_info ?
>> +		 CONTROL_IO_ADDRESS_4B : 0;
>> +
>> +	return (chip->ctl_val[smc_read] & ctl_mask) |
>> +		(0x00 << 28) | /* Single bit */
>> +		(0x00 << 24) | /* CE# max */
>> +		(0x03 << 16) | /* use normal reads */
>> +		(0x00 <<  8) | /* HCLK/16 */
>> +		(0x00 <<  6) | /* no dummy cycle */
>> +		(0x00);        /* normal mode */
> 
> IIUC, you're using a SPINOR_OP_READ operation to read the golden
> buffer, and if I'm right, you start reading at offset 0 of the dirmap
> window (offset 0 in the flash), so basically the first block in the NOR.
> What happens if this block is erased? In that case your golden buf will
> contain only 0xff values, and the read calibration is likely to be
> useless (how can you determine if timings are good when IO pins always
> stay high). Don't we have a command that return non-ff/non-0 data while
> still being predictable and immutable? Do you expect users to always
> flash a pattern that helps calibrating those delays?
> 

Yes, this is precisely my concern as well. I have been developing
training sequence for cadence-quadspi controller (requirements are
similar to what you have here) and found that its better to use read
only data such as SFDP table data to calibrate. Cadence-quadspi requires
training only in higher performance modes like Quad/Octal DTR mode and
needs 16 bytes of known data for calibration. Hence SFDP works well for
my case.
But the problem here is that, aspeed controller needs 16K of known data.
SFDP table is not that big and read beyond address space is not required
to wrap around.
Wondering if you really need to read 16K amount of data for calibration?

Regards
Vignesh

>> +}
>> +
>> +static int aspeed_smc_optimize_read(struct aspeed_smc_chip *chip,
>> +				    u32 max_freq)
>> +{
>> +	u8 *golden_buf, *test_buf;
>> +	int i, rc, best_div = -1;
>> +	u32 save_read_val = chip->ctl_val[smc_read];
>> +	u32 ahb_freq = chip->controller->clk_frequency;
>> +
>> +	dev_dbg(chip->nor.dev, "AHB frequency: %d MHz", ahb_freq / 1000000);
>> +
>> +	test_buf = kmalloc(CALIBRATE_BUF_SIZE * 2, GFP_KERNEL);
>> +	golden_buf = test_buf + CALIBRATE_BUF_SIZE;
>> +
>> +	/* We start with the dumbest setting (keep 4Byte bit) and read
>> +	 * some data
>> +	 */
>> +	chip->ctl_val[smc_read] = aspeed_smc_default_read(chip);
>> +
>> +	writel(chip->ctl_val[smc_read], chip->ctl);
>> +
>> +	memcpy_fromio(golden_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
>> +
>> +	/* Establish our read mode with freq field set to 0 (HCLK/16) */
>> +	chip->ctl_val[smc_read] = save_read_val & 0xfffff0ff;
>> +
>> +	/* Check if calibration data is suitable */
>> +	if (!aspeed_smc_check_calib_data(golden_buf, CALIBRATE_BUF_SIZE)) {
>> +		dev_info(chip->nor.dev,
>> +			 "Calibration area too uniform, using low speed");
>> +		writel(chip->ctl_val[smc_read], chip->ctl);
>> +		kfree(test_buf);
>> +		return 0;
>> +	}
>> +
>> +	/* Now we iterate the HCLK dividers until we find our breaking point */
>> +	for (i = ARRAY_SIZE(aspeed_smc_hclk_divs); i > 0; i--) {
>> +		u32 tv, freq;
>> +
>> +		/* Compare timing to max */
>> +		freq = ahb_freq / i;
>> +		if (freq > max_freq)
>> +			continue;
>> +
>> +		/* Set the timing */
>> +		tv = chip->ctl_val[smc_read] | ASPEED_SMC_HCLK_DIV(i);
>> +		writel(tv, chip->ctl);
>> +		dev_dbg(chip->nor.dev, "Trying HCLK/%d...", i);
>> +		rc = aspeed_smc_calibrate_reads(chip, i, golden_buf, test_buf);
>> +		if (rc == 0)
>> +			best_div = i;
>> +	}
>> +	kfree(test_buf);
>> +
>> +	/* Nothing found ? */
>> +	if (best_div < 0) {
>> +		dev_warn(chip->nor.dev, "No good frequency, using dumb slow");
>> +	} else {
>> +		dev_dbg(chip->nor.dev, "Found good read timings at HCLK/%d",
>> +			best_div);
>> +		chip->ctl_val[smc_read] |= ASPEED_SMC_HCLK_DIV(best_div);
>> +	}
>> +
>> +	writel(chip->ctl_val[smc_read], chip->ctl);
>> +	return 0;
>> +}
> 
>
Cédric Le Goater Oct. 11, 2019, 1:55 p.m. UTC | #3
On 11/10/2019 14:28, Boris Brezillon wrote:
> On Fri,  4 Oct 2019 13:59:07 +0200
> Cédric Le Goater <clg@kaod.org> wrote:
> 
>> +#define ASPEED_SMC_HCLK_DIV(i) \
>> +	(aspeed_smc_hclk_divs[(i) - 1] << CONTROL_CLOCK_FREQ_SEL_SHIFT)
>> +
>> +static u32 aspeed_smc_default_read(struct aspeed_smc_chip *chip)
>> +{
>> +	/*
>> +	 * Keep the 4Byte address mode on the AST2400 SPI controller.
>> +	 * Other controllers set the 4Byte mode in the CE Control
>> +	 * Register
>> +	 */
>> +	u32 ctl_mask = chip->controller->info == &spi_2400_info ?
>> +		 CONTROL_IO_ADDRESS_4B : 0;
>> +
>> +	return (chip->ctl_val[smc_read] & ctl_mask) |
>> +		(0x00 << 28) | /* Single bit */
>> +		(0x00 << 24) | /* CE# max */
>> +		(0x03 << 16) | /* use normal reads */
>> +		(0x00 <<  8) | /* HCLK/16 */
>> +		(0x00 <<  6) | /* no dummy cycle */
>> +		(0x00);        /* normal mode */
> 
> IIUC, you're using a SPINOR_OP_READ operation to read the golden
> buffer, and if I'm right, you start reading at offset 0 of the dirmap
> window (offset 0 in the flash), so basically the first block in the NOR.

Yes.

> What happens if this block is erased? In that case your golden buf will
> contain only 0xff values, and the read calibration is likely to be
> useless 

yes. that is why we have the aspeed_smc_check_calib_data() routine to
check that the data read makes some sense. If this is not the case, then :

	 "Calibration area too uniform, using low speed"

> (how can you determine if timings are good when IO pins always
> stay high). Don't we have a command that return non-ff/non-0 data while
> still being predictable and immutable? 

Not that I know of on these controllers.

> Do you expect users to always
> flash a pattern that helps calibrating those delays?

This is the case on the OpenBMC systems, AFAICT. 

u-boot.bin should be the data read on the FMC controller and the 
SPI controller contains the host Firmware which is as random.   

> 
>> +}
>> +
>> +static int aspeed_smc_optimize_read(struct aspeed_smc_chip *chip,
>> +				    u32 max_freq)
>> +{
>> +	u8 *golden_buf, *test_buf;
>> +	int i, rc, best_div = -1;
>> +	u32 save_read_val = chip->ctl_val[smc_read];
>> +	u32 ahb_freq = chip->controller->clk_frequency;
>> +
>> +	dev_dbg(chip->nor.dev, "AHB frequency: %d MHz", ahb_freq / 1000000);
>> +
>> +	test_buf = kmalloc(CALIBRATE_BUF_SIZE * 2, GFP_KERNEL);
>> +	golden_buf = test_buf + CALIBRATE_BUF_SIZE;
>> +
>> +	/* We start with the dumbest setting (keep 4Byte bit) and read
>> +	 * some data
>> +	 */
>> +	chip->ctl_val[smc_read] = aspeed_smc_default_read(chip);
>> +
>> +	writel(chip->ctl_val[smc_read], chip->ctl);
>> +
>> +	memcpy_fromio(golden_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
>> +
>> +	/* Establish our read mode with freq field set to 0 (HCLK/16) */
>> +	chip->ctl_val[smc_read] = save_read_val & 0xfffff0ff;
>> +
>> +	/* Check if calibration data is suitable */
>> +	if (!aspeed_smc_check_calib_data(golden_buf, CALIBRATE_BUF_SIZE)) {
>> +		dev_info(chip->nor.dev,
>> +			 "Calibration area too uniform, using low speed");
>> +		writel(chip->ctl_val[smc_read], chip->ctl);
>> +		kfree(test_buf);
>> +		return 0;
>> +	}
>> +
>> +	/* Now we iterate the HCLK dividers until we find our breaking point */
>> +	for (i = ARRAY_SIZE(aspeed_smc_hclk_divs); i > 0; i--) {
>> +		u32 tv, freq;
>> +
>> +		/* Compare timing to max */
>> +		freq = ahb_freq / i;
>> +		if (freq > max_freq)
>> +			continue;
>> +
>> +		/* Set the timing */
>> +		tv = chip->ctl_val[smc_read] | ASPEED_SMC_HCLK_DIV(i);
>> +		writel(tv, chip->ctl);
>> +		dev_dbg(chip->nor.dev, "Trying HCLK/%d...", i);
>> +		rc = aspeed_smc_calibrate_reads(chip, i, golden_buf, test_buf);
>> +		if (rc == 0)
>> +			best_div = i;
>> +	}
>> +	kfree(test_buf);
>> +
>> +	/* Nothing found ? */
>> +	if (best_div < 0) {
>> +		dev_warn(chip->nor.dev, "No good frequency, using dumb slow");
>> +	} else {
>> +		dev_dbg(chip->nor.dev, "Found good read timings at HCLK/%d",
>> +			best_div);
>> +		chip->ctl_val[smc_read] |= ASPEED_SMC_HCLK_DIV(best_div);
>> +	}
>> +
>> +	writel(chip->ctl_val[smc_read], chip->ctl);
>> +	return 0;
>> +}
> 
>
Cédric Le Goater Oct. 11, 2019, 2:03 p.m. UTC | #4
On 11/10/2019 15:13, Vignesh Raghavendra wrote:
> Hi Cedric,
> 
> On 11/10/19 5:58 PM, Boris Brezillon wrote:
>> On Fri,  4 Oct 2019 13:59:07 +0200
>> Cédric Le Goater <clg@kaod.org> wrote:
>>
>>> +#define ASPEED_SMC_HCLK_DIV(i) \
>>> +	(aspeed_smc_hclk_divs[(i) - 1] << CONTROL_CLOCK_FREQ_SEL_SHIFT)
>>> +
>>> +static u32 aspeed_smc_default_read(struct aspeed_smc_chip *chip)
>>> +{
>>> +	/*
>>> +	 * Keep the 4Byte address mode on the AST2400 SPI controller.
>>> +	 * Other controllers set the 4Byte mode in the CE Control
>>> +	 * Register
>>> +	 */
>>> +	u32 ctl_mask = chip->controller->info == &spi_2400_info ?
>>> +		 CONTROL_IO_ADDRESS_4B : 0;
>>> +
>>> +	return (chip->ctl_val[smc_read] & ctl_mask) |
>>> +		(0x00 << 28) | /* Single bit */
>>> +		(0x00 << 24) | /* CE# max */
>>> +		(0x03 << 16) | /* use normal reads */
>>> +		(0x00 <<  8) | /* HCLK/16 */
>>> +		(0x00 <<  6) | /* no dummy cycle */
>>> +		(0x00);        /* normal mode */
>>
>> IIUC, you're using a SPINOR_OP_READ operation to read the golden
>> buffer, and if I'm right, you start reading at offset 0 of the dirmap
>> window (offset 0 in the flash), so basically the first block in the NOR.
>> What happens if this block is erased? In that case your golden buf will
>> contain only 0xff values, and the read calibration is likely to be
>> useless (how can you determine if timings are good when IO pins always
>> stay high). Don't we have a command that return non-ff/non-0 data while
>> still being predictable and immutable? Do you expect users to always
>> flash a pattern that helps calibrating those delays?
>>
> 
> Yes, this is precisely my concern as well. I have been developing
> training sequence for cadence-quadspi controller (requirements are
> similar to what you have here) and found that its better to use read
> only data such as SFDP table data to calibrate. Cadence-quadspi requires
> training only in higher performance modes like Quad/Octal DTR mode and
> needs 16 bytes of known data for calibration. Hence SFDP works well for
> my case.

OK. Good to know.

> But the problem here is that, aspeed controller needs 16K of known data.

It's a choice we made on the first P8 systems we had.

> SFDP table is not that big and read beyond address space is not required
> to wrap around.
> Wondering if you really need to read 16K amount of data for calibration?

May be not. I agree this is taking a bit of time as we read 10 times
and compares: 3s on boot time on an ast2400 I think. Joel could tell
better. We could reduce the amount of data read and the number of 
loops surely.  

As for the validity of the data, we check with the golden buffer with
aspeed_smc_check_calib_data().

If it's too uniform, like on a flash never written too, we will need
a first write and a reboot to have faster read speed.

Thanks,

C. 

> 
> Regards
> Vignesh
> 
>>> +}
>>> +
>>> +static int aspeed_smc_optimize_read(struct aspeed_smc_chip *chip,
>>> +				    u32 max_freq)
>>> +{
>>> +	u8 *golden_buf, *test_buf;
>>> +	int i, rc, best_div = -1;
>>> +	u32 save_read_val = chip->ctl_val[smc_read];
>>> +	u32 ahb_freq = chip->controller->clk_frequency;
>>> +
>>> +	dev_dbg(chip->nor.dev, "AHB frequency: %d MHz", ahb_freq / 1000000);
>>> +
>>> +	test_buf = kmalloc(CALIBRATE_BUF_SIZE * 2, GFP_KERNEL);
>>> +	golden_buf = test_buf + CALIBRATE_BUF_SIZE;
>>> +
>>> +	/* We start with the dumbest setting (keep 4Byte bit) and read
>>> +	 * some data
>>> +	 */
>>> +	chip->ctl_val[smc_read] = aspeed_smc_default_read(chip);
>>> +
>>> +	writel(chip->ctl_val[smc_read], chip->ctl);
>>> +
>>> +	memcpy_fromio(golden_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
>>> +
>>> +	/* Establish our read mode with freq field set to 0 (HCLK/16) */
>>> +	chip->ctl_val[smc_read] = save_read_val & 0xfffff0ff;
>>> +
>>> +	/* Check if calibration data is suitable */
>>> +	if (!aspeed_smc_check_calib_data(golden_buf, CALIBRATE_BUF_SIZE)) {
>>> +		dev_info(chip->nor.dev,
>>> +			 "Calibration area too uniform, using low speed");
>>> +		writel(chip->ctl_val[smc_read], chip->ctl);
>>> +		kfree(test_buf);
>>> +		return 0;
>>> +	}
>>> +
>>> +	/* Now we iterate the HCLK dividers until we find our breaking point */
>>> +	for (i = ARRAY_SIZE(aspeed_smc_hclk_divs); i > 0; i--) {
>>> +		u32 tv, freq;
>>> +
>>> +		/* Compare timing to max */
>>> +		freq = ahb_freq / i;
>>> +		if (freq > max_freq)
>>> +			continue;
>>> +
>>> +		/* Set the timing */
>>> +		tv = chip->ctl_val[smc_read] | ASPEED_SMC_HCLK_DIV(i);
>>> +		writel(tv, chip->ctl);
>>> +		dev_dbg(chip->nor.dev, "Trying HCLK/%d...", i);
>>> +		rc = aspeed_smc_calibrate_reads(chip, i, golden_buf, test_buf);
>>> +		if (rc == 0)
>>> +			best_div = i;
>>> +	}
>>> +	kfree(test_buf);
>>> +
>>> +	/* Nothing found ? */
>>> +	if (best_div < 0) {
>>> +		dev_warn(chip->nor.dev, "No good frequency, using dumb slow");
>>> +	} else {
>>> +		dev_dbg(chip->nor.dev, "Found good read timings at HCLK/%d",
>>> +			best_div);
>>> +		chip->ctl_val[smc_read] |= ASPEED_SMC_HCLK_DIV(best_div);
>>> +	}
>>> +
>>> +	writel(chip->ctl_val[smc_read], chip->ctl);
>>> +	return 0;
>>> +}
>>
>>
>
Boris Brezillon Oct. 11, 2019, 2:29 p.m. UTC | #5
On Fri, 11 Oct 2019 15:55:25 +0200
Cédric Le Goater <clg@kaod.org> wrote:

 
> > (how can you determine if timings are good when IO pins always
> > stay high). Don't we have a command that return non-ff/non-0 data while
> > still being predictable and immutable?   
> 
> Not that I know of on these controllers.

It's not really a controller thing, more a chip thing. The ideal
solution would be to have a loopback mode or an internal SRAM you can
write then read back, but AFAICT it doesn't exists. There's the SFDP
table as Vignesh mentioned, but we have the following problems:

1/ it might be too small (definitely < 16k)
2/ some NORs don't support SFDP (maybe not the ones we care about
   though)
Cédric Le Goater Oct. 11, 2019, 2:37 p.m. UTC | #6
On 11/10/2019 16:29, Boris Brezillon wrote:
> On Fri, 11 Oct 2019 15:55:25 +0200
> Cédric Le Goater <clg@kaod.org> wrote:
> 
>  
>>> (how can you determine if timings are good when IO pins always
>>> stay high). Don't we have a command that return non-ff/non-0 data while
>>> still being predictable and immutable?   
>>
>> Not that I know of on these controllers.
> 
> It's not really a controller thing, more a chip thing. The ideal
> solution would be to have a loopback mode or an internal SRAM you can
> write then read back, but AFAICT it doesn't exists. There's the SFDP
> table as Vignesh mentioned, but we have the following problems:
> 
> 1/ it might be too small (definitely < 16k)
> 2/ some NORs don't support SFDP (maybe not the ones we care about
>    though)


Yes. The approach we follow has good results, once the data is 
qualified as good enough for the training. 

We had some issues back in 2014 with some chips on early systems 
and I think we could reduce the amount of the data read and the 
number of loops now. 

Thanks,

C.
diff mbox series

Patch

diff --git a/drivers/mtd/spi-nor/aspeed-smc.c b/drivers/mtd/spi-nor/aspeed-smc.c
index facd8fc16ca3..155c407c2bdf 100644
--- a/drivers/mtd/spi-nor/aspeed-smc.c
+++ b/drivers/mtd/spi-nor/aspeed-smc.c
@@ -17,6 +17,7 @@ 
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/sizes.h>
+#include <linux/slab.h>
 #include <linux/sysfs.h>
 
 #define DEVICE_NAME	"aspeed-smc"
@@ -38,12 +39,16 @@  struct aspeed_smc_info {
 	bool hastype;		/* flash type field exists in config reg */
 	u8 we0;			/* shift for write enable bit for CE0 */
 	u8 ctl0;		/* offset in regs of ctl for CE0 */
+	u8 timing;		/* offset in regs of timing */
 
 	void (*set_4b)(struct aspeed_smc_chip *chip);
+	int (*optimize_read)(struct aspeed_smc_chip *chip, u32 max_freq);
 };
 
 static void aspeed_smc_chip_set_4b_spi_2400(struct aspeed_smc_chip *chip);
 static void aspeed_smc_chip_set_4b(struct aspeed_smc_chip *chip);
+static int aspeed_smc_optimize_read(struct aspeed_smc_chip *chip,
+				    u32 max_freq);
 
 static const struct aspeed_smc_info fmc_2400_info = {
 	.maxsize = 64 * 1024 * 1024,
@@ -51,6 +56,7 @@  static const struct aspeed_smc_info fmc_2400_info = {
 	.hastype = true,
 	.we0 = 16,
 	.ctl0 = 0x10,
+	.timing = 0x94,
 	.set_4b = aspeed_smc_chip_set_4b,
 };
 
@@ -60,7 +66,9 @@  static const struct aspeed_smc_info spi_2400_info = {
 	.hastype = false,
 	.we0 = 0,
 	.ctl0 = 0x04,
+	.timing = 0x14,
 	.set_4b = aspeed_smc_chip_set_4b_spi_2400,
+	.optimize_read = aspeed_smc_optimize_read,
 };
 
 static const struct aspeed_smc_info fmc_2500_info = {
@@ -69,6 +77,7 @@  static const struct aspeed_smc_info fmc_2500_info = {
 	.hastype = true,
 	.we0 = 16,
 	.ctl0 = 0x10,
+	.timing = 0x94,
 	.set_4b = aspeed_smc_chip_set_4b,
 };
 
@@ -78,7 +87,9 @@  static const struct aspeed_smc_info spi_2500_info = {
 	.hastype = false,
 	.we0 = 16,
 	.ctl0 = 0x10,
+	.timing = 0x94,
 	.set_4b = aspeed_smc_chip_set_4b,
+	.optimize_read = aspeed_smc_optimize_read,
 };
 
 enum aspeed_smc_ctl_reg_value {
@@ -200,6 +211,12 @@  struct aspeed_smc_controller {
 #define SEGMENT_ADDR_REG(controller, cs)	\
 	((controller)->regs + SEGMENT_ADDR_REG0 + (cs) * 4)
 
+/*
+ * Switch to turn off read optimisation if needed
+ */
+static bool optimize_read = true;
+module_param(optimize_read, bool, 0644);
+
 /*
  * In user mode all data bytes read or written to the chip decode address
  * range are transferred to or from the SPI bus. The range is treated as a
@@ -761,6 +778,187 @@  static int aspeed_smc_chip_setup_init(struct aspeed_smc_chip *chip,
 	return 0;
 }
 
+#define CALIBRATE_BUF_SIZE 16384
+
+static bool aspeed_smc_check_reads(struct aspeed_smc_chip *chip,
+				   const u8 *golden_buf, u8 *test_buf)
+{
+	int i;
+
+	for (i = 0; i < 10; i++) {
+		memcpy_fromio(test_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
+		if (memcmp(test_buf, golden_buf, CALIBRATE_BUF_SIZE) != 0)
+			return false;
+	}
+	return true;
+}
+
+static int aspeed_smc_calibrate_reads(struct aspeed_smc_chip *chip, u32 hdiv,
+				      const u8 *golden_buf, u8 *test_buf)
+{
+	struct aspeed_smc_controller *controller = chip->controller;
+	const struct aspeed_smc_info *info = controller->info;
+	int i;
+	int good_pass = -1, pass_count = 0;
+	u32 shift = (hdiv - 1) << 2;
+	u32 mask = ~(0xfu << shift);
+	u32 fread_timing_val = 0;
+
+#define FREAD_TPASS(i)	(((i) / 2) | (((i) & 1) ? 0 : 8))
+
+	/* Try HCLK delay 0..5, each one with/without delay and look for a
+	 * good pair.
+	 */
+	for (i = 0; i < 12; i++) {
+		bool pass;
+
+		fread_timing_val &= mask;
+		fread_timing_val |= FREAD_TPASS(i) << shift;
+
+		writel(fread_timing_val, controller->regs + info->timing);
+		pass = aspeed_smc_check_reads(chip, golden_buf, test_buf);
+		dev_dbg(chip->nor.dev,
+			"  * [%08x] %d HCLK delay, %dns DI delay : %s",
+			fread_timing_val, i / 2, (i & 1) ? 0 : 4,
+			pass ? "PASS" : "FAIL");
+		if (pass) {
+			pass_count++;
+			if (pass_count == 3) {
+				good_pass = i - 1;
+				break;
+			}
+		} else {
+			pass_count = 0;
+		}
+	}
+
+	/* No good setting for this frequency */
+	if (good_pass < 0)
+		return -1;
+
+	/* We have at least one pass of margin, let's use first pass */
+	fread_timing_val &= mask;
+	fread_timing_val |= FREAD_TPASS(good_pass) << shift;
+	writel(fread_timing_val, controller->regs + info->timing);
+	dev_dbg(chip->nor.dev, " * -> good is pass %d [0x%08x]",
+		good_pass, fread_timing_val);
+	return 0;
+}
+
+static bool aspeed_smc_check_calib_data(const u8 *test_buf, u32 size)
+{
+	const u32 *tb32 = (const u32 *)test_buf;
+	u32 i, cnt = 0;
+
+	/* We check if we have enough words that are neither all 0
+	 * nor all 1's so the calibration can be considered valid.
+	 *
+	 * I use an arbitrary threshold for now of 64
+	 */
+	size >>= 2;
+	for (i = 0; i < size; i++) {
+		if (tb32[i] != 0 && tb32[i] != 0xffffffff)
+			cnt++;
+	}
+	return cnt >= 64;
+}
+
+static const u32 aspeed_smc_hclk_divs[] = {
+	0xf, /* HCLK */
+	0x7, /* HCLK/2 */
+	0xe, /* HCLK/3 */
+	0x6, /* HCLK/4 */
+	0xd, /* HCLK/5 */
+};
+
+#define ASPEED_SMC_HCLK_DIV(i) \
+	(aspeed_smc_hclk_divs[(i) - 1] << CONTROL_CLOCK_FREQ_SEL_SHIFT)
+
+static u32 aspeed_smc_default_read(struct aspeed_smc_chip *chip)
+{
+	/*
+	 * Keep the 4Byte address mode on the AST2400 SPI controller.
+	 * Other controllers set the 4Byte mode in the CE Control
+	 * Register
+	 */
+	u32 ctl_mask = chip->controller->info == &spi_2400_info ?
+		 CONTROL_IO_ADDRESS_4B : 0;
+
+	return (chip->ctl_val[smc_read] & ctl_mask) |
+		(0x00 << 28) | /* Single bit */
+		(0x00 << 24) | /* CE# max */
+		(0x03 << 16) | /* use normal reads */
+		(0x00 <<  8) | /* HCLK/16 */
+		(0x00 <<  6) | /* no dummy cycle */
+		(0x00);        /* normal mode */
+}
+
+static int aspeed_smc_optimize_read(struct aspeed_smc_chip *chip,
+				    u32 max_freq)
+{
+	u8 *golden_buf, *test_buf;
+	int i, rc, best_div = -1;
+	u32 save_read_val = chip->ctl_val[smc_read];
+	u32 ahb_freq = chip->controller->clk_frequency;
+
+	dev_dbg(chip->nor.dev, "AHB frequency: %d MHz", ahb_freq / 1000000);
+
+	test_buf = kmalloc(CALIBRATE_BUF_SIZE * 2, GFP_KERNEL);
+	golden_buf = test_buf + CALIBRATE_BUF_SIZE;
+
+	/* We start with the dumbest setting (keep 4Byte bit) and read
+	 * some data
+	 */
+	chip->ctl_val[smc_read] = aspeed_smc_default_read(chip);
+
+	writel(chip->ctl_val[smc_read], chip->ctl);
+
+	memcpy_fromio(golden_buf, chip->ahb_base, CALIBRATE_BUF_SIZE);
+
+	/* Establish our read mode with freq field set to 0 (HCLK/16) */
+	chip->ctl_val[smc_read] = save_read_val & 0xfffff0ff;
+
+	/* Check if calibration data is suitable */
+	if (!aspeed_smc_check_calib_data(golden_buf, CALIBRATE_BUF_SIZE)) {
+		dev_info(chip->nor.dev,
+			 "Calibration area too uniform, using low speed");
+		writel(chip->ctl_val[smc_read], chip->ctl);
+		kfree(test_buf);
+		return 0;
+	}
+
+	/* Now we iterate the HCLK dividers until we find our breaking point */
+	for (i = ARRAY_SIZE(aspeed_smc_hclk_divs); i > 0; i--) {
+		u32 tv, freq;
+
+		/* Compare timing to max */
+		freq = ahb_freq / i;
+		if (freq > max_freq)
+			continue;
+
+		/* Set the timing */
+		tv = chip->ctl_val[smc_read] | ASPEED_SMC_HCLK_DIV(i);
+		writel(tv, chip->ctl);
+		dev_dbg(chip->nor.dev, "Trying HCLK/%d...", i);
+		rc = aspeed_smc_calibrate_reads(chip, i, golden_buf, test_buf);
+		if (rc == 0)
+			best_div = i;
+	}
+	kfree(test_buf);
+
+	/* Nothing found ? */
+	if (best_div < 0) {
+		dev_warn(chip->nor.dev, "No good frequency, using dumb slow");
+	} else {
+		dev_dbg(chip->nor.dev, "Found good read timings at HCLK/%d",
+			best_div);
+		chip->ctl_val[smc_read] |= ASPEED_SMC_HCLK_DIV(best_div);
+	}
+
+	writel(chip->ctl_val[smc_read], chip->ctl);
+	return 0;
+}
+
 static int aspeed_smc_chip_setup_finish(struct aspeed_smc_chip *chip)
 {
 	struct aspeed_smc_controller *controller = chip->controller;
@@ -803,6 +1001,12 @@  static int aspeed_smc_chip_setup_finish(struct aspeed_smc_chip *chip)
 
 	dev_info(controller->dev, "read control register: %08x\n",
 		 chip->ctl_val[smc_read]);
+
+	/*
+	 * TODO: get max freq from chip
+	 */
+	if (optimize_read && info->optimize_read)
+		info->optimize_read(chip, 104000000);
 	return 0;
 }