diff mbox

[U-Boot,v2,14/23] sunxi: H3: add DRAM controller single bit delay support

Message ID 1480902750-839-15-git-send-email-andre.przywara@arm.com
State Superseded
Delegated to: Jagannadha Sutradharudu Teki
Headers show

Commit Message

Andre Przywara Dec. 5, 2016, 1:52 a.m. UTC
From: Jens Kuske <jenskuske@gmail.com>

Instead of setting the delay for whole bytes allow setting
it for each individual bit. Also add support for
address/command lane delays.

Signed-off-by: Jens Kuske <jenskuske@gmail.com>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 arch/arm/mach-sunxi/dram_sun8i_h3.c | 54 ++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

Comments

Simon Glass Dec. 5, 2016, 6:26 a.m. UTC | #1
Hi Andre,

On 4 December 2016 at 18:52, Andre Przywara <andre.przywara@arm.com> wrote:
> From: Jens Kuske <jenskuske@gmail.com>
>
> Instead of setting the delay for whole bytes allow setting
> it for each individual bit. Also add support for
> address/command lane delays.
>
> Signed-off-by: Jens Kuske <jenskuske@gmail.com>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  arch/arm/mach-sunxi/dram_sun8i_h3.c | 54 ++++++++++++++++++-------------------
>  1 file changed, 27 insertions(+), 27 deletions(-)

ACBDLR_WRITE_DELAY_SHIFT

>
> diff --git a/arch/arm/mach-sunxi/dram_sun8i_h3.c b/arch/arm/mach-sunxi/dram_sun8i_h3.c
> index 3dd6803..1647d76 100644
> --- a/arch/arm/mach-sunxi/dram_sun8i_h3.c
> +++ b/arch/arm/mach-sunxi/dram_sun8i_h3.c
> @@ -16,12 +16,13 @@
>  #include <linux/kconfig.h>
>
>  struct dram_para {
> -       u32 read_delays;
> -       u32 write_delays;
>         u16 page_size;
>         u8 bus_width;
>         u8 dual_rank;
>         u8 row_bits;
> +       const u8 dx_read_delays[4][11];

Can we have #defines for 4 and 11?

> +       const u8 dx_write_delays[4][11];
> +       const u8 ac_delays[31];
>  };
>
>  static inline int ns_to_t(int nanoseconds)
> @@ -64,34 +65,25 @@ static void mctl_phy_init(u32 val)
>         mctl_await_completion(&mctl_ctl->pgsr[0], PGSR_INIT_DONE, 0x1);
>  }
>
> -static void mctl_dq_delay(u32 read, u32 write)
> +static void mctl_set_bit_delays(struct dram_para *para)
>  {
>         struct sunxi_mctl_ctl_reg * const mctl_ctl =
>                         (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
>         int i, j;
> -       u32 val;
> -
> -       for (i = 0; i < 4; i++) {
> -               val = DXBDLR_WRITE_DELAY((write >> (i * 4)) & 0xf) |
> -                     DXBDLR_READ_DELAY(((read >> (i * 4)) & 0xf) * 2);
> -
> -               for (j = DXBDLR_DQ(0); j <= DXBDLR_DM; j++)
> -                       writel(val, &mctl_ctl->dx[i].bdlr[j]);
> -       }
>
>         clrbits_le32(&mctl_ctl->pgcr[0], 1 << 26);
>
> -       for (i = 0; i < 4; i++) {
> -               val = DXBDLR_WRITE_DELAY((write >> (16 + i * 4)) & 0xf) |
> -                     DXBDLR_READ_DELAY((read >> (16 + i * 4)) & 0xf);
> +       for (i = 0; i < 4; i++)
> +               for (j = 0; j < 11; j++)
> +                       writel(DXBDLR_WRITE_DELAY(para->dx_write_delays[i][j]) |
> +                              DXBDLR_READ_DELAY(para->dx_read_delays[i][j]),
> +                              &mctl_ctl->dx[i].bdlr[j]);
>
> -               writel(val, &mctl_ctl->dx[i].bdlr[DXBDLR_DQS]);
> -               writel(val, &mctl_ctl->dx[i].bdlr[DXBDLR_DQSN]);
> -       }
> +       for (i = 0; i < 31; i++)
> +               writel(ACBDLR_WRITE_DELAY(para->ac_delays[i]),
> +                      &mctl_ctl->acbdlr[i]);
>
>         setbits_le32(&mctl_ctl->pgcr[0], 1 << 26);
> -
> -       udelay(1);
>  }
>
>  static void mctl_set_master_priority(void)
> @@ -372,11 +364,8 @@ static int mctl_channel_init(struct dram_para *para)
>         clrsetbits_le32(&mctl_ctl->dtcr, 0xf << 24,
>                         (para->dual_rank ? 0x3 : 0x1) << 24);
>
> -
> -       if (para->read_delays || para->write_delays) {
> -               mctl_dq_delay(para->read_delays, para->write_delays);
> -               udelay(50);
> -       }
> +       mctl_set_bit_delays(para);
> +       udelay(50);
>
>         mctl_zq_calibration(para);
>
> @@ -458,12 +447,23 @@ unsigned long sunxi_dram_init(void)
>                         (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
>
>         struct dram_para para = {
> -               .read_delays = 0x00007979,      /* dram_tpr12 */
> -               .write_delays = 0x6aaa0000,     /* dram_tpr11 */
>                 .dual_rank = 0,
>                 .bus_width = 32,
>                 .row_bits = 15,
>                 .page_size = 4096,
> +
> +               .dx_read_delays =  {{ 18, 18, 18, 18, 18, 18, 18, 18, 18,  0,  0 },
> +                                   { 14, 14, 14, 14, 14, 14, 14, 14, 14,  0,  0 },
> +                                   { 18, 18, 18, 18, 18, 18, 18, 18, 18,  0,  0 },
> +                                   { 14, 14, 14, 14, 14, 14, 14, 14, 14,  0,  0 }},
> +               .dx_write_delays = {{  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },
> +                                   {  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },
> +                                   {  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },
> +                                   {  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  6 }},
> +               .ac_delays = {  0,  0,  0,  0,  0,  0,  0,  0,
> +                               0,  0,  0,  0,  0,  0,  0,  0,
> +                               0,  0,  0,  0,  0,  0,  0,  0,
> +                               0,  0,  0,  0,  0,  0,  0      },
>         };
>
>         mctl_sys_init(&para);
> --
> 2.8.2
>

I wonder if there is value in moving this to device tree with of-platdata?

Regards,
Simon
Chen-Yu Tsai Dec. 5, 2016, 7:58 a.m. UTC | #2
On Mon, Dec 5, 2016 at 2:26 PM, Simon Glass <sjg@chromium.org> wrote:
> Hi Andre,
>
> On 4 December 2016 at 18:52, Andre Przywara <andre.przywara@arm.com> wrote:
>> From: Jens Kuske <jenskuske@gmail.com>
>>
>> Instead of setting the delay for whole bytes allow setting
>> it for each individual bit. Also add support for
>> address/command lane delays.
>>
>> Signed-off-by: Jens Kuske <jenskuske@gmail.com>
>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>> ---
>>  arch/arm/mach-sunxi/dram_sun8i_h3.c | 54 ++++++++++++++++++-------------------
>>  1 file changed, 27 insertions(+), 27 deletions(-)
>
> ACBDLR_WRITE_DELAY_SHIFT
>
>>
>> diff --git a/arch/arm/mach-sunxi/dram_sun8i_h3.c b/arch/arm/mach-sunxi/dram_sun8i_h3.c
>> index 3dd6803..1647d76 100644
>> --- a/arch/arm/mach-sunxi/dram_sun8i_h3.c
>> +++ b/arch/arm/mach-sunxi/dram_sun8i_h3.c
>> @@ -16,12 +16,13 @@
>>  #include <linux/kconfig.h>
>>
>>  struct dram_para {
>> -       u32 read_delays;
>> -       u32 write_delays;
>>         u16 page_size;
>>         u8 bus_width;
>>         u8 dual_rank;
>>         u8 row_bits;
>> +       const u8 dx_read_delays[4][11];
>
> Can we have #defines for 4 and 11?
>
>> +       const u8 dx_write_delays[4][11];
>> +       const u8 ac_delays[31];
>>  };
>>
>>  static inline int ns_to_t(int nanoseconds)
>> @@ -64,34 +65,25 @@ static void mctl_phy_init(u32 val)
>>         mctl_await_completion(&mctl_ctl->pgsr[0], PGSR_INIT_DONE, 0x1);
>>  }
>>
>> -static void mctl_dq_delay(u32 read, u32 write)
>> +static void mctl_set_bit_delays(struct dram_para *para)
>>  {
>>         struct sunxi_mctl_ctl_reg * const mctl_ctl =
>>                         (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
>>         int i, j;
>> -       u32 val;
>> -
>> -       for (i = 0; i < 4; i++) {
>> -               val = DXBDLR_WRITE_DELAY((write >> (i * 4)) & 0xf) |
>> -                     DXBDLR_READ_DELAY(((read >> (i * 4)) & 0xf) * 2);
>> -
>> -               for (j = DXBDLR_DQ(0); j <= DXBDLR_DM; j++)
>> -                       writel(val, &mctl_ctl->dx[i].bdlr[j]);
>> -       }
>>
>>         clrbits_le32(&mctl_ctl->pgcr[0], 1 << 26);
>>
>> -       for (i = 0; i < 4; i++) {
>> -               val = DXBDLR_WRITE_DELAY((write >> (16 + i * 4)) & 0xf) |
>> -                     DXBDLR_READ_DELAY((read >> (16 + i * 4)) & 0xf);
>> +       for (i = 0; i < 4; i++)
>> +               for (j = 0; j < 11; j++)
>> +                       writel(DXBDLR_WRITE_DELAY(para->dx_write_delays[i][j]) |
>> +                              DXBDLR_READ_DELAY(para->dx_read_delays[i][j]),
>> +                              &mctl_ctl->dx[i].bdlr[j]);
>>
>> -               writel(val, &mctl_ctl->dx[i].bdlr[DXBDLR_DQS]);
>> -               writel(val, &mctl_ctl->dx[i].bdlr[DXBDLR_DQSN]);
>> -       }
>> +       for (i = 0; i < 31; i++)
>> +               writel(ACBDLR_WRITE_DELAY(para->ac_delays[i]),
>> +                      &mctl_ctl->acbdlr[i]);
>>
>>         setbits_le32(&mctl_ctl->pgcr[0], 1 << 26);
>> -
>> -       udelay(1);
>>  }
>>
>>  static void mctl_set_master_priority(void)
>> @@ -372,11 +364,8 @@ static int mctl_channel_init(struct dram_para *para)
>>         clrsetbits_le32(&mctl_ctl->dtcr, 0xf << 24,
>>                         (para->dual_rank ? 0x3 : 0x1) << 24);
>>
>> -
>> -       if (para->read_delays || para->write_delays) {
>> -               mctl_dq_delay(para->read_delays, para->write_delays);
>> -               udelay(50);
>> -       }
>> +       mctl_set_bit_delays(para);
>> +       udelay(50);
>>
>>         mctl_zq_calibration(para);
>>
>> @@ -458,12 +447,23 @@ unsigned long sunxi_dram_init(void)
>>                         (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
>>
>>         struct dram_para para = {
>> -               .read_delays = 0x00007979,      /* dram_tpr12 */
>> -               .write_delays = 0x6aaa0000,     /* dram_tpr11 */
>>                 .dual_rank = 0,
>>                 .bus_width = 32,
>>                 .row_bits = 15,
>>                 .page_size = 4096,
>> +
>> +               .dx_read_delays =  {{ 18, 18, 18, 18, 18, 18, 18, 18, 18,  0,  0 },
>> +                                   { 14, 14, 14, 14, 14, 14, 14, 14, 14,  0,  0 },
>> +                                   { 18, 18, 18, 18, 18, 18, 18, 18, 18,  0,  0 },
>> +                                   { 14, 14, 14, 14, 14, 14, 14, 14, 14,  0,  0 }},
>> +               .dx_write_delays = {{  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },
>> +                                   {  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },
>> +                                   {  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },
>> +                                   {  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  6 }},
>> +               .ac_delays = {  0,  0,  0,  0,  0,  0,  0,  0,
>> +                               0,  0,  0,  0,  0,  0,  0,  0,
>> +                               0,  0,  0,  0,  0,  0,  0,  0,
>> +                               0,  0,  0,  0,  0,  0,  0      },
>>         };
>>
>>         mctl_sys_init(&para);
>> --
>> 2.8.2
>>
>
> I wonder if there is value in moving this to device tree with of-platdata?

I think device tree support is unlikely to fit in SPL for sunxi.
IIRC Andre already mentions the space constraints in his cover letter.

ChenYu
Andre Przywara Dec. 5, 2016, 11:28 a.m. UTC | #3
Hi,

On 05/12/16 07:58, Chen-Yu Tsai wrote:
> On Mon, Dec 5, 2016 at 2:26 PM, Simon Glass <sjg@chromium.org> wrote:
>> Hi Andre,
>>
>> On 4 December 2016 at 18:52, Andre Przywara <andre.przywara@arm.com> wrote:
>>> From: Jens Kuske <jenskuske@gmail.com>
>>>
>>> Instead of setting the delay for whole bytes allow setting
>>> it for each individual bit. Also add support for
>>> address/command lane delays.
>>>
>>> Signed-off-by: Jens Kuske <jenskuske@gmail.com>
>>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>>> ---
>>>  arch/arm/mach-sunxi/dram_sun8i_h3.c | 54 ++++++++++++++++++-------------------
>>>  1 file changed, 27 insertions(+), 27 deletions(-)
>>
>> ACBDLR_WRITE_DELAY_SHIFT
>>
>>>
>>> diff --git a/arch/arm/mach-sunxi/dram_sun8i_h3.c b/arch/arm/mach-sunxi/dram_sun8i_h3.c
>>> index 3dd6803..1647d76 100644
>>> --- a/arch/arm/mach-sunxi/dram_sun8i_h3.c
>>> +++ b/arch/arm/mach-sunxi/dram_sun8i_h3.c
>>> @@ -16,12 +16,13 @@
>>>  #include <linux/kconfig.h>
>>>
>>>  struct dram_para {
>>> -       u32 read_delays;
>>> -       u32 write_delays;
>>>         u16 page_size;
>>>         u8 bus_width;
>>>         u8 dual_rank;
>>>         u8 row_bits;
>>> +       const u8 dx_read_delays[4][11];
>>
>> Can we have #defines for 4 and 11?
>>
>>> +       const u8 dx_write_delays[4][11];
>>> +       const u8 ac_delays[31];
>>>  };
>>>
>>>  static inline int ns_to_t(int nanoseconds)
>>> @@ -64,34 +65,25 @@ static void mctl_phy_init(u32 val)
>>>         mctl_await_completion(&mctl_ctl->pgsr[0], PGSR_INIT_DONE, 0x1);
>>>  }
>>>
>>> -static void mctl_dq_delay(u32 read, u32 write)
>>> +static void mctl_set_bit_delays(struct dram_para *para)
>>>  {
>>>         struct sunxi_mctl_ctl_reg * const mctl_ctl =
>>>                         (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
>>>         int i, j;
>>> -       u32 val;
>>> -
>>> -       for (i = 0; i < 4; i++) {
>>> -               val = DXBDLR_WRITE_DELAY((write >> (i * 4)) & 0xf) |
>>> -                     DXBDLR_READ_DELAY(((read >> (i * 4)) & 0xf) * 2);
>>> -
>>> -               for (j = DXBDLR_DQ(0); j <= DXBDLR_DM; j++)
>>> -                       writel(val, &mctl_ctl->dx[i].bdlr[j]);
>>> -       }
>>>
>>>         clrbits_le32(&mctl_ctl->pgcr[0], 1 << 26);
>>>
>>> -       for (i = 0; i < 4; i++) {
>>> -               val = DXBDLR_WRITE_DELAY((write >> (16 + i * 4)) & 0xf) |
>>> -                     DXBDLR_READ_DELAY((read >> (16 + i * 4)) & 0xf);
>>> +       for (i = 0; i < 4; i++)
>>> +               for (j = 0; j < 11; j++)
>>> +                       writel(DXBDLR_WRITE_DELAY(para->dx_write_delays[i][j]) |
>>> +                              DXBDLR_READ_DELAY(para->dx_read_delays[i][j]),
>>> +                              &mctl_ctl->dx[i].bdlr[j]);
>>>
>>> -               writel(val, &mctl_ctl->dx[i].bdlr[DXBDLR_DQS]);
>>> -               writel(val, &mctl_ctl->dx[i].bdlr[DXBDLR_DQSN]);
>>> -       }
>>> +       for (i = 0; i < 31; i++)
>>> +               writel(ACBDLR_WRITE_DELAY(para->ac_delays[i]),
>>> +                      &mctl_ctl->acbdlr[i]);
>>>
>>>         setbits_le32(&mctl_ctl->pgcr[0], 1 << 26);
>>> -
>>> -       udelay(1);
>>>  }
>>>
>>>  static void mctl_set_master_priority(void)
>>> @@ -372,11 +364,8 @@ static int mctl_channel_init(struct dram_para *para)
>>>         clrsetbits_le32(&mctl_ctl->dtcr, 0xf << 24,
>>>                         (para->dual_rank ? 0x3 : 0x1) << 24);
>>>
>>> -
>>> -       if (para->read_delays || para->write_delays) {
>>> -               mctl_dq_delay(para->read_delays, para->write_delays);
>>> -               udelay(50);
>>> -       }
>>> +       mctl_set_bit_delays(para);
>>> +       udelay(50);
>>>
>>>         mctl_zq_calibration(para);
>>>
>>> @@ -458,12 +447,23 @@ unsigned long sunxi_dram_init(void)
>>>                         (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
>>>
>>>         struct dram_para para = {
>>> -               .read_delays = 0x00007979,      /* dram_tpr12 */
>>> -               .write_delays = 0x6aaa0000,     /* dram_tpr11 */
>>>                 .dual_rank = 0,
>>>                 .bus_width = 32,
>>>                 .row_bits = 15,
>>>                 .page_size = 4096,
>>> +
>>> +               .dx_read_delays =  {{ 18, 18, 18, 18, 18, 18, 18, 18, 18,  0,  0 },
>>> +                                   { 14, 14, 14, 14, 14, 14, 14, 14, 14,  0,  0 },
>>> +                                   { 18, 18, 18, 18, 18, 18, 18, 18, 18,  0,  0 },
>>> +                                   { 14, 14, 14, 14, 14, 14, 14, 14, 14,  0,  0 }},
>>> +               .dx_write_delays = {{  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },
>>> +                                   {  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },
>>> +                                   {  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },
>>> +                                   {  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  6 }},
>>> +               .ac_delays = {  0,  0,  0,  0,  0,  0,  0,  0,
>>> +                               0,  0,  0,  0,  0,  0,  0,  0,
>>> +                               0,  0,  0,  0,  0,  0,  0,  0,
>>> +                               0,  0,  0,  0,  0,  0,  0      },
>>>         };
>>>
>>>         mctl_sys_init(&para);
>>> --
>>> 2.8.2
>>>
>>
>> I wonder if there is value in moving this to device tree with of-platdata?

While I kind of like the idea of using the DT for this, there are some
issues:

1) There is no binding so far for representing the DRAM data. Given the
lacking documentation for the DRAM controller it sounds very hard to
come up with a good binding anyway. Also we can't push this through the
Linux DT binding review, since this is of no interest to the kernel. And
I'd rather avoid making up some dodgy binding just for this.

There is work underway to improve the DRAM init code and make it more
robust and flexible. Ideally we can use some autodetection and
calibration feature the controller offers to get rid of arbitrary magic
numbers. But this is quite some work ahead and shouldn't block the much
sought after A64 SPL support for now.

2) If there is need, we can detect the SoC easily by reading the ID
register and differentiate at runtime. This is probably less code than
pulling in DT bits, also more robust.

> I think device tree support is unlikely to fit in SPL for sunxi.
> IIRC Andre already mentions the space constraints in his cover letter.

3) Yes, adding DT support for the SPL makes it rather big. I think it
breaks the 28K limit that the mksunxiboot tool currently has. This can
(and will) be fixed later, but just for this exercise I'd rather keep it
small, especially as we would use it only for the DRAM code and not for
the device drivers.

Actually I have a plan to make better use of DT, but not for the SPL. To
a good degree the SPL code mimics the on-SoC boot ROM operation
(accessing storage devices to load code), which has to work with every
board already and thus does not need a board specific DT.
I can elaborate on that if there is interest.

Cheers,
Andre.
Maxime Ripard Dec. 6, 2016, 11:02 a.m. UTC | #4
On Mon, Dec 05, 2016 at 01:52:21AM +0000, Andre Przywara wrote:
> From: Jens Kuske <jenskuske@gmail.com>
> 
> Instead of setting the delay for whole bytes allow setting
> it for each individual bit. Also add support for
> address/command lane delays.
> 
> Signed-off-by: Jens Kuske <jenskuske@gmail.com>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  arch/arm/mach-sunxi/dram_sun8i_h3.c | 54 ++++++++++++++++++-------------------
>  1 file changed, 27 insertions(+), 27 deletions(-)
> 
> diff --git a/arch/arm/mach-sunxi/dram_sun8i_h3.c b/arch/arm/mach-sunxi/dram_sun8i_h3.c
> index 3dd6803..1647d76 100644
> --- a/arch/arm/mach-sunxi/dram_sun8i_h3.c
> +++ b/arch/arm/mach-sunxi/dram_sun8i_h3.c
> @@ -16,12 +16,13 @@
>  #include <linux/kconfig.h>
>  
>  struct dram_para {
> -	u32 read_delays;
> -	u32 write_delays;
>  	u16 page_size;
>  	u8 bus_width;
>  	u8 dual_rank;
>  	u8 row_bits;
> +	const u8 dx_read_delays[4][11];
> +	const u8 dx_write_delays[4][11];
> +	const u8 ac_delays[31];
>  };

Some documentation on what is the expected format and what it
corresponds to would be welcome.

>  
>  static inline int ns_to_t(int nanoseconds)
> @@ -64,34 +65,25 @@ static void mctl_phy_init(u32 val)
>  	mctl_await_completion(&mctl_ctl->pgsr[0], PGSR_INIT_DONE, 0x1);
>  }
>  
> -static void mctl_dq_delay(u32 read, u32 write)
> +static void mctl_set_bit_delays(struct dram_para *para)
>  {
>  	struct sunxi_mctl_ctl_reg * const mctl_ctl =
>  			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
>  	int i, j;
> -	u32 val;
> -
> -	for (i = 0; i < 4; i++) {
> -		val = DXBDLR_WRITE_DELAY((write >> (i * 4)) & 0xf) |
> -		      DXBDLR_READ_DELAY(((read >> (i * 4)) & 0xf) * 2);
> -
> -		for (j = DXBDLR_DQ(0); j <= DXBDLR_DM; j++)
> -			writel(val, &mctl_ctl->dx[i].bdlr[j]);
> -	}
>  
>  	clrbits_le32(&mctl_ctl->pgcr[0], 1 << 26);
>  
> -	for (i = 0; i < 4; i++) {
> -		val = DXBDLR_WRITE_DELAY((write >> (16 + i * 4)) & 0xf) |
> -		      DXBDLR_READ_DELAY((read >> (16 + i * 4)) & 0xf);
> +	for (i = 0; i < 4; i++)
> +		for (j = 0; j < 11; j++)
> +			writel(DXBDLR_WRITE_DELAY(para->dx_write_delays[i][j]) |
> +			       DXBDLR_READ_DELAY(para->dx_read_delays[i][j]),
> +			       &mctl_ctl->dx[i].bdlr[j]);
>  
> -		writel(val, &mctl_ctl->dx[i].bdlr[DXBDLR_DQS]);
> -		writel(val, &mctl_ctl->dx[i].bdlr[DXBDLR_DQSN]);
> -	}
> +	for (i = 0; i < 31; i++)
> +		writel(ACBDLR_WRITE_DELAY(para->ac_delays[i]),
> +		       &mctl_ctl->acbdlr[i]);
>  
>  	setbits_le32(&mctl_ctl->pgcr[0], 1 << 26);
> -
> -	udelay(1);
>  }
>  
>  static void mctl_set_master_priority(void)
> @@ -372,11 +364,8 @@ static int mctl_channel_init(struct dram_para *para)
>  	clrsetbits_le32(&mctl_ctl->dtcr, 0xf << 24,
>  			(para->dual_rank ? 0x3 : 0x1) << 24);
>  
> -
> -	if (para->read_delays || para->write_delays) {
> -		mctl_dq_delay(para->read_delays, para->write_delays);
> -		udelay(50);
> -	}
> +	mctl_set_bit_delays(para);
> +	udelay(50);
>  
>  	mctl_zq_calibration(para);
>  
> @@ -458,12 +447,23 @@ unsigned long sunxi_dram_init(void)
>  			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
>  
>  	struct dram_para para = {
> -		.read_delays = 0x00007979,	/* dram_tpr12 */
> -		.write_delays = 0x6aaa0000,	/* dram_tpr11 */
>  		.dual_rank = 0,
>  		.bus_width = 32,
>  		.row_bits = 15,
>  		.page_size = 4096,
> +
> +		.dx_read_delays =  {{ 18, 18, 18, 18, 18, 18, 18, 18, 18,  0,  0 },
> +		                    { 14, 14, 14, 14, 14, 14, 14, 14, 14,  0,  0 },
> +		                    { 18, 18, 18, 18, 18, 18, 18, 18, 18,  0,  0 },
> +		                    { 14, 14, 14, 14, 14, 14, 14, 14, 14,  0,  0 }},
> +		.dx_write_delays = {{  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },
> +		                    {  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },
> +		                    {  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },
> +		                    {  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  6 }},
> +		.ac_delays = {  0,  0,  0,  0,  0,  0,  0,  0,
> +		                0,  0,  0,  0,  0,  0,  0,  0,
> +		                0,  0,  0,  0,  0,  0,  0,  0,
> +		                0,  0,  0,  0,  0,  0,  0      },

You're mixing tabs and spaces for the indentation, and the tab before
that bracket looks useless.

Thanks,
Maxime
Simon Glass Dec. 7, 2016, 3:48 a.m. UTC | #5
Hi Andre,

[...]

>>> I wonder if there is value in moving this to device tree with of-platdata?
>
> While I kind of like the idea of using the DT for this, there are some
> issues:
>
> 1) There is no binding so far for representing the DRAM data. Given the
> lacking documentation for the DRAM controller it sounds very hard to
> come up with a good binding anyway. Also we can't push this through the
> Linux DT binding review, since this is of no interest to the kernel. And
> I'd rather avoid making up some dodgy binding just for this.
>
> There is work underway to improve the DRAM init code and make it more
> robust and flexible. Ideally we can use some autodetection and
> calibration feature the controller offers to get rid of arbitrary magic
> numbers. But this is quite some work ahead and shouldn't block the much
> sought after A64 SPL support for now.
>
> 2) If there is need, we can detect the SoC easily by reading the ID
> register and differentiate at runtime. This is probably less code than
> pulling in DT bits, also more robust.
>
>> I think device tree support is unlikely to fit in SPL for sunxi.
>> IIRC Andre already mentions the space constraints in his cover letter.
>
> 3) Yes, adding DT support for the SPL makes it rather big. I think it
> breaks the 28K limit that the mksunxiboot tool currently has. This can
> (and will) be fixed later, but just for this exercise I'd rather keep it
> small, especially as we would use it only for the DRAM code and not for
> the device drivers.

Take a look at rk3288-firefly if you like. It has an ad-hoc device
tree binding (no one has the energy to try to get this into Linux :-).

With of-platdata, DT support doesn't actually add any space (or at
least very little). There is no libfdt and the only code is that
needed to copy data from the of-platdata struct to the normal one.

That said, there has to be a benefit, and it's much more desirable to
spend the time on this IMO:

>
> Actually I have a plan to make better use of DT, but not for the SPL. To
> a good degree the SPL code mimics the on-SoC boot ROM operation
> (accessing storage devices to load code), which has to work with every
> board already and thus does not need a board specific DT.
> I can elaborate on that if there is interest.

Regards,
Simon
Andre Przywara Dec. 17, 2016, 2:33 a.m. UTC | #6
On 07/12/16 03:48, Simon Glass wrote:
> Hi Andre,
> 
> [...]
> 
>>>> I wonder if there is value in moving this to device tree with of-platdata?
>>
>> While I kind of like the idea of using the DT for this, there are some
>> issues:
>>
>> 1) There is no binding so far for representing the DRAM data. Given the
>> lacking documentation for the DRAM controller it sounds very hard to
>> come up with a good binding anyway. Also we can't push this through the
>> Linux DT binding review, since this is of no interest to the kernel. And
>> I'd rather avoid making up some dodgy binding just for this.
>>
>> There is work underway to improve the DRAM init code and make it more
>> robust and flexible. Ideally we can use some autodetection and
>> calibration feature the controller offers to get rid of arbitrary magic
>> numbers. But this is quite some work ahead and shouldn't block the much
>> sought after A64 SPL support for now.
>>
>> 2) If there is need, we can detect the SoC easily by reading the ID
>> register and differentiate at runtime. This is probably less code than
>> pulling in DT bits, also more robust.
>>
>>> I think device tree support is unlikely to fit in SPL for sunxi.
>>> IIRC Andre already mentions the space constraints in his cover letter.
>>
>> 3) Yes, adding DT support for the SPL makes it rather big. I think it
>> breaks the 28K limit that the mksunxiboot tool currently has. This can
>> (and will) be fixed later, but just for this exercise I'd rather keep it
>> small, especially as we would use it only for the DRAM code and not for
>> the device drivers.
> 
> Take a look at rk3288-firefly if you like. It has an ad-hoc device
> tree binding (no one has the energy to try to get this into Linux :-).

I found some lpddr2 binding in Linux, I guess we can use these as a
template. But ....

> With of-platdata, DT support doesn't actually add any space (or at
> least very little). There is no libfdt and the only code is that
> needed to copy data from the of-platdata struct to the normal one.
> 
> That said, there has to be a benefit, and it's much more desirable to
> spend the time on this IMO:

I think there is some benefit, but as you hinted it takes more time. My
understanding is that these parameters are actually board specific,
although a) nobody cared so much before and just went with the same
Allwinner provided values for every board and b) many vendors copy the
DRAM trace layout and thus share the same values here.

So:
1) We would need to work out what parameters we actually need.
2) Also which are a SoC property, which are board specific and which are
DRAM chip dependent. For instance we often see the same chips used on
different boards, also similar layouts across boards.
Technically the amount of DRAM also matters, as that means different
chips in possibly different configurations (Pine64 1GB with 2x16 bits
vs. the 2GB version with 4x8 bits)
3) We need to learn how much we can actually auto detect. The DRAM
controller seem to have some facilities, it may be worth to explore this.

There are some patches to rework and improve the DRAM setup, so I guess
we need to revisit this anyway. I tried to merge them in here, but gave
up because I think they need more love.
So for the sake of getting good-enough support for the Pine64 board now
I'd rather go with these fixed values for now and postpone this discussion.

Cheers,
Andre.

>>
>> Actually I have a plan to make better use of DT, but not for the SPL. To
>> a good degree the SPL code mimics the on-SoC boot ROM operation
>> (accessing storage devices to load code), which has to work with every
>> board already and thus does not need a board specific DT.
>> I can elaborate on that if there is interest.
>
diff mbox

Patch

diff --git a/arch/arm/mach-sunxi/dram_sun8i_h3.c b/arch/arm/mach-sunxi/dram_sun8i_h3.c
index 3dd6803..1647d76 100644
--- a/arch/arm/mach-sunxi/dram_sun8i_h3.c
+++ b/arch/arm/mach-sunxi/dram_sun8i_h3.c
@@ -16,12 +16,13 @@ 
 #include <linux/kconfig.h>
 
 struct dram_para {
-	u32 read_delays;
-	u32 write_delays;
 	u16 page_size;
 	u8 bus_width;
 	u8 dual_rank;
 	u8 row_bits;
+	const u8 dx_read_delays[4][11];
+	const u8 dx_write_delays[4][11];
+	const u8 ac_delays[31];
 };
 
 static inline int ns_to_t(int nanoseconds)
@@ -64,34 +65,25 @@  static void mctl_phy_init(u32 val)
 	mctl_await_completion(&mctl_ctl->pgsr[0], PGSR_INIT_DONE, 0x1);
 }
 
-static void mctl_dq_delay(u32 read, u32 write)
+static void mctl_set_bit_delays(struct dram_para *para)
 {
 	struct sunxi_mctl_ctl_reg * const mctl_ctl =
 			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
 	int i, j;
-	u32 val;
-
-	for (i = 0; i < 4; i++) {
-		val = DXBDLR_WRITE_DELAY((write >> (i * 4)) & 0xf) |
-		      DXBDLR_READ_DELAY(((read >> (i * 4)) & 0xf) * 2);
-
-		for (j = DXBDLR_DQ(0); j <= DXBDLR_DM; j++)
-			writel(val, &mctl_ctl->dx[i].bdlr[j]);
-	}
 
 	clrbits_le32(&mctl_ctl->pgcr[0], 1 << 26);
 
-	for (i = 0; i < 4; i++) {
-		val = DXBDLR_WRITE_DELAY((write >> (16 + i * 4)) & 0xf) |
-		      DXBDLR_READ_DELAY((read >> (16 + i * 4)) & 0xf);
+	for (i = 0; i < 4; i++)
+		for (j = 0; j < 11; j++)
+			writel(DXBDLR_WRITE_DELAY(para->dx_write_delays[i][j]) |
+			       DXBDLR_READ_DELAY(para->dx_read_delays[i][j]),
+			       &mctl_ctl->dx[i].bdlr[j]);
 
-		writel(val, &mctl_ctl->dx[i].bdlr[DXBDLR_DQS]);
-		writel(val, &mctl_ctl->dx[i].bdlr[DXBDLR_DQSN]);
-	}
+	for (i = 0; i < 31; i++)
+		writel(ACBDLR_WRITE_DELAY(para->ac_delays[i]),
+		       &mctl_ctl->acbdlr[i]);
 
 	setbits_le32(&mctl_ctl->pgcr[0], 1 << 26);
-
-	udelay(1);
 }
 
 static void mctl_set_master_priority(void)
@@ -372,11 +364,8 @@  static int mctl_channel_init(struct dram_para *para)
 	clrsetbits_le32(&mctl_ctl->dtcr, 0xf << 24,
 			(para->dual_rank ? 0x3 : 0x1) << 24);
 
-
-	if (para->read_delays || para->write_delays) {
-		mctl_dq_delay(para->read_delays, para->write_delays);
-		udelay(50);
-	}
+	mctl_set_bit_delays(para);
+	udelay(50);
 
 	mctl_zq_calibration(para);
 
@@ -458,12 +447,23 @@  unsigned long sunxi_dram_init(void)
 			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
 
 	struct dram_para para = {
-		.read_delays = 0x00007979,	/* dram_tpr12 */
-		.write_delays = 0x6aaa0000,	/* dram_tpr11 */
 		.dual_rank = 0,
 		.bus_width = 32,
 		.row_bits = 15,
 		.page_size = 4096,
+
+		.dx_read_delays =  {{ 18, 18, 18, 18, 18, 18, 18, 18, 18,  0,  0 },
+		                    { 14, 14, 14, 14, 14, 14, 14, 14, 14,  0,  0 },
+		                    { 18, 18, 18, 18, 18, 18, 18, 18, 18,  0,  0 },
+		                    { 14, 14, 14, 14, 14, 14, 14, 14, 14,  0,  0 }},
+		.dx_write_delays = {{  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },
+		                    {  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },
+		                    {  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 10 },
+		                    {  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  6 }},
+		.ac_delays = {  0,  0,  0,  0,  0,  0,  0,  0,
+		                0,  0,  0,  0,  0,  0,  0,  0,
+		                0,  0,  0,  0,  0,  0,  0,  0,
+		                0,  0,  0,  0,  0,  0,  0      },
 	};
 
 	mctl_sys_init(&para);