diff mbox series

[1/2] sunxi: support asymmetric dual rank DRAM on A64/R40

Message ID 20210225161325.94019-2-icenowy@aosc.io
State Accepted
Commit e9dfd8e96031317a837e659ac2aa1a59278c2ce6
Delegated to: Andre Przywara
Headers show
Series A64/R40 DRAM controller dual-rank-related changes | expand

Commit Message

Icenowy Zheng Feb. 25, 2021, 4:13 p.m. UTC
Previously we have known that R40 has a configuration register for its
rank 1, which allows different configuration than rank 0. Reverse
engineering of newest libdram of A64 from Allwinner shows that A64 has
this register too. It's bit 0 (which enables dual rank in rank 0
configuration register) means a dedicated rank size setup is used for
rank 1.

Now, Pine64 scheduled to use a 3GiB LPDDR3 DRAM chip (which has 2GiB
rank 0 and 1GiB rank 1) on PinePhone, that makes asymmetric dual rank
DRAM support necessary.

Add this support. The code could support both A64 and R40, but because
dual rank detection is broken on R40 now, we cannot really use it on R40
currently.

Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
---
 .../include/asm/arch-sunxi/dram_sunxi_dw.h    | 11 ++-
 arch/arm/mach-sunxi/dram_sunxi_dw.c           | 94 +++++++++++++++----
 2 files changed, 82 insertions(+), 23 deletions(-)

Comments

Andre Przywara March 18, 2021, 1:32 a.m. UTC | #1
On Fri, 26 Feb 2021 00:13:24 +0800
Icenowy Zheng <icenowy@aosc.io> wrote:

> Previously we have known that R40 has a configuration register for its
> rank 1, which allows different configuration than rank 0. Reverse
> engineering of newest libdram of A64 from Allwinner shows that A64 has
> this register too. It's bit 0 (which enables dual rank in rank 0
> configuration register) means a dedicated rank size setup is used for
> rank 1.
> 
> Now, Pine64 scheduled to use a 3GiB LPDDR3 DRAM chip (which has 2GiB
> rank 0 and 1GiB rank 1) on PinePhone, that makes asymmetric dual rank
> DRAM support necessary.
> 
> Add this support. The code could support both A64 and R40, but because
> dual rank detection is broken on R40 now, we cannot really use it on R40
> currently.
> 
> Signed-off-by: Icenowy Zheng <icenowy@aosc.io>

So this looks alright to me.
I tried to re-use the existing mctl_mem_matches() implementation for
the _base() version you introduce, but interestingly this increases the
code size - I guess we reach the limits of the toolchain garbage
collection here. So it's some code duplication, but for the sake of
keeping the SPL small, I am OK with that.

I couldn't test this, but reportedly this makes quite some Pinephone
people happy, so:

Reviewed-by: Andre Przywara <andre.przywara@arm.com>

Cheers,
Andre

> ---
>  .../include/asm/arch-sunxi/dram_sunxi_dw.h    | 11 ++-
>  arch/arm/mach-sunxi/dram_sunxi_dw.c           | 94 +++++++++++++++----
>  2 files changed, 82 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/arm/include/asm/arch-sunxi/dram_sunxi_dw.h b/arch/arm/include/asm/arch-sunxi/dram_sunxi_dw.h
> index a5a7ebde44..e843c14202 100644
> --- a/arch/arm/include/asm/arch-sunxi/dram_sunxi_dw.h
> +++ b/arch/arm/include/asm/arch-sunxi/dram_sunxi_dw.h
> @@ -215,12 +215,17 @@ struct sunxi_mctl_ctl_reg {
>  #define NR_OF_BYTE_LANES	(32 / BITS_PER_BYTE)
>  /* The eight data lines (DQn) plus DM, DQS and DQSN */
>  #define LINES_PER_BYTE_LANE	(BITS_PER_BYTE + 3)
> -struct dram_para {
> +
> +struct rank_para {
>  	u16 page_size;
> -	u8 bus_full_width;
> -	u8 dual_rank;
>  	u8 row_bits;
>  	u8 bank_bits;
> +};
> +
> +struct dram_para {
> +	u8 dual_rank;
> +	u8 bus_full_width;
> +	struct rank_para ranks[2];
>  	const u8 dx_read_delays[NR_OF_BYTE_LANES][LINES_PER_BYTE_LANE];
>  	const u8 dx_write_delays[NR_OF_BYTE_LANES][LINES_PER_BYTE_LANE];
>  	const u8 ac_delays[31];
> diff --git a/arch/arm/mach-sunxi/dram_sunxi_dw.c b/arch/arm/mach-sunxi/dram_sunxi_dw.c
> index d0600011ff..2b9d631d49 100644
> --- a/arch/arm/mach-sunxi/dram_sunxi_dw.c
> +++ b/arch/arm/mach-sunxi/dram_sunxi_dw.c
> @@ -399,11 +399,19 @@ static void mctl_set_cr(uint16_t socid, struct dram_para *para)
>  #else
>  #error Unsupported DRAM type!
>  #endif
> -	       (para->bank_bits == 3 ? MCTL_CR_EIGHT_BANKS : MCTL_CR_FOUR_BANKS) |
> +	       (para->ranks[0].bank_bits == 3 ? MCTL_CR_EIGHT_BANKS : MCTL_CR_FOUR_BANKS) |
>  	       MCTL_CR_BUS_FULL_WIDTH(para->bus_full_width) |
>  	       (para->dual_rank ? MCTL_CR_DUAL_RANK : MCTL_CR_SINGLE_RANK) |
> -	       MCTL_CR_PAGE_SIZE(para->page_size) |
> -	       MCTL_CR_ROW_BITS(para->row_bits), &mctl_com->cr);
> +	       MCTL_CR_PAGE_SIZE(para->ranks[0].page_size) |
> +	       MCTL_CR_ROW_BITS(para->ranks[0].row_bits), &mctl_com->cr);
> +
> +	if (para->dual_rank && (socid == SOCID_A64 || socid == SOCID_R40)) {
> +		writel((para->ranks[1].bank_bits == 3 ? MCTL_CR_EIGHT_BANKS : MCTL_CR_FOUR_BANKS) |
> +		       MCTL_CR_BUS_FULL_WIDTH(para->bus_full_width) |
> +		       MCTL_CR_DUAL_RANK |
> +		       MCTL_CR_PAGE_SIZE(para->ranks[1].page_size) |
> +		       MCTL_CR_ROW_BITS(para->ranks[1].row_bits), &mctl_com->cr_r1);
> +	}
>  
>  	if (socid == SOCID_R40) {
>  		if (para->dual_rank)
> @@ -646,35 +654,63 @@ static int mctl_channel_init(uint16_t socid, struct dram_para *para)
>  	return 0;
>  }
>  
> -static void mctl_auto_detect_dram_size(uint16_t socid, struct dram_para *para)
> +/*
> + * Test if memory at offset offset matches memory at a certain base
> + */
> +static bool mctl_mem_matches_base(u32 offset, ulong base)
> +{
> +	/* Try to write different values to RAM at two addresses */
> +	writel(0, base);
> +	writel(0xaa55aa55, base + offset);
> +	dsb();
> +	/* Check if the same value is actually observed when reading back */
> +	return readl(base) ==
> +	       readl(base + offset);
> +}
> +
> +static void mctl_auto_detect_dram_size_rank(uint16_t socid, struct dram_para *para, ulong base, struct rank_para *rank)
>  {
>  	/* detect row address bits */
> -	para->page_size = 512;
> -	para->row_bits = 16;
> -	para->bank_bits = 2;
> +	rank->page_size = 512;
> +	rank->row_bits = 16;
> +	rank->bank_bits = 2;
>  	mctl_set_cr(socid, para);
>  
> -	for (para->row_bits = 11; para->row_bits < 16; para->row_bits++)
> -		if (mctl_mem_matches((1 << (para->row_bits + para->bank_bits)) * para->page_size))
> +	for (rank->row_bits = 11; rank->row_bits < 16; rank->row_bits++)
> +		if (mctl_mem_matches_base((1 << (rank->row_bits + rank->bank_bits)) * rank->page_size, base))
>  			break;
>  
>  	/* detect bank address bits */
> -	para->bank_bits = 3;
> +	rank->bank_bits = 3;
>  	mctl_set_cr(socid, para);
>  
> -	for (para->bank_bits = 2; para->bank_bits < 3; para->bank_bits++)
> -		if (mctl_mem_matches((1 << para->bank_bits) * para->page_size))
> +	for (rank->bank_bits = 2; rank->bank_bits < 3; rank->bank_bits++)
> +		if (mctl_mem_matches_base((1 << rank->bank_bits) * rank->page_size, base))
>  			break;
>  
>  	/* detect page size */
> -	para->page_size = 8192;
> +	rank->page_size = 8192;
>  	mctl_set_cr(socid, para);
>  
> -	for (para->page_size = 512; para->page_size < 8192; para->page_size *= 2)
> -		if (mctl_mem_matches(para->page_size))
> +	for (rank->page_size = 512; rank->page_size < 8192; rank->page_size *= 2)
> +		if (mctl_mem_matches_base(rank->page_size, base))
>  			break;
>  }
>  
> +static unsigned long mctl_calc_rank_size(struct rank_para *rank)
> +{
> +	return (1UL << (rank->row_bits + rank->bank_bits)) * rank->page_size;
> +}
> +
> +static void mctl_auto_detect_dram_size(uint16_t socid, struct dram_para *para)
> +{
> +	mctl_auto_detect_dram_size_rank(socid, para, (ulong)CONFIG_SYS_SDRAM_BASE, &para->ranks[0]);
> +
> +	if ((socid == SOCID_A64 || socid == SOCID_R40) && para->dual_rank) {
> +		mctl_auto_detect_dram_size_rank(socid, para, (ulong)CONFIG_SYS_SDRAM_BASE + mctl_calc_rank_size(&para->ranks[0]), &para->ranks[1]);
> +	}
> +}
> +
>  /*
>   * The actual values used here are taken from Allwinner provided boot0
>   * binaries, though they are probably board specific, so would likely benefit
> @@ -769,12 +805,23 @@ unsigned long sunxi_dram_init(void)
>  	struct sunxi_mctl_ctl_reg * const mctl_ctl =
>  			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
>  
> +	unsigned long size;
> +
>  	struct dram_para para = {
>  		.dual_rank = 1,
>  		.bus_full_width = 1,
> -		.row_bits = 15,
> -		.bank_bits = 3,
> -		.page_size = 4096,
> +		.ranks = {
> +			{
> +				.row_bits = 15,
> +				.bank_bits = 3,
> +				.page_size = 4096,
> +			},
> +			{
> +				.row_bits = 15,
> +				.bank_bits = 3,
> +				.page_size = 4096,
> +			}
> +		},
>  
>  #if defined(CONFIG_MACH_SUN8I_H3)
>  		.dx_read_delays  = SUN8I_H3_DX_READ_DELAYS,
> @@ -846,6 +893,13 @@ unsigned long sunxi_dram_init(void)
>  	mctl_auto_detect_dram_size(socid, &para);
>  	mctl_set_cr(socid, &para);
>  
> -	return (1UL << (para.row_bits + para.bank_bits)) * para.page_size *
> -	       (para.dual_rank ? 2 : 1);
> +	size = mctl_calc_rank_size(&para.ranks[0]);
> +	if (socid == SOCID_A64 || socid == SOCID_R40) {
> +		if (para.dual_rank)
> +			size += mctl_calc_rank_size(&para.ranks[1]);
> +	} else if (para.dual_rank) {
> +		size *= 2;
> +	}
> +
> +	return size;
>  }
Peter Robinson March 18, 2021, 10:02 a.m. UTC | #2
On Thu, Feb 25, 2021 at 4:14 PM Icenowy Zheng <icenowy@aosc.io> wrote:
>
> Previously we have known that R40 has a configuration register for its
> rank 1, which allows different configuration than rank 0. Reverse
> engineering of newest libdram of A64 from Allwinner shows that A64 has
> this register too. It's bit 0 (which enables dual rank in rank 0
> configuration register) means a dedicated rank size setup is used for
> rank 1.
>
> Now, Pine64 scheduled to use a 3GiB LPDDR3 DRAM chip (which has 2GiB
> rank 0 and 1GiB rank 1) on PinePhone, that makes asymmetric dual rank
> DRAM support necessary.
>
> Add this support. The code could support both A64 and R40, but because
> dual rank detection is broken on R40 now, we cannot really use it on R40
> currently.
>
> Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
Tested-by: Peter Robinson <pbrobinson@gmail.com>

Tested on Pinephone Braveheart and 3Gb variants plus a Pine64 and all
work as expected.

> ---
>  .../include/asm/arch-sunxi/dram_sunxi_dw.h    | 11 ++-
>  arch/arm/mach-sunxi/dram_sunxi_dw.c           | 94 +++++++++++++++----
>  2 files changed, 82 insertions(+), 23 deletions(-)
>
> diff --git a/arch/arm/include/asm/arch-sunxi/dram_sunxi_dw.h b/arch/arm/include/asm/arch-sunxi/dram_sunxi_dw.h
> index a5a7ebde44..e843c14202 100644
> --- a/arch/arm/include/asm/arch-sunxi/dram_sunxi_dw.h
> +++ b/arch/arm/include/asm/arch-sunxi/dram_sunxi_dw.h
> @@ -215,12 +215,17 @@ struct sunxi_mctl_ctl_reg {
>  #define NR_OF_BYTE_LANES       (32 / BITS_PER_BYTE)
>  /* The eight data lines (DQn) plus DM, DQS and DQSN */
>  #define LINES_PER_BYTE_LANE    (BITS_PER_BYTE + 3)
> -struct dram_para {
> +
> +struct rank_para {
>         u16 page_size;
> -       u8 bus_full_width;
> -       u8 dual_rank;
>         u8 row_bits;
>         u8 bank_bits;
> +};
> +
> +struct dram_para {
> +       u8 dual_rank;
> +       u8 bus_full_width;
> +       struct rank_para ranks[2];
>         const u8 dx_read_delays[NR_OF_BYTE_LANES][LINES_PER_BYTE_LANE];
>         const u8 dx_write_delays[NR_OF_BYTE_LANES][LINES_PER_BYTE_LANE];
>         const u8 ac_delays[31];
> diff --git a/arch/arm/mach-sunxi/dram_sunxi_dw.c b/arch/arm/mach-sunxi/dram_sunxi_dw.c
> index d0600011ff..2b9d631d49 100644
> --- a/arch/arm/mach-sunxi/dram_sunxi_dw.c
> +++ b/arch/arm/mach-sunxi/dram_sunxi_dw.c
> @@ -399,11 +399,19 @@ static void mctl_set_cr(uint16_t socid, struct dram_para *para)
>  #else
>  #error Unsupported DRAM type!
>  #endif
> -              (para->bank_bits == 3 ? MCTL_CR_EIGHT_BANKS : MCTL_CR_FOUR_BANKS) |
> +              (para->ranks[0].bank_bits == 3 ? MCTL_CR_EIGHT_BANKS : MCTL_CR_FOUR_BANKS) |
>                MCTL_CR_BUS_FULL_WIDTH(para->bus_full_width) |
>                (para->dual_rank ? MCTL_CR_DUAL_RANK : MCTL_CR_SINGLE_RANK) |
> -              MCTL_CR_PAGE_SIZE(para->page_size) |
> -              MCTL_CR_ROW_BITS(para->row_bits), &mctl_com->cr);
> +              MCTL_CR_PAGE_SIZE(para->ranks[0].page_size) |
> +              MCTL_CR_ROW_BITS(para->ranks[0].row_bits), &mctl_com->cr);
> +
> +       if (para->dual_rank && (socid == SOCID_A64 || socid == SOCID_R40)) {
> +               writel((para->ranks[1].bank_bits == 3 ? MCTL_CR_EIGHT_BANKS : MCTL_CR_FOUR_BANKS) |
> +                      MCTL_CR_BUS_FULL_WIDTH(para->bus_full_width) |
> +                      MCTL_CR_DUAL_RANK |
> +                      MCTL_CR_PAGE_SIZE(para->ranks[1].page_size) |
> +                      MCTL_CR_ROW_BITS(para->ranks[1].row_bits), &mctl_com->cr_r1);
> +       }
>
>         if (socid == SOCID_R40) {
>                 if (para->dual_rank)
> @@ -646,35 +654,63 @@ static int mctl_channel_init(uint16_t socid, struct dram_para *para)
>         return 0;
>  }
>
> -static void mctl_auto_detect_dram_size(uint16_t socid, struct dram_para *para)
> +/*
> + * Test if memory at offset offset matches memory at a certain base
> + */
> +static bool mctl_mem_matches_base(u32 offset, ulong base)
> +{
> +       /* Try to write different values to RAM at two addresses */
> +       writel(0, base);
> +       writel(0xaa55aa55, base + offset);
> +       dsb();
> +       /* Check if the same value is actually observed when reading back */
> +       return readl(base) ==
> +              readl(base + offset);
> +}
> +
> +static void mctl_auto_detect_dram_size_rank(uint16_t socid, struct dram_para *para, ulong base, struct rank_para *rank)
>  {
>         /* detect row address bits */
> -       para->page_size = 512;
> -       para->row_bits = 16;
> -       para->bank_bits = 2;
> +       rank->page_size = 512;
> +       rank->row_bits = 16;
> +       rank->bank_bits = 2;
>         mctl_set_cr(socid, para);
>
> -       for (para->row_bits = 11; para->row_bits < 16; para->row_bits++)
> -               if (mctl_mem_matches((1 << (para->row_bits + para->bank_bits)) * para->page_size))
> +       for (rank->row_bits = 11; rank->row_bits < 16; rank->row_bits++)
> +               if (mctl_mem_matches_base((1 << (rank->row_bits + rank->bank_bits)) * rank->page_size, base))
>                         break;
>
>         /* detect bank address bits */
> -       para->bank_bits = 3;
> +       rank->bank_bits = 3;
>         mctl_set_cr(socid, para);
>
> -       for (para->bank_bits = 2; para->bank_bits < 3; para->bank_bits++)
> -               if (mctl_mem_matches((1 << para->bank_bits) * para->page_size))
> +       for (rank->bank_bits = 2; rank->bank_bits < 3; rank->bank_bits++)
> +               if (mctl_mem_matches_base((1 << rank->bank_bits) * rank->page_size, base))
>                         break;
>
>         /* detect page size */
> -       para->page_size = 8192;
> +       rank->page_size = 8192;
>         mctl_set_cr(socid, para);
>
> -       for (para->page_size = 512; para->page_size < 8192; para->page_size *= 2)
> -               if (mctl_mem_matches(para->page_size))
> +       for (rank->page_size = 512; rank->page_size < 8192; rank->page_size *= 2)
> +               if (mctl_mem_matches_base(rank->page_size, base))
>                         break;
>  }
>
> +static unsigned long mctl_calc_rank_size(struct rank_para *rank)
> +{
> +       return (1UL << (rank->row_bits + rank->bank_bits)) * rank->page_size;
> +}
> +
> +static void mctl_auto_detect_dram_size(uint16_t socid, struct dram_para *para)
> +{
> +       mctl_auto_detect_dram_size_rank(socid, para, (ulong)CONFIG_SYS_SDRAM_BASE, &para->ranks[0]);
> +
> +       if ((socid == SOCID_A64 || socid == SOCID_R40) && para->dual_rank) {
> +               mctl_auto_detect_dram_size_rank(socid, para, (ulong)CONFIG_SYS_SDRAM_BASE + mctl_calc_rank_size(&para->ranks[0]), &para->ranks[1]);
> +       }
> +}
> +
>  /*
>   * The actual values used here are taken from Allwinner provided boot0
>   * binaries, though they are probably board specific, so would likely benefit
> @@ -769,12 +805,23 @@ unsigned long sunxi_dram_init(void)
>         struct sunxi_mctl_ctl_reg * const mctl_ctl =
>                         (struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
>
> +       unsigned long size;
> +
>         struct dram_para para = {
>                 .dual_rank = 1,
>                 .bus_full_width = 1,
> -               .row_bits = 15,
> -               .bank_bits = 3,
> -               .page_size = 4096,
> +               .ranks = {
> +                       {
> +                               .row_bits = 15,
> +                               .bank_bits = 3,
> +                               .page_size = 4096,
> +                       },
> +                       {
> +                               .row_bits = 15,
> +                               .bank_bits = 3,
> +                               .page_size = 4096,
> +                       }
> +               },
>
>  #if defined(CONFIG_MACH_SUN8I_H3)
>                 .dx_read_delays  = SUN8I_H3_DX_READ_DELAYS,
> @@ -846,6 +893,13 @@ unsigned long sunxi_dram_init(void)
>         mctl_auto_detect_dram_size(socid, &para);
>         mctl_set_cr(socid, &para);
>
> -       return (1UL << (para.row_bits + para.bank_bits)) * para.page_size *
> -              (para.dual_rank ? 2 : 1);
> +       size = mctl_calc_rank_size(&para.ranks[0]);
> +       if (socid == SOCID_A64 || socid == SOCID_R40) {
> +               if (para.dual_rank)
> +                       size += mctl_calc_rank_size(&para.ranks[1]);
> +       } else if (para.dual_rank) {
> +               size *= 2;
> +       }
> +
> +       return size;
>  }
> --
> 2.30.0
diff mbox series

Patch

diff --git a/arch/arm/include/asm/arch-sunxi/dram_sunxi_dw.h b/arch/arm/include/asm/arch-sunxi/dram_sunxi_dw.h
index a5a7ebde44..e843c14202 100644
--- a/arch/arm/include/asm/arch-sunxi/dram_sunxi_dw.h
+++ b/arch/arm/include/asm/arch-sunxi/dram_sunxi_dw.h
@@ -215,12 +215,17 @@  struct sunxi_mctl_ctl_reg {
 #define NR_OF_BYTE_LANES	(32 / BITS_PER_BYTE)
 /* The eight data lines (DQn) plus DM, DQS and DQSN */
 #define LINES_PER_BYTE_LANE	(BITS_PER_BYTE + 3)
-struct dram_para {
+
+struct rank_para {
 	u16 page_size;
-	u8 bus_full_width;
-	u8 dual_rank;
 	u8 row_bits;
 	u8 bank_bits;
+};
+
+struct dram_para {
+	u8 dual_rank;
+	u8 bus_full_width;
+	struct rank_para ranks[2];
 	const u8 dx_read_delays[NR_OF_BYTE_LANES][LINES_PER_BYTE_LANE];
 	const u8 dx_write_delays[NR_OF_BYTE_LANES][LINES_PER_BYTE_LANE];
 	const u8 ac_delays[31];
diff --git a/arch/arm/mach-sunxi/dram_sunxi_dw.c b/arch/arm/mach-sunxi/dram_sunxi_dw.c
index d0600011ff..2b9d631d49 100644
--- a/arch/arm/mach-sunxi/dram_sunxi_dw.c
+++ b/arch/arm/mach-sunxi/dram_sunxi_dw.c
@@ -399,11 +399,19 @@  static void mctl_set_cr(uint16_t socid, struct dram_para *para)
 #else
 #error Unsupported DRAM type!
 #endif
-	       (para->bank_bits == 3 ? MCTL_CR_EIGHT_BANKS : MCTL_CR_FOUR_BANKS) |
+	       (para->ranks[0].bank_bits == 3 ? MCTL_CR_EIGHT_BANKS : MCTL_CR_FOUR_BANKS) |
 	       MCTL_CR_BUS_FULL_WIDTH(para->bus_full_width) |
 	       (para->dual_rank ? MCTL_CR_DUAL_RANK : MCTL_CR_SINGLE_RANK) |
-	       MCTL_CR_PAGE_SIZE(para->page_size) |
-	       MCTL_CR_ROW_BITS(para->row_bits), &mctl_com->cr);
+	       MCTL_CR_PAGE_SIZE(para->ranks[0].page_size) |
+	       MCTL_CR_ROW_BITS(para->ranks[0].row_bits), &mctl_com->cr);
+
+	if (para->dual_rank && (socid == SOCID_A64 || socid == SOCID_R40)) {
+		writel((para->ranks[1].bank_bits == 3 ? MCTL_CR_EIGHT_BANKS : MCTL_CR_FOUR_BANKS) |
+		       MCTL_CR_BUS_FULL_WIDTH(para->bus_full_width) |
+		       MCTL_CR_DUAL_RANK |
+		       MCTL_CR_PAGE_SIZE(para->ranks[1].page_size) |
+		       MCTL_CR_ROW_BITS(para->ranks[1].row_bits), &mctl_com->cr_r1);
+	}
 
 	if (socid == SOCID_R40) {
 		if (para->dual_rank)
@@ -646,35 +654,63 @@  static int mctl_channel_init(uint16_t socid, struct dram_para *para)
 	return 0;
 }
 
-static void mctl_auto_detect_dram_size(uint16_t socid, struct dram_para *para)
+/*
+ * Test if memory at offset offset matches memory at a certain base
+ */
+static bool mctl_mem_matches_base(u32 offset, ulong base)
+{
+	/* Try to write different values to RAM at two addresses */
+	writel(0, base);
+	writel(0xaa55aa55, base + offset);
+	dsb();
+	/* Check if the same value is actually observed when reading back */
+	return readl(base) ==
+	       readl(base + offset);
+}
+
+static void mctl_auto_detect_dram_size_rank(uint16_t socid, struct dram_para *para, ulong base, struct rank_para *rank)
 {
 	/* detect row address bits */
-	para->page_size = 512;
-	para->row_bits = 16;
-	para->bank_bits = 2;
+	rank->page_size = 512;
+	rank->row_bits = 16;
+	rank->bank_bits = 2;
 	mctl_set_cr(socid, para);
 
-	for (para->row_bits = 11; para->row_bits < 16; para->row_bits++)
-		if (mctl_mem_matches((1 << (para->row_bits + para->bank_bits)) * para->page_size))
+	for (rank->row_bits = 11; rank->row_bits < 16; rank->row_bits++)
+		if (mctl_mem_matches_base((1 << (rank->row_bits + rank->bank_bits)) * rank->page_size, base))
 			break;
 
 	/* detect bank address bits */
-	para->bank_bits = 3;
+	rank->bank_bits = 3;
 	mctl_set_cr(socid, para);
 
-	for (para->bank_bits = 2; para->bank_bits < 3; para->bank_bits++)
-		if (mctl_mem_matches((1 << para->bank_bits) * para->page_size))
+	for (rank->bank_bits = 2; rank->bank_bits < 3; rank->bank_bits++)
+		if (mctl_mem_matches_base((1 << rank->bank_bits) * rank->page_size, base))
 			break;
 
 	/* detect page size */
-	para->page_size = 8192;
+	rank->page_size = 8192;
 	mctl_set_cr(socid, para);
 
-	for (para->page_size = 512; para->page_size < 8192; para->page_size *= 2)
-		if (mctl_mem_matches(para->page_size))
+	for (rank->page_size = 512; rank->page_size < 8192; rank->page_size *= 2)
+		if (mctl_mem_matches_base(rank->page_size, base))
 			break;
 }
 
+static unsigned long mctl_calc_rank_size(struct rank_para *rank)
+{
+	return (1UL << (rank->row_bits + rank->bank_bits)) * rank->page_size;
+}
+
+static void mctl_auto_detect_dram_size(uint16_t socid, struct dram_para *para)
+{
+	mctl_auto_detect_dram_size_rank(socid, para, (ulong)CONFIG_SYS_SDRAM_BASE, &para->ranks[0]);
+
+	if ((socid == SOCID_A64 || socid == SOCID_R40) && para->dual_rank) {
+		mctl_auto_detect_dram_size_rank(socid, para, (ulong)CONFIG_SYS_SDRAM_BASE + mctl_calc_rank_size(&para->ranks[0]), &para->ranks[1]);
+	}
+}
+
 /*
  * The actual values used here are taken from Allwinner provided boot0
  * binaries, though they are probably board specific, so would likely benefit
@@ -769,12 +805,23 @@  unsigned long sunxi_dram_init(void)
 	struct sunxi_mctl_ctl_reg * const mctl_ctl =
 			(struct sunxi_mctl_ctl_reg *)SUNXI_DRAM_CTL0_BASE;
 
+	unsigned long size;
+
 	struct dram_para para = {
 		.dual_rank = 1,
 		.bus_full_width = 1,
-		.row_bits = 15,
-		.bank_bits = 3,
-		.page_size = 4096,
+		.ranks = {
+			{
+				.row_bits = 15,
+				.bank_bits = 3,
+				.page_size = 4096,
+			},
+			{
+				.row_bits = 15,
+				.bank_bits = 3,
+				.page_size = 4096,
+			}
+		},
 
 #if defined(CONFIG_MACH_SUN8I_H3)
 		.dx_read_delays  = SUN8I_H3_DX_READ_DELAYS,
@@ -846,6 +893,13 @@  unsigned long sunxi_dram_init(void)
 	mctl_auto_detect_dram_size(socid, &para);
 	mctl_set_cr(socid, &para);
 
-	return (1UL << (para.row_bits + para.bank_bits)) * para.page_size *
-	       (para.dual_rank ? 2 : 1);
+	size = mctl_calc_rank_size(&para.ranks[0]);
+	if (socid == SOCID_A64 || socid == SOCID_R40) {
+		if (para.dual_rank)
+			size += mctl_calc_rank_size(&para.ranks[1]);
+	} else if (para.dual_rank) {
+		size *= 2;
+	}
+
+	return size;
 }