Patchwork [V5,1/3] ARM: imx: add suspend in ocram support for i.mx6q

login
register
mail settings
Submitter Anson Huang
Date Jan. 14, 2014, 6:35 a.m.
Message ID <1389681315-10231-1-git-send-email-b20788@freescale.com>
Download mbox | patch
Permalink /patch/310457/
State New
Headers show

Comments

Anson Huang - Jan. 14, 2014, 6:35 a.m.
When the system enters suspend, we can set the DDR IOs to
high-Z state to save the DDR IOs' power consumption; this
operation saves a lot of power (from ~26mA@1.5V to ~15mA@1.5V,
measured on i.MX6Q SabreSD board, R25) on the DDR IOs. To
achieve that, we need to copy the suspend code to ocram
and run the low-level hardware-related code (setting the DDR
IOs to high-Z state) in ocram.

If there is no ocram space available, the system will
still suspend from external DDR, and hence no DDR IOs will
be set to high-Z.

The OCRAM usage layout is as below,

ocram suspend region(4K currently):
Shawn Guo - Jan. 15, 2014, 3:43 a.m.
On Tue, Jan 14, 2014 at 02:35:13PM +0800, Anson Huang wrote:
> When system enter suspend, we can set the DDR IO to
> high-Z state to save DDR IOs' power consumption, this
> operation can save many power(from ~26mA@1.5V to ~15mA@1.5V,
> measured on i.MX6Q SabreSD board, R25) of DDR IOs. To
> achieve that, we need to copy the suspend code to ocram
> and run the low level hardware related code(set DDR IOs
> to high-Z state) in ocram.
> 
> If there is no ocram space available, then system will
> still do suspend in external DDR, hence no DDR IOs will
> be set to high-Z.
> 
> The OCRAM usage layout is as below,
> 
> ocram suspend region(4K currently):
> ======================== high address ======================
>                               .
>                               .
>                               .
>                               ^
>                               ^
>                               ^
>                       imx6_suspend code
>       reserved space(to make imx6_suspend aligned with 8)

We can remove this line now, right?  Same for the comment in code.

>              PM_INFO structure(imx6_cpu_pm_info)
> ======================== low address =======================
> 
> Signed-off-by: Anson Huang <b20788@freescale.com>

<snip>

> +ENTRY(imx6_suspend)
> +	ldr	r1, [r0, #PM_INFO_PBASE_OFFSET]
> +	ldr	r2, [r0, #PM_INFO_RESUME_ADDR_OFFSET]
> +	ldr	r3, [r0, #PM_INFO_CPU_TYPE_OFFSET]
> +	ldr	r4, [r0, #PM_INFO_PM_INFO_SIZE_OFFSET]
> +
> +	/*
> +	 * counting the resume address in iram
> +	 * to set it in SRC register.
> +	 */
> +	ldr	r6, =imx6_suspend
> +	ldr	r7, =resume
> +	sub	r7, r7, r6
> +	add	r8, r1, r4
> +	add	r9, r8, r7
> +
> +	/*
> +	 * make sure TLB contain the addr we want,
> +	 * as we will access them after MMDC IO floated.
> +	 */
> +
> +	ldr	r11, [r0, #PM_INFO_MX6Q_CCM_V_OFFSET]
> +	ldr	r6, [r11, #0x0]
> +	ldr	r11, [r0, #PM_INFO_MX6Q_GPC_V_OFFSET]
> +	ldr	r6, [r11, #0x0]
> +
> +	/* use r11 to store the IO address */
> +	ldr	r11, [r0, #PM_INFO_MX6Q_SRC_V_OFFSET]
> +	/* store physical resume addr and pm_info address. */
> +	str	r9, [r11, #MX6Q_SRC_GPR1]
> +	str	r1, [r11, #MX6Q_SRC_GPR2]
> +
> +	/* need to sync L2 cache before DSM. */
> +	sync_l2_cache
> +
> +	ldr	r11, [r0, #PM_INFO_MX6Q_MMDC_V_OFFSET]
> +	/*
> +	 * put DDR explicitly into self-refresh and
> +	 * disable automatic power savings.
> +	 */
> +	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
> +	orr	r7, r7, #0x1
> +	str	r7, [r11, #MX6Q_MMDC_MAPSR]
> +
> +	/* make the DDR explicitly enter self-refresh. */
> +	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
> +	orr	r7, r7, #(1 << 21)
> +	str	r7, [r11, #MX6Q_MMDC_MAPSR]
> +
> +poll_dvfs_set_1:
> +	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
> +	ands	r7, r7, #(1 << 25)
> +	beq	poll_dvfs_set_1
> +
> +	ldr	r11, [r0, #PM_INFO_MX6Q_IOMUXC_V_OFFSET]
> +	ldr	r6, =0x0
> +	ldr	r7, [r0, #PM_INFO_MMDC_IO_NUM_OFFSET]
> +	ldr	r8, =PM_INFO_MMDC_IO_VAL_OFFSET

If we add the following instruction here

	add     r8, r0, r8

> +set_mmdc_io_lpm:
> +	ldr	r9, [r0, r8]

, it can be replaced by the following one
 
	ldr     r9, [r8], #0x8

> +	str	r6, [r11, r9]
> +	add	r8, r8, #0x8

, and then we can save this one in the loop, right?

> +	sub	r7, r7, #0x1
> +	cmp     r7, #0x0

The sequence of 'sub ...; cmp ..., #0' and 'and ...; cmp ..., #0' can
generally be replaced by 'subs ...' and 'ands' respectively to save one
instruction, right?

The above two comments apply to a few other places in the code.

> +	bne	set_mmdc_io_lpm
> +
> +	/*
> +	 * mask all GPC interrupts before
> +	 * enabling the RBC counters to
> +	 * avoid the counter starting too
> +	 * early if an interupt is already
> +	 * pending.
> +	 */
> +	ldr	r11, [r0, #PM_INFO_MX6Q_GPC_V_OFFSET]
> +	ldr	r6, [r11, #MX6Q_GPC_IMR1]
> +	ldr	r7, [r11, #MX6Q_GPC_IMR2]
> +	ldr	r8, [r11, #MX6Q_GPC_IMR3]
> +	ldr	r9, [r11, #MX6Q_GPC_IMR4]
> +
> +	ldr	r10, =0xffffffff
> +	str	r10, [r11, #MX6Q_GPC_IMR1]
> +	str	r10, [r11, #MX6Q_GPC_IMR2]
> +	str	r10, [r11, #MX6Q_GPC_IMR3]
> +	str	r10, [r11, #MX6Q_GPC_IMR4]
> +
> +	/*
> +	 * enable the RBC bypass counter here
> +	 * to hold off the interrupts. RBC counter
> +	 * = 32 (1ms), Minimum RBC delay should be
> +	 * 400us for the analog LDOs to power down.
> +	 */
> +	ldr	r11, [r0, #PM_INFO_MX6Q_CCM_V_OFFSET]
> +	ldr	r10, [r11, #MX6Q_CCM_CCR]
> +	bic	r10, r10, #(0x3f << 21)
> +	orr	r10, r10, #(0x20 << 21)
> +	str	r10, [r11, #MX6Q_CCM_CCR]
> +
> +	/* enable the counter. */
> +	ldr	r10, [r11, #MX6Q_CCM_CCR]
> +	orr	r10, r10, #(0x1 << 27)
> +	str	r10, [r11, #MX6Q_CCM_CCR]
> +
> +	/* unmask all the GPC interrupts. */
> +	ldr	r11, [r0, #PM_INFO_MX6Q_GPC_V_OFFSET]
> +	str	r6, [r11, #MX6Q_GPC_IMR1]
> +	str	r7, [r11, #MX6Q_GPC_IMR2]
> +	str	r8, [r11, #MX6Q_GPC_IMR3]
> +	str	r9, [r11, #MX6Q_GPC_IMR4]
> +
> +	/*
> +	 * now delay for a short while (3usec)
> +	 * ARM is at 1GHz at this point
> +	 * so a short loop should be enough.
> +	 * this delay is required to ensure that
> +	 * the RBC counter can start counting in
> +	 * case an interrupt is already pending
> +	 * or in case an interrupt arrives just
> +	 * as ARM is about to assert DSM_request.
> +	 */
> +	ldr     r6, =2000
> +rbc_loop:
> +	sub     r6, r6, #0x1
> +	cmp     r6, #0x0
> +	bne     rbc_loop
> +
> +	/* Zzz, enter stop mode */
> +	wfi
> +	nop
> +	nop
> +	nop
> +	nop
> +
> +	/*
> +	 * run to here means there is pending
> +	 * wakeup source, system should auto
> +	 * resume, we need to restore MMDC IO first
> +	 */

The MMDC restoring code looks identical between the case of wakeup
source pending and the normal resume case, except that the former runs
at virtual address and the latter runs at the physical.  Can we make
a macro for it to save some code duplication?

Shawn

> +	ldr	r11, [r0, #PM_INFO_MX6Q_IOMUXC_V_OFFSET]
> +	ldr	r6, [r0, #PM_INFO_MMDC_IO_NUM_OFFSET]
> +	ldr	r7, =PM_INFO_MMDC_IO_VAL_OFFSET
> +restore_mmdc_io:
> +	ldr	r8, [r0, r7]
> +	add	r7, r7, #0x4
> +	ldr	r9, [r0, r7]
> +	add	r7, r7, #0x4
> +	str	r9, [r11, r8]
> +	sub	r6, r6, #0x1
> +	cmp     r6, #0x0
> +	bne	restore_mmdc_io
> +
> +	ldr	r11, [r0, #PM_INFO_MX6Q_MMDC_V_OFFSET]
> +	/* let DDR out of self-refresh. */
> +	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
> +	bic	r7, r7, #(1 << 21)
> +	str	r7, [r11, #MX6Q_MMDC_MAPSR]
> +
> +poll_dvfs_clear_2:
> +	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
> +	ands	r7, r7, #(1 << 25)
> +	bne     poll_dvfs_clear_2
> +	/* enable DDR auto power saving */
> +	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
> +	bic	r7, r7, #0x1
> +	str	r7, [r11, #MX6Q_MMDC_MAPSR]
> +	/* return to suspend finish */
> +	mov	pc, lr
> +
> +resume:
> +	/* invalidate L1 I-cache first */
> +	mov     r6, #0x0
> +	mcr     p15, 0, r6, c7, c5, 0
> +	mcr     p15, 0, r6, c7, c5, 0
> +	mcr     p15, 0, r6, c7, c5, 6
> +	/* enable the Icache and branch prediction */
> +	mov     r6, #0x1800
> +	mcr     p15, 0, r6, c1, c0, 0
> +	isb
> +
> +	/* get physical resume address from pm_info. */
> +	ldr	lr, [r0, #PM_INFO_RESUME_ADDR_OFFSET]
> +	/* clear core0's entry and parameter */
> +	ldr	r11, [r0, #PM_INFO_MX6Q_SRC_P_OFFSET]
> +	mov	r7, #0
> +	str	r7, [r11, #MX6Q_SRC_GPR1]
> +	str	r7, [r11, #MX6Q_SRC_GPR2]
> +
> +	ldr	r11, [r0, #PM_INFO_MX6Q_IOMUXC_P_OFFSET]
> +	ldr	r6, [r0, #PM_INFO_MMDC_IO_NUM_OFFSET]
> +	ldr	r7, =PM_INFO_MMDC_IO_VAL_OFFSET
> +dsm_restore_mmdc_io:
> +	ldr	r8, [r0, r7]
> +	add	r7, r7, #0x4
> +	ldr	r9, [r0, r7]
> +	add	r7, r7, #0x4
> +	str	r9, [r11, r8]
> +	sub	r6, r6, #0x1
> +	cmp     r6, #0x0
> +	bne	dsm_restore_mmdc_io
> +
> +	ldr	r11, [r0, #PM_INFO_MX6Q_MMDC_P_OFFSET]
> +	/* let DDR out of self-refresh */
> +	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
> +	bic	r7, r7, #(1 << 21)
> +	str	r7, [r11, #MX6Q_MMDC_MAPSR]
> +
> +poll_dvfs_clear_1:
> +	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
> +	ands	r7, r7, #(1 << 25)
> +	bne	poll_dvfs_clear_1
> +	/* enable DDR auto power saving */
> +	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
> +	bic	r7, r7, #0x1
> +	str	r7, [r11, #MX6Q_MMDC_MAPSR]
> +	mov	pc, lr
> +ENDPROC(imx6_suspend)
> -- 
> 1.7.9.5
> 
>
Anson.Huang@freescale.com - Jan. 15, 2014, 5:23 a.m.
Hi, Shawn
        these comments should be applicable, will optimize this asm code in V6, thanks.

Sent from Anson's iPhone

> 在 2014年1月15日,11:42,"Shawn Guo" <shawn.guo@linaro.org> 写道:

> 

>> On Tue, Jan 14, 2014 at 02:35:13PM +0800, Anson Huang wrote:

>> When system enter suspend, we can set the DDR IO to

>> high-Z state to save DDR IOs' power consumption, this

>> operation can save many power(from ~26mA@1.5V to ~15mA@1.5V,

>> measured on i.MX6Q SabreSD board, R25) of DDR IOs. To

>> achieve that, we need to copy the suspend code to ocram

>> and run the low level hardware related code(set DDR IOs

>> to high-Z state) in ocram.

>> 

>> If there is no ocram space available, then system will

>> still do suspend in external DDR, hence no DDR IOs will

>> be set to high-Z.

>> 

>> The OCRAM usage layout is as below,

>> 

>> ocram suspend region(4K currently):

>> ======================== high address ======================

>>                              .

>>                              .

>>                              .

>>                              ^

>>                              ^

>>                              ^

>>                      imx6_suspend code

>>      reserved space(to make imx6_suspend aligned with 8)

> 

> We can remove this line now, right?  Same for the comment in code.

> 

>>             PM_INFO structure(imx6_cpu_pm_info)

>> ======================== low address =======================

>> 

>> Signed-off-by: Anson Huang <b20788@freescale.com>

> 

> <snip>

> 

>> +ENTRY(imx6_suspend)

>> +    ldr    r1, [r0, #PM_INFO_PBASE_OFFSET]

>> +    ldr    r2, [r0, #PM_INFO_RESUME_ADDR_OFFSET]

>> +    ldr    r3, [r0, #PM_INFO_CPU_TYPE_OFFSET]

>> +    ldr    r4, [r0, #PM_INFO_PM_INFO_SIZE_OFFSET]

>> +

>> +    /*

>> +     * counting the resume address in iram

>> +     * to set it in SRC register.

>> +     */

>> +    ldr    r6, =imx6_suspend

>> +    ldr    r7, =resume

>> +    sub    r7, r7, r6

>> +    add    r8, r1, r4

>> +    add    r9, r8, r7

>> +

>> +    /*

>> +     * make sure TLB contain the addr we want,

>> +     * as we will access them after MMDC IO floated.

>> +     */

>> +

>> +    ldr    r11, [r0, #PM_INFO_MX6Q_CCM_V_OFFSET]

>> +    ldr    r6, [r11, #0x0]

>> +    ldr    r11, [r0, #PM_INFO_MX6Q_GPC_V_OFFSET]

>> +    ldr    r6, [r11, #0x0]

>> +

>> +    /* use r11 to store the IO address */

>> +    ldr    r11, [r0, #PM_INFO_MX6Q_SRC_V_OFFSET]

>> +    /* store physical resume addr and pm_info address. */

>> +    str    r9, [r11, #MX6Q_SRC_GPR1]

>> +    str    r1, [r11, #MX6Q_SRC_GPR2]

>> +

>> +    /* need to sync L2 cache before DSM. */

>> +    sync_l2_cache

>> +

>> +    ldr    r11, [r0, #PM_INFO_MX6Q_MMDC_V_OFFSET]

>> +    /*

>> +     * put DDR explicitly into self-refresh and

>> +     * disable automatic power savings.

>> +     */

>> +    ldr    r7, [r11, #MX6Q_MMDC_MAPSR]

>> +    orr    r7, r7, #0x1

>> +    str    r7, [r11, #MX6Q_MMDC_MAPSR]

>> +

>> +    /* make the DDR explicitly enter self-refresh. */

>> +    ldr    r7, [r11, #MX6Q_MMDC_MAPSR]

>> +    orr    r7, r7, #(1 << 21)

>> +    str    r7, [r11, #MX6Q_MMDC_MAPSR]

>> +

>> +poll_dvfs_set_1:

>> +    ldr    r7, [r11, #MX6Q_MMDC_MAPSR]

>> +    ands    r7, r7, #(1 << 25)

>> +    beq    poll_dvfs_set_1

>> +

>> +    ldr    r11, [r0, #PM_INFO_MX6Q_IOMUXC_V_OFFSET]

>> +    ldr    r6, =0x0

>> +    ldr    r7, [r0, #PM_INFO_MMDC_IO_NUM_OFFSET]

>> +    ldr    r8, =PM_INFO_MMDC_IO_VAL_OFFSET

> 

> If we add the following instruction here

> 

>    add     r8, r0, r8

> 

>> +set_mmdc_io_lpm:

>> +    ldr    r9, [r0, r8]

> 

> , it can be replaced by the following one

> 

>    ldr     r9, [r8], #0x8

> 

>> +    str    r6, [r11, r9]

>> +    add    r8, r8, #0x8

> 

> , and then we can save this one in the loop, right?

> 

>> +    sub    r7, r7, #0x1

>> +    cmp     r7, #0x0

> 

> The sequence of 'sub ...; cmp ..., #0' and 'and ...; cmp ..., #0' can

> generally be replaced by 'subs ...' and 'ands' respectively to save one

> instruction, right?

> 

> The above two comments apply to a few other places in the code.

> 

>> +    bne    set_mmdc_io_lpm

>> +

>> +    /*

>> +     * mask all GPC interrupts before

>> +     * enabling the RBC counters to

>> +     * avoid the counter starting too

>> +     * early if an interupt is already

>> +     * pending.

>> +     */

>> +    ldr    r11, [r0, #PM_INFO_MX6Q_GPC_V_OFFSET]

>> +    ldr    r6, [r11, #MX6Q_GPC_IMR1]

>> +    ldr    r7, [r11, #MX6Q_GPC_IMR2]

>> +    ldr    r8, [r11, #MX6Q_GPC_IMR3]

>> +    ldr    r9, [r11, #MX6Q_GPC_IMR4]

>> +

>> +    ldr    r10, =0xffffffff

>> +    str    r10, [r11, #MX6Q_GPC_IMR1]

>> +    str    r10, [r11, #MX6Q_GPC_IMR2]

>> +    str    r10, [r11, #MX6Q_GPC_IMR3]

>> +    str    r10, [r11, #MX6Q_GPC_IMR4]

>> +

>> +    /*

>> +     * enable the RBC bypass counter here

>> +     * to hold off the interrupts. RBC counter

>> +     * = 32 (1ms), Minimum RBC delay should be

>> +     * 400us for the analog LDOs to power down.

>> +     */

>> +    ldr    r11, [r0, #PM_INFO_MX6Q_CCM_V_OFFSET]

>> +    ldr    r10, [r11, #MX6Q_CCM_CCR]

>> +    bic    r10, r10, #(0x3f << 21)

>> +    orr    r10, r10, #(0x20 << 21)

>> +    str    r10, [r11, #MX6Q_CCM_CCR]

>> +

>> +    /* enable the counter. */

>> +    ldr    r10, [r11, #MX6Q_CCM_CCR]

>> +    orr    r10, r10, #(0x1 << 27)

>> +    str    r10, [r11, #MX6Q_CCM_CCR]

>> +

>> +    /* unmask all the GPC interrupts. */

>> +    ldr    r11, [r0, #PM_INFO_MX6Q_GPC_V_OFFSET]

>> +    str    r6, [r11, #MX6Q_GPC_IMR1]

>> +    str    r7, [r11, #MX6Q_GPC_IMR2]

>> +    str    r8, [r11, #MX6Q_GPC_IMR3]

>> +    str    r9, [r11, #MX6Q_GPC_IMR4]

>> +

>> +    /*

>> +     * now delay for a short while (3usec)

>> +     * ARM is at 1GHz at this point

>> +     * so a short loop should be enough.

>> +     * this delay is required to ensure that

>> +     * the RBC counter can start counting in

>> +     * case an interrupt is already pending

>> +     * or in case an interrupt arrives just

>> +     * as ARM is about to assert DSM_request.

>> +     */

>> +    ldr     r6, =2000

>> +rbc_loop:

>> +    sub     r6, r6, #0x1

>> +    cmp     r6, #0x0

>> +    bne     rbc_loop

>> +

>> +    /* Zzz, enter stop mode */

>> +    wfi

>> +    nop

>> +    nop

>> +    nop

>> +    nop

>> +

>> +    /*

>> +     * run to here means there is pending

>> +     * wakeup source, system should auto

>> +     * resume, we need to restore MMDC IO first

>> +     */

> 

> The MMDC restoring code looks identical between the case of wakeup

> source pending and the normal resume case, except that the former runs

> at virtual address and the later runs at the physical.  Can we make

> a macro for it to save some code duplication?

> 

> Shawn

> 

>> +    ldr    r11, [r0, #PM_INFO_MX6Q_IOMUXC_V_OFFSET]

>> +    ldr    r6, [r0, #PM_INFO_MMDC_IO_NUM_OFFSET]

>> +    ldr    r7, =PM_INFO_MMDC_IO_VAL_OFFSET

>> +restore_mmdc_io:

>> +    ldr    r8, [r0, r7]

>> +    add    r7, r7, #0x4

>> +    ldr    r9, [r0, r7]

>> +    add    r7, r7, #0x4

>> +    str    r9, [r11, r8]

>> +    sub    r6, r6, #0x1

>> +    cmp     r6, #0x0

>> +    bne    restore_mmdc_io

>> +

>> +    ldr    r11, [r0, #PM_INFO_MX6Q_MMDC_V_OFFSET]

>> +    /* let DDR out of self-refresh. */

>> +    ldr    r7, [r11, #MX6Q_MMDC_MAPSR]

>> +    bic    r7, r7, #(1 << 21)

>> +    str    r7, [r11, #MX6Q_MMDC_MAPSR]

>> +

>> +poll_dvfs_clear_2:

>> +    ldr    r7, [r11, #MX6Q_MMDC_MAPSR]

>> +    ands    r7, r7, #(1 << 25)

>> +    bne     poll_dvfs_clear_2

>> +    /* enable DDR auto power saving */

>> +    ldr    r7, [r11, #MX6Q_MMDC_MAPSR]

>> +    bic    r7, r7, #0x1

>> +    str    r7, [r11, #MX6Q_MMDC_MAPSR]

>> +    /* return to suspend finish */

>> +    mov    pc, lr

>> +

>> +resume:

>> +    /* invalidate L1 I-cache first */

>> +    mov     r6, #0x0

>> +    mcr     p15, 0, r6, c7, c5, 0

>> +    mcr     p15, 0, r6, c7, c5, 0

>> +    mcr     p15, 0, r6, c7, c5, 6

>> +    /* enable the Icache and branch prediction */

>> +    mov     r6, #0x1800

>> +    mcr     p15, 0, r6, c1, c0, 0

>> +    isb

>> +

>> +    /* get physical resume address from pm_info. */

>> +    ldr    lr, [r0, #PM_INFO_RESUME_ADDR_OFFSET]

>> +    /* clear core0's entry and parameter */

>> +    ldr    r11, [r0, #PM_INFO_MX6Q_SRC_P_OFFSET]

>> +    mov    r7, #0

>> +    str    r7, [r11, #MX6Q_SRC_GPR1]

>> +    str    r7, [r11, #MX6Q_SRC_GPR2]

>> +

>> +    ldr    r11, [r0, #PM_INFO_MX6Q_IOMUXC_P_OFFSET]

>> +    ldr    r6, [r0, #PM_INFO_MMDC_IO_NUM_OFFSET]

>> +    ldr    r7, =PM_INFO_MMDC_IO_VAL_OFFSET

>> +dsm_restore_mmdc_io:

>> +    ldr    r8, [r0, r7]

>> +    add    r7, r7, #0x4

>> +    ldr    r9, [r0, r7]

>> +    add    r7, r7, #0x4

>> +    str    r9, [r11, r8]

>> +    sub    r6, r6, #0x1

>> +    cmp     r6, #0x0

>> +    bne    dsm_restore_mmdc_io

>> +

>> +    ldr    r11, [r0, #PM_INFO_MX6Q_MMDC_P_OFFSET]

>> +    /* let DDR out of self-refresh */

>> +    ldr    r7, [r11, #MX6Q_MMDC_MAPSR]

>> +    bic    r7, r7, #(1 << 21)

>> +    str    r7, [r11, #MX6Q_MMDC_MAPSR]

>> +

>> +poll_dvfs_clear_1:

>> +    ldr    r7, [r11, #MX6Q_MMDC_MAPSR]

>> +    ands    r7, r7, #(1 << 25)

>> +    bne    poll_dvfs_clear_1

>> +    /* enable DDR auto power saving */

>> +    ldr    r7, [r11, #MX6Q_MMDC_MAPSR]

>> +    bic    r7, r7, #0x1

>> +    str    r7, [r11, #MX6Q_MMDC_MAPSR]

>> +    mov    pc, lr

>> +ENDPROC(imx6_suspend)

>> -- 

>> 1.7.9.5

>

Patch

======================== high address ======================
                              .
                              .
                              .
                              ^
                              ^
                              ^
                      imx6_suspend code
      reserved space(to make imx6_suspend aligned with 8)
             PM_INFO structure(imx6_cpu_pm_info)
======================== low address =======================

Signed-off-by: Anson Huang <b20788@freescale.com>
---
Changes since V4:
    1. Create socdata to pass cpu type, mmdc io num,
       mmdc offset and compatible name for each soc, so that
       we can avoid cpu type check and dts change;
    2. Improve asm code function declaration to avoid cast.

 arch/arm/mach-imx/Makefile       |    3 +-
 arch/arm/mach-imx/common.h       |    8 +-
 arch/arm/mach-imx/hardware.h     |    4 +-
 arch/arm/mach-imx/mach-imx6q.c   |    2 +-
 arch/arm/mach-imx/mach-imx6sl.c  |    3 +-
 arch/arm/mach-imx/pm-imx6q.c     |  280 ++++++++++++++++++++++++++++++++++-
 arch/arm/mach-imx/suspend-imx6.S |  303 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 592 insertions(+), 11 deletions(-)
 create mode 100644 arch/arm/mach-imx/suspend-imx6.S

diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile
index befcaf5..3d96a45 100644
--- a/arch/arm/mach-imx/Makefile
+++ b/arch/arm/mach-imx/Makefile
@@ -102,7 +102,8 @@  obj-$(CONFIG_SOC_IMX6Q) += clk-imx6q.o mach-imx6q.o
 obj-$(CONFIG_SOC_IMX6SL) += clk-imx6sl.o mach-imx6sl.o
 
 ifeq ($(CONFIG_PM),y)
-obj-$(CONFIG_SOC_IMX6Q) += pm-imx6q.o headsmp.o
+AFLAGS_suspend-imx6.o :=-Wa,-march=armv7-a
+obj-$(CONFIG_SOC_IMX6Q) += pm-imx6q.o headsmp.o suspend-imx6.o
 # i.MX6SL reuses i.MX6Q code
 obj-$(CONFIG_SOC_IMX6SL) += pm-imx6q.o headsmp.o
 endif
diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h
index 4f4a95c..a363e71 100644
--- a/arch/arm/mach-imx/common.h
+++ b/arch/arm/mach-imx/common.h
@@ -1,5 +1,5 @@ 
 /*
- * Copyright 2004-2013 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2004-2014 Freescale Semiconductor, Inc. All Rights Reserved.
  */
 
 /*
@@ -145,11 +145,17 @@  void imx_cpu_die(unsigned int cpu);
 int imx_cpu_kill(unsigned int cpu);
 
 #ifdef CONFIG_PM
+void imx6_suspend(void __iomem *ocram_vbase);
 void imx6q_pm_init(void);
+void imx6dl_pm_init(void);
+void imx6sl_pm_init(void);
 void imx6q_pm_set_ccm_base(void __iomem *base);
 void imx5_pm_init(void);
 #else
+static inline void imx6_suspend(void __iomem *ocram_vbase) {}
 static inline void imx6q_pm_init(void) {}
+static inline void imx6dl_pm_init(void) {}
+static inline void imx6sl_pm_init(void) {}
 static inline void imx6q_pm_set_ccm_base(void __iomem *base) {}
 static inline void imx5_pm_init(void) {}
 #endif
diff --git a/arch/arm/mach-imx/hardware.h b/arch/arm/mach-imx/hardware.h
index a3b0b04..abf43bb 100644
--- a/arch/arm/mach-imx/hardware.h
+++ b/arch/arm/mach-imx/hardware.h
@@ -1,5 +1,5 @@ 
 /*
- * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ * Copyright 2004-2007, 2014 Freescale Semiconductor, Inc. All Rights Reserved.
  * Copyright 2008 Juergen Beisert, kernel@pengutronix.de
  *
  * This program is free software; you can redistribute it and/or
@@ -20,7 +20,9 @@ 
 #ifndef __ASM_ARCH_MXC_HARDWARE_H__
 #define __ASM_ARCH_MXC_HARDWARE_H__
 
+#ifndef __ASSEMBLY__
 #include <asm/io.h>
+#endif
 #include <asm/sizes.h>
 
 #define addr_in_module(addr, mod) \
diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c
index e51e3da..e629593 100644
--- a/arch/arm/mach-imx/mach-imx6q.c
+++ b/arch/arm/mach-imx/mach-imx6q.c
@@ -212,7 +212,7 @@  static void __init imx6q_init_machine(void)
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, parent);
 
 	imx_anatop_init();
-	imx6q_pm_init();
+	cpu_is_imx6q() ?  imx6q_pm_init() : imx6dl_pm_init();
 	imx6q_1588_init();
 }
 
diff --git a/arch/arm/mach-imx/mach-imx6sl.c b/arch/arm/mach-imx/mach-imx6sl.c
index 0f4fd4c..aa873d8 100644
--- a/arch/arm/mach-imx/mach-imx6sl.c
+++ b/arch/arm/mach-imx/mach-imx6sl.c
@@ -55,8 +55,7 @@  static void __init imx6sl_init_machine(void)
 
 	imx6sl_fec_init();
 	imx_anatop_init();
-	/* Reuse imx6q pm code */
-	imx6q_pm_init();
+	imx6sl_pm_init();
 }
 
 static void __init imx6sl_init_irq(void)
diff --git a/arch/arm/mach-imx/pm-imx6q.c b/arch/arm/mach-imx/pm-imx6q.c
index d45acc0..fb18de7 100644
--- a/arch/arm/mach-imx/pm-imx6q.c
+++ b/arch/arm/mach-imx/pm-imx6q.c
@@ -1,5 +1,5 @@ 
 /*
- * Copyright 2011-2013 Freescale Semiconductor, Inc.
+ * Copyright 2011-2014 Freescale Semiconductor, Inc.
  * Copyright 2011 Linaro Ltd.
  *
  * The code contained herein is licensed under the GNU General Public
@@ -14,16 +14,19 @@ 
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/genalloc.h>
 #include <linux/mfd/syscon.h>
 #include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/of_platform.h>
 #include <linux/regmap.h>
 #include <linux/suspend.h>
 #include <asm/cacheflush.h>
+#include <asm/fncpy.h>
 #include <asm/proc-fns.h>
 #include <asm/suspend.h>
-#include <asm/hardware/cache-l2x0.h>
+#include <asm/tlb.h>
 
 #include "common.h"
 #include "hardware.h"
@@ -58,7 +61,84 @@ 
 #define CGPR				0x64
 #define BM_CGPR_INT_MEM_CLK_LPM		(0x1 << 17)
 
+#define MX6Q_SUSPEND_OCRAM_SIZE		0x1000
+#define MX6_MAX_MMDC_IO_NUM		33
+
 static void __iomem *ccm_base;
+static void __iomem *suspend_ocram_base;
+static void (*imx6_suspend_in_ocram_fn)(void __iomem *ocram_vbase);
+
+/*
+ * suspend ocram space layout:
+ * ======================== high address ======================
+ *                              .
+ *                              .
+ *                              .
+ *                              ^
+ *                              ^
+ *                              ^
+ *                      imx6_suspend code
+ *      reserved space(to make imx6_suspend aligned with 8)
+ *              PM_INFO structure(imx6_cpu_pm_info)
+ * ======================== low address =======================
+ */
+
+struct imx6_pm_base {
+	phys_addr_t pbase;
+	void __iomem *vbase;
+};
+
+struct imx6_pm_socdata {
+	u32 cpu_type;
+	const char *mmdc_compat;
+	const char *src_compat;
+	const char *iomuxc_compat;
+	const char *gpc_compat;
+	const u32 mmdc_io_num;
+	const u32 mmdc_io_offset[MX6_MAX_MMDC_IO_NUM];
+};
+
+static const struct imx6_pm_socdata imx6q_pm_data __initconst = {
+	.cpu_type = MXC_CPU_IMX6Q,
+	.mmdc_compat = "fsl,imx6q-mmdc",
+	.src_compat = "fsl,imx6q-src",
+	.iomuxc_compat = "fsl,imx6q-iomuxc",
+	.gpc_compat = "fsl,imx6q-gpc",
+	.mmdc_io_num = 33,
+	.mmdc_io_offset = {
+		0x5ac, 0x5b4, 0x528, 0x520, /* DQM0 ~ DQM3 */
+		0x514, 0x510, 0x5bc, 0x5c4, /* DQM4 ~ DQM7 */
+		0x56c, 0x578, 0x588, 0x594, /* CAS, RAS, SDCLK_0, SDCLK_1 */
+		0x5a8, 0x5b0, 0x524, 0x51c, /* SDQS0 ~ SDQS3 */
+		0x518, 0x50c, 0x5b8, 0x5c0, /* SDQS4 ~ SDQS7 */
+		0x784, 0x788, 0x794, 0x79c, /* GPR_B0DS ~ GPR_B3DS */
+		0x7a0, 0x7a4, 0x7a8, 0x748, /* GPR_B4DS ~ GPR_B7DS */
+		0x59c, 0x5a0, 0x750, 0x774, /* SODT0, SODT1, MODE_CTL, MODE */
+		0x74c,			    /* GPR_ADDS */
+	},
+};
+
+/*
+ * This structure is for passing necessary data for low level ocram
+ * suspend code(arch/arm/mach-imx/suspend-imx6.S), if this struct
+ * definition is changed, the offset definition in
+ * arch/arm/mach-imx/suspend-imx6.S must be also changed accordingly,
+ * otherwise, the suspend to ocram function will be broken!
+ */
+struct imx6_cpu_pm_info {
+	phys_addr_t pbase; /* The physical address of pm_info. */
+	phys_addr_t resume_addr; /* The physical resume address for asm code */
+	u32 cpu_type;
+	u32 pm_info_size; /* Size of pm_info. */
+	struct imx6_pm_base mmdc_base;
+	struct imx6_pm_base src_base;
+	struct imx6_pm_base iomuxc_base;
+	struct imx6_pm_base ccm_base;
+	struct imx6_pm_base gpc_base;
+	struct imx6_pm_base l2_base;
+	u32 mmdc_io_num; /* Number of MMDC IOs which need saved/restored. */
+	u32 mmdc_io_val[MX6_MAX_MMDC_IO_NUM][2]; /* To save offset and value */
+} __aligned(8);
 
 void imx6q_set_int_mem_clk_lpm(void)
 {
@@ -177,7 +257,17 @@  int imx6q_set_lpm(enum mxc_cpu_pwr_mode mode)
 
 static int imx6q_suspend_finish(unsigned long val)
 {
-	cpu_do_idle();
+	if (!imx6_suspend_in_ocram_fn) {
+		cpu_do_idle();
+	} else {
+		/*
+		 * call low level suspend function in ocram,
+		 * as we need to float DDR IO.
+		 */
+		local_flush_tlb_all();
+		imx6_suspend_in_ocram_fn(suspend_ocram_base);
+	}
+
 	return 0;
 }
 
@@ -187,7 +277,12 @@  static int imx6q_pm_enter(suspend_state_t state)
 	case PM_SUSPEND_MEM:
 		imx6q_set_lpm(STOP_POWER_OFF);
 		imx6q_enable_wb(true);
-		imx6q_enable_rbc(true);
+		/*
+		 * For suspend into ocram, asm code already take care of
+		 * RBC setting, so we do NOT need to do that here.
+		 */
+		if (!imx6_suspend_in_ocram_fn)
+			imx6q_enable_rbc(true);
 		imx_gpc_pre_suspend();
 		imx_anatop_pre_suspend();
 		imx_set_cpu_jump(0, v7_cpu_resume);
@@ -218,12 +313,172 @@  void __init imx6q_pm_set_ccm_base(void __iomem *base)
 	ccm_base = base;
 }
 
-void __init imx6q_pm_init(void)
+static int __init imx6_pm_get_base(struct imx6_pm_base *base,
+				const char *compat)
+{
+	struct device_node *node;
+	struct resource res;
+	int ret = 0;
+
+	node = of_find_compatible_node(NULL, NULL, compat);
+	if (!node) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	ret = of_address_to_resource(node, 0, &res);
+	if (ret)
+		goto put_node;
+
+	base->pbase = res.start;
+	base->vbase = ioremap(res.start, resource_size(&res));
+	if (!base->vbase)
+		ret = -ENOMEM;
+
+put_node:
+	of_node_put(node);
+out:
+	return ret;
+}
+
+static int __init imx6q_ocram_suspend_init(const struct imx6_pm_socdata
+					*socdata)
+{
+	phys_addr_t ocram_pbase;
+	struct device_node *node;
+	struct platform_device *pdev;
+	struct imx6_cpu_pm_info *pm_info;
+	struct gen_pool *ocram_pool;
+	unsigned long ocram_base;
+	int i, ret = 0;
+	const u32 *mmdc_offset_array;
+
+	if (!socdata) {
+		pr_warn("%s: invalid argument!\n", __func__);
+		return -EINVAL;
+	}
+
+	node = of_find_compatible_node(NULL, NULL, "mmio-sram");
+	if (!node) {
+		pr_warn("%s: failed to find ocram node!\n", __func__);
+		return -ENODEV;
+	}
+
+	pdev = of_find_device_by_node(node);
+	if (!pdev) {
+		pr_warn("%s: failed to find ocram device!\n", __func__);
+		ret = -ENODEV;
+		goto put_node;
+	}
+
+	ocram_pool = dev_get_gen_pool(&pdev->dev);
+	if (!ocram_pool) {
+		pr_warn("%s: ocram pool unavailable!\n", __func__);
+		ret = -ENODEV;
+		goto put_node;
+	}
+
+	ocram_base = gen_pool_alloc(ocram_pool, MX6Q_SUSPEND_OCRAM_SIZE);
+	if (!ocram_base) {
+		pr_warn("%s: unable to alloc ocram!\n", __func__);
+		ret = -ENOMEM;
+		goto put_node;
+	}
+
+	ocram_pbase = gen_pool_virt_to_phys(ocram_pool, ocram_base);
+
+	suspend_ocram_base = __arm_ioremap_exec(ocram_pbase,
+		MX6Q_SUSPEND_OCRAM_SIZE, false);
+
+	pm_info = suspend_ocram_base;
+	pm_info->pbase = ocram_pbase;
+	pm_info->resume_addr = virt_to_phys(v7_cpu_resume);
+	pm_info->pm_info_size = sizeof(*pm_info);
+
+	/*
+	 * ccm physical address is not used by asm code currently,
+	 * so get ccm virtual address directly, as we already have
+	 * it from ccm driver.
+	 */
+	pm_info->ccm_base.vbase = ccm_base;
+
+	ret = imx6_pm_get_base(&pm_info->mmdc_base, socdata->mmdc_compat);
+	if (ret) {
+		pr_warn("%s: failed to get mmdc base %d!\n", __func__, ret);
+		goto put_node;
+	}
+
+	ret = imx6_pm_get_base(&pm_info->src_base, socdata->src_compat);
+	if (ret) {
+		pr_warn("%s: failed to get src base %d!\n", __func__, ret);
+		goto src_map_failed;
+	}
+
+	ret = imx6_pm_get_base(&pm_info->iomuxc_base, socdata->iomuxc_compat);
+	if (ret) {
+		pr_warn("%s: failed to get iomuxc base %d!\n", __func__, ret);
+		goto iomuxc_map_failed;
+	}
+
+	ret = imx6_pm_get_base(&pm_info->gpc_base, socdata->gpc_compat);
+	if (ret) {
+		pr_warn("%s: failed to get gpc base %d!\n", __func__, ret);
+		goto gpc_map_failed;
+	}
+
+	ret = imx6_pm_get_base(&pm_info->l2_base, "arm,pl310-cache");
+	if (ret) {
+		pr_warn("%s: failed to get pl310-cache base %d!\n",
+			__func__, ret);
+		goto pl310_cache_map_failed;
+	}
+
+	pm_info->cpu_type = socdata->cpu_type;
+	pm_info->mmdc_io_num = socdata->mmdc_io_num;
+	mmdc_offset_array = socdata->mmdc_io_offset;
+
+	for (i = 0; i < pm_info->mmdc_io_num; i++) {
+		pm_info->mmdc_io_val[i][0] =
+			mmdc_offset_array[i];
+		pm_info->mmdc_io_val[i][1] =
+			readl_relaxed(pm_info->iomuxc_base.vbase +
+			mmdc_offset_array[i]);
+	}
+
+	imx6_suspend_in_ocram_fn = fncpy(
+		suspend_ocram_base + sizeof(*pm_info),
+		&imx6_suspend,
+		MX6Q_SUSPEND_OCRAM_SIZE - sizeof(*pm_info));
+
+	goto put_node;
+
+pl310_cache_map_failed:
+	iounmap(pm_info->gpc_base.vbase);
+gpc_map_failed:
+	iounmap(pm_info->iomuxc_base.vbase);
+iomuxc_map_failed:
+	iounmap(pm_info->src_base.vbase);
+src_map_failed:
+	iounmap(pm_info->mmdc_base.vbase);
+put_node:
+	of_node_put(node);
+
+	return ret;
+}
+
+static void __init imx6_pm_common_init(const struct imx6_pm_socdata
+					*socdata)
 {
 	struct regmap *gpr;
+	int ret;
 
 	WARN_ON(!ccm_base);
 
+	ret = imx6q_ocram_suspend_init(socdata);
+	if (ret)
+		pr_warn("%s: failed to initialize ocram suspend %d!\n",
+			__func__, ret);
+
 	/*
 	 * This is for SW workaround step #1 of ERR007265, see comments
 	 * in imx6q_set_lpm for details of this errata.
@@ -241,3 +496,18 @@  void __init imx6q_pm_init(void)
 
 	suspend_set_ops(&imx6q_pm_ops);
 }
+
+/*
+ * i.MX6Q power management init: runs the common init with the
+ * i.MX6Q SoC data (imx6q_pm_data), which imx6_pm_common_init()
+ * hands on to imx6q_ocram_suspend_init().
+ */
+void __init imx6q_pm_init(void)
+{
+	imx6_pm_common_init(&imx6q_pm_data);
+}
+
+/*
+ * i.MX6DL power management init: runs the common init without any
+ * SoC data (NULL).
+ * NOTE(review): imx6_pm_common_init() forwards this NULL to
+ * imx6q_ocram_suspend_init(); presumably that rejects it so suspend
+ * stays in DDR on this SoC - confirm it is checked before any
+ * socdata-> dereference.
+ */
+void __init imx6dl_pm_init(void)
+{
+	imx6_pm_common_init(NULL);
+}
+
+/*
+ * i.MX6SL power management init: runs the common init without any
+ * SoC data (NULL).
+ * NOTE(review): imx6_pm_common_init() forwards this NULL to
+ * imx6q_ocram_suspend_init(); presumably that rejects it so suspend
+ * stays in DDR on this SoC - confirm it is checked before any
+ * socdata-> dereference.
+ */
+void __init imx6sl_pm_init(void)
+{
+	imx6_pm_common_init(NULL);
+}
diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S
new file mode 100644
index 0000000..1384bb2
--- /dev/null
+++ b/arch/arm/mach-imx/suspend-imx6.S
@@ -0,0 +1,303 @@ 
+/*
+ * Copyright 2014 Freescale Semiconductor, Inc.
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ * http://www.opensource.org/licenses/gpl-license.html
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+#include <linux/linkage.h>
+#include <asm/hardware/cache-l2x0.h>
+#include "hardware.h"
+
+/*
+ * ==================== low level suspend ====================
+ *
+ * Better to follow below rules to use ARM registers:
+ * r0: pm_info structure address;
+ * r1 ~ r5: for saving pm_info members;
+ * r6 ~ r10: free registers;
+ * r11: io base address.
+ *
+ * suspend ocram space layout:
+ * ======================== high address ======================
+ *                              .
+ *                              .
+ *                              .
+ *                              ^
+ *                              ^
+ *                              ^
+ *                      imx6_suspend code
+ *      (no gap: the code is copied to base + sizeof(PM_INFO))
+ *              PM_INFO structure(imx6_cpu_pm_info)
+ * ======================== low address =======================
+ */
+
+/*
+ * Below offsets are based on struct imx6_cpu_pm_info
+ * which defined in arch/arm/mach-imx/pm-imx6q.c, this
+ * structure contains necessary pm info for low level
+ * suspend related code.
+ *
+ * These are byte offsets from the pm_info address held in r0 and
+ * must be kept in sync with the C structure by hand.
+ *
+ * The *_P_OFFSET / *_V_OFFSET pairs are used as follows below:
+ * the *_V_ bases are used on the suspend path and on the
+ * fall-through path after wfi, the *_P_ bases are used on the
+ * "resume" path, which writes 0x1800 to the system control
+ * register before touching any IO.
+ */
+/* base address of pm_info that is stored into SRC_GPR2 */
+#define PM_INFO_PBASE_OFFSET			0x0
+/* address the "resume" path finally jumps to */
+#define PM_INFO_RESUME_ADDR_OFFSET		0x4
+#define PM_INFO_CPU_TYPE_OFFSET			0x8
+/* added to PBASE to locate the copied imx6_suspend code */
+#define PM_INFO_PM_INFO_SIZE_OFFSET		0xC
+#define PM_INFO_MX6Q_MMDC_P_OFFSET		0x10
+#define PM_INFO_MX6Q_MMDC_V_OFFSET		0x14
+#define PM_INFO_MX6Q_SRC_P_OFFSET		0x18
+#define PM_INFO_MX6Q_SRC_V_OFFSET		0x1C
+#define PM_INFO_MX6Q_IOMUXC_P_OFFSET		0x20
+#define PM_INFO_MX6Q_IOMUXC_V_OFFSET		0x24
+#define PM_INFO_MX6Q_CCM_P_OFFSET		0x28
+#define PM_INFO_MX6Q_CCM_V_OFFSET		0x2C
+#define PM_INFO_MX6Q_GPC_P_OFFSET		0x30
+#define PM_INFO_MX6Q_GPC_V_OFFSET		0x34
+#define PM_INFO_MX6Q_L2_P_OFFSET		0x38
+#define PM_INFO_MX6Q_L2_V_OFFSET		0x3C
+/* number of entries in the mmdc_io_val array */
+#define PM_INFO_MMDC_IO_NUM_OFFSET		0x40
+/*
+ * start of the mmdc_io_val array; each entry is 8 bytes:
+ * word 0 = IOMUXC register offset, word 1 = saved register value.
+ */
+#define PM_INFO_MMDC_IO_VAL_OFFSET		0x44
+
+/* register offsets, relative to the SRC/MMDC/GPC/CCM base addresses */
+#define MX6Q_SRC_GPR1	0x20
+#define MX6Q_SRC_GPR2	0x24
+#define MX6Q_MMDC_MAPSR	0x404
+#define MX6Q_GPC_IMR1	0x08
+#define MX6Q_GPC_IMR2	0x0c
+#define MX6Q_GPC_IMR3	0x10
+#define MX6Q_GPC_IMR4	0x14
+#define MX6Q_CCM_CCR	0x0
+
+	/*
+	 * 8-byte (2^3) alignment for the code emitted next, i.e.
+	 * imx6_suspend; the macro definition below emits nothing itself.
+	 * NOTE(review): presumably required because imx6_suspend is
+	 * relocated with fncpy() - confirm against asm/fncpy.h.
+	 */
+	.align 3
+
+	/*
+	 * sync_l2_cache - issue an L2 cache sync and wait for completion.
+	 *
+	 * In:       r0 = pm_info address (the virtual L2 base is read
+	 *           from it)
+	 * Clobbers: r6, r11, condition flags
+	 *
+	 * Expands to nothing when CONFIG_CACHE_L2X0 is not set.
+	 */
+	.macro  sync_l2_cache
+
+	/* sync L2 cache to drain L2's buffers to DRAM. */
+#ifdef CONFIG_CACHE_L2X0
+	ldr	r11, [r0, #PM_INFO_MX6Q_L2_V_OFFSET]
+	mov	r6, #0x0
+	/* writing the sync register starts the operation */
+	str	r6, [r11, #L2X0_CACHE_SYNC]
+1:
+	/* poll bit 0 of the sync register until it reads back as 0 */
+	ldr	r6, [r11, #L2X0_CACHE_SYNC]
+	ands	r6, r6, #0x1
+	bne	1b
+#endif
+	.endm
+
+/*
+ * imx6_suspend - low level suspend code, meant to be copied to and
+ * executed from OCRAM so that it keeps running while DDR is in
+ * self-refresh and the DDR IO pads are floated.
+ *
+ * In:
+ *   r0 = address of the pm_info structure. On the suspend path the
+ *        *_V_ (virtual) IO bases are used; on the "resume" path the
+ *        *_P_ (physical) bases are used.
+ *        NOTE(review): on the resume path r0 is presumably loaded
+ *        from SRC_GPR2 by the boot ROM - confirm.
+ *   lr = return address, used only when wfi falls through.
+ *
+ * Out:
+ *   Returns to lr if a wakeup was already pending (wfi fell through).
+ *   After a real power down, execution restarts at "resume", which
+ *   finally jumps to the address stored at PM_INFO_RESUME_ADDR_OFFSET.
+ *
+ * Clobbers: r1-r4, r6-r11, condition flags. No stack is used.
+ * NOTE(review): r4-r11 are not preserved across the fall-through
+ * return; the caller is assumed not to rely on them - confirm.
+ */
+ENTRY(imx6_suspend)
+	ldr	r1, [r0, #PM_INFO_PBASE_OFFSET]
+	ldr	r2, [r0, #PM_INFO_RESUME_ADDR_OFFSET]
+	ldr	r3, [r0, #PM_INFO_CPU_TYPE_OFFSET]
+	ldr	r4, [r0, #PM_INFO_PM_INFO_SIZE_OFFSET]
+
+	/*
+	 * compute the address of "resume" inside the relocated copy:
+	 * pm_info base + pm_info size + (resume - imx6_suspend).
+	 * only the difference of the two link-time addresses is used,
+	 * so the result does not depend on where the code was linked.
+	 */
+	ldr	r6, =imx6_suspend
+	ldr	r7, =resume
+	sub	r7, r7, r6
+	add	r8, r1, r4
+	add	r9, r8, r7
+
+	/*
+	 * make sure TLB contain the addr we want,
+	 * as we will access them after MMDC IO floated.
+	 * IOMUXC has to be included as well: it is written in
+	 * set_mmdc_io_lpm/restore_mmdc_io while DDR is already in
+	 * self-refresh, so a TLB miss at that point could not be
+	 * served by a page table walk into DRAM.
+	 */
+
+	ldr	r11, [r0, #PM_INFO_MX6Q_CCM_V_OFFSET]
+	ldr	r6, [r11, #0x0]
+	ldr	r11, [r0, #PM_INFO_MX6Q_GPC_V_OFFSET]
+	ldr	r6, [r11, #0x0]
+	ldr	r11, [r0, #PM_INFO_MX6Q_IOMUXC_V_OFFSET]
+	ldr	r6, [r11, #0x0]
+
+	/* use r11 to store the IO address */
+	ldr	r11, [r0, #PM_INFO_MX6Q_SRC_V_OFFSET]
+	/* store physical resume addr and pm_info address. */
+	str	r9, [r11, #MX6Q_SRC_GPR1]
+	str	r1, [r11, #MX6Q_SRC_GPR2]
+
+	/* need to sync L2 cache before DSM. */
+	sync_l2_cache
+
+	ldr	r11, [r0, #PM_INFO_MX6Q_MMDC_V_OFFSET]
+	/*
+	 * put DDR explicitly into self-refresh and
+	 * disable automatic power savings.
+	 */
+	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
+	orr	r7, r7, #0x1
+	str	r7, [r11, #MX6Q_MMDC_MAPSR]
+
+	/* make the DDR explicitly enter self-refresh. */
+	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
+	orr	r7, r7, #(1 << 21)
+	str	r7, [r11, #MX6Q_MMDC_MAPSR]
+
+	/* wait until MAPSR bit 25 is set before floating the IO */
+poll_dvfs_set_1:
+	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
+	ands	r7, r7, #(1 << 25)
+	beq	poll_dvfs_set_1
+
+	/*
+	 * write 0 to every IOMUXC register listed in mmdc_io_val.
+	 * r7 = entries left, r8 = offset of the current entry in
+	 * pm_info; entries are 8 bytes, only the first word (the
+	 * IOMUXC register offset) is needed here.
+	 */
+	ldr	r11, [r0, #PM_INFO_MX6Q_IOMUXC_V_OFFSET]
+	ldr	r6, =0x0
+	ldr	r7, [r0, #PM_INFO_MMDC_IO_NUM_OFFSET]
+	ldr	r8, =PM_INFO_MMDC_IO_VAL_OFFSET
+set_mmdc_io_lpm:
+	ldr	r9, [r0, r8]
+	str	r6, [r11, r9]
+	add	r8, r8, #0x8
+	sub	r7, r7, #0x1
+	cmp     r7, #0x0
+	bne	set_mmdc_io_lpm
+
+	/*
+	 * mask all GPC interrupts before
+	 * enabling the RBC counters to
+	 * avoid the counter starting too
+	 * early if an interrupt is already
+	 * pending.
+	 * r6-r9 keep the original IMR1-IMR4 values until they are
+	 * written back below.
+	 */
+	ldr	r11, [r0, #PM_INFO_MX6Q_GPC_V_OFFSET]
+	ldr	r6, [r11, #MX6Q_GPC_IMR1]
+	ldr	r7, [r11, #MX6Q_GPC_IMR2]
+	ldr	r8, [r11, #MX6Q_GPC_IMR3]
+	ldr	r9, [r11, #MX6Q_GPC_IMR4]
+
+	ldr	r10, =0xffffffff
+	str	r10, [r11, #MX6Q_GPC_IMR1]
+	str	r10, [r11, #MX6Q_GPC_IMR2]
+	str	r10, [r11, #MX6Q_GPC_IMR3]
+	str	r10, [r11, #MX6Q_GPC_IMR4]
+
+	/*
+	 * enable the RBC bypass counter here
+	 * to hold off the interrupts. RBC counter
+	 * = 32 (1ms), Minimum RBC delay should be
+	 * 400us for the analog LDOs to power down.
+	 */
+	ldr	r11, [r0, #PM_INFO_MX6Q_CCM_V_OFFSET]
+	ldr	r10, [r11, #MX6Q_CCM_CCR]
+	bic	r10, r10, #(0x3f << 21)
+	orr	r10, r10, #(0x20 << 21)
+	str	r10, [r11, #MX6Q_CCM_CCR]
+
+	/* enable the counter. */
+	ldr	r10, [r11, #MX6Q_CCM_CCR]
+	orr	r10, r10, #(0x1 << 27)
+	str	r10, [r11, #MX6Q_CCM_CCR]
+
+	/* unmask all the GPC interrupts. */
+	ldr	r11, [r0, #PM_INFO_MX6Q_GPC_V_OFFSET]
+	str	r6, [r11, #MX6Q_GPC_IMR1]
+	str	r7, [r11, #MX6Q_GPC_IMR2]
+	str	r8, [r11, #MX6Q_GPC_IMR3]
+	str	r9, [r11, #MX6Q_GPC_IMR4]
+
+	/*
+	 * now delay for a short while (3usec)
+	 * ARM is at 1GHz at this point
+	 * so a short loop should be enough.
+	 * this delay is required to ensure that
+	 * the RBC counter can start counting in
+	 * case an interrupt is already pending
+	 * or in case an interrupt arrives just
+	 * as ARM is about to assert DSM_request.
+	 */
+	ldr     r6, =2000
+rbc_loop:
+	sub     r6, r6, #0x1
+	cmp     r6, #0x0
+	bne     rbc_loop
+
+	/* Zzz, enter stop mode */
+	wfi
+	nop
+	nop
+	nop
+	nop
+
+	/*
+	 * run to here means there is pending
+	 * wakeup source, system should auto
+	 * resume, we need to restore MMDC IO first
+	 * r6 = entries left, r7 = offset of the current entry in
+	 * pm_info, r8 = IOMUXC register offset, r9 = saved value.
+	 */
+	ldr	r11, [r0, #PM_INFO_MX6Q_IOMUXC_V_OFFSET]
+	ldr	r6, [r0, #PM_INFO_MMDC_IO_NUM_OFFSET]
+	ldr	r7, =PM_INFO_MMDC_IO_VAL_OFFSET
+restore_mmdc_io:
+	ldr	r8, [r0, r7]
+	add	r7, r7, #0x4
+	ldr	r9, [r0, r7]
+	add	r7, r7, #0x4
+	str	r9, [r11, r8]
+	sub	r6, r6, #0x1
+	cmp     r6, #0x0
+	bne	restore_mmdc_io
+
+	ldr	r11, [r0, #PM_INFO_MX6Q_MMDC_V_OFFSET]
+	/* let DDR out of self-refresh. */
+	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
+	bic	r7, r7, #(1 << 21)
+	str	r7, [r11, #MX6Q_MMDC_MAPSR]
+
+	/* wait until MAPSR bit 25 is cleared again */
+poll_dvfs_clear_2:
+	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
+	ands	r7, r7, #(1 << 25)
+	bne     poll_dvfs_clear_2
+	/* enable DDR auto power saving */
+	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
+	bic	r7, r7, #0x1
+	str	r7, [r11, #MX6Q_MMDC_MAPSR]
+	/* return to suspend finish */
+	mov	pc, lr
+
+	/*
+	 * entry point after a real power down; its address inside the
+	 * relocated copy was stored in SRC_GPR1 above. Only the
+	 * physical (*_P_) IO bases are used from here on.
+	 */
+resume:
+	/* invalidate L1 I-cache and the branch predictor first */
+	mov     r6, #0x0
+	mcr     p15, 0, r6, c7, c5, 0
+	mcr     p15, 0, r6, c7, c5, 6
+	/* enable the Icache and branch prediction */
+	mov     r6, #0x1800
+	mcr     p15, 0, r6, c1, c0, 0
+	isb
+
+	/* get physical resume address from pm_info. */
+	ldr	lr, [r0, #PM_INFO_RESUME_ADDR_OFFSET]
+	/* clear core0's entry and parameter */
+	ldr	r11, [r0, #PM_INFO_MX6Q_SRC_P_OFFSET]
+	mov	r7, #0
+	str	r7, [r11, #MX6Q_SRC_GPR1]
+	str	r7, [r11, #MX6Q_SRC_GPR2]
+
+	/* restore the saved MMDC IO settings, same loop as above */
+	ldr	r11, [r0, #PM_INFO_MX6Q_IOMUXC_P_OFFSET]
+	ldr	r6, [r0, #PM_INFO_MMDC_IO_NUM_OFFSET]
+	ldr	r7, =PM_INFO_MMDC_IO_VAL_OFFSET
+dsm_restore_mmdc_io:
+	ldr	r8, [r0, r7]
+	add	r7, r7, #0x4
+	ldr	r9, [r0, r7]
+	add	r7, r7, #0x4
+	str	r9, [r11, r8]
+	sub	r6, r6, #0x1
+	cmp     r6, #0x0
+	bne	dsm_restore_mmdc_io
+
+	ldr	r11, [r0, #PM_INFO_MX6Q_MMDC_P_OFFSET]
+	/* let DDR out of self-refresh */
+	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
+	bic	r7, r7, #(1 << 21)
+	str	r7, [r11, #MX6Q_MMDC_MAPSR]
+
+	/* wait until MAPSR bit 25 is cleared again */
+poll_dvfs_clear_1:
+	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
+	ands	r7, r7, #(1 << 25)
+	bne	poll_dvfs_clear_1
+	/* enable DDR auto power saving */
+	ldr	r7, [r11, #MX6Q_MMDC_MAPSR]
+	bic	r7, r7, #0x1
+	str	r7, [r11, #MX6Q_MMDC_MAPSR]
+	/* jump to the resume address loaded from pm_info */
+	mov	pc, lr
+ENDPROC(imx6_suspend)