diff mbox series

[U-Boot,097/126] x86: Add support for newer CAR schemes

Message ID 20190925145750.200592-98-sjg@chromium.org
State Superseded
Delegated to: Bin Meng
Headers show
Series x86: Add initial support for apollolake | expand

Commit Message

Simon Glass Sept. 25, 2019, 2:57 p.m. UTC
Newer Intel SoCs have different ways of setting up cache-as-ram (CAR).
Add support for these along with suitable configuration options.

Signed-off-by: Simon Glass <sjg@chromium.org>
---

 arch/x86/Kconfig                        |  16 +
 arch/x86/cpu/intel_common/Kconfig       |  18 +
 arch/x86/cpu/intel_common/Makefile      |   8 +
 arch/x86/cpu/intel_common/car2.S        | 490 ++++++++++++++++++++++++
 arch/x86/cpu/intel_common/car2_uninit.S |  87 +++++
 5 files changed, 619 insertions(+)
 create mode 100644 arch/x86/cpu/intel_common/Kconfig
 create mode 100644 arch/x86/cpu/intel_common/car2.S
 create mode 100644 arch/x86/cpu/intel_common/car2_uninit.S

Comments

Bin Meng Oct. 10, 2019, 9:50 a.m. UTC | #1
Hi Simon,

On Wed, Sep 25, 2019 at 10:59 PM Simon Glass <sjg@chromium.org> wrote:
>
> Newer Intel SoCs have different ways of setting up cache-as-ram (CAR).
> Add support for these along with suitable configuration options.
>

I wonder why we need to do this in U-Boot. Isn't FSP-T setting up CAR for us?

> Signed-off-by: Simon Glass <sjg@chromium.org>
> ---
>
>  arch/x86/Kconfig                        |  16 +
>  arch/x86/cpu/intel_common/Kconfig       |  18 +
>  arch/x86/cpu/intel_common/Makefile      |   8 +
>  arch/x86/cpu/intel_common/car2.S        | 490 ++++++++++++++++++++++++
>  arch/x86/cpu/intel_common/car2_uninit.S |  87 +++++
>  5 files changed, 619 insertions(+)
>  create mode 100644 arch/x86/cpu/intel_common/Kconfig
>  create mode 100644 arch/x86/cpu/intel_common/car2.S
>  create mode 100644 arch/x86/cpu/intel_common/car2_uninit.S
>
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 556e26080de..e34c71ec4cb 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -876,4 +876,20 @@ config HIGH_TABLE_SIZE
>           Increse it if the default size does not fit the board's needs.
>           This is most likely due to a large ACPI DSDT table is used.
>
> +config INTEL_CAR_CQOS
> +       bool "Support Intel Cache Quality of Service"
> +       help
> +         Cache Quality of Service allows more fine-grained control of cache
> +         usage. As result, it is possible to set up a portion of L2 cache for
> +         CAR and use the remainder for actual caching.
> +
> +#
> +# Each bit in QOS mask controls this many bytes. This is calculated as:
> +# (CACHE_WAYS / CACHE_BITS_PER_MASK) * CACHE_LINE_SIZE * CACHE_SETS
> +#
> +config CACHE_QOS_SIZE_PER_BIT
> +       hex
> +       depends on INTEL_CAR_CQOS
> +       default 0x20000 # 128 KB
> +
>  endmenu
> diff --git a/arch/x86/cpu/intel_common/Kconfig b/arch/x86/cpu/intel_common/Kconfig
> new file mode 100644
> index 00000000000..a4f46b1108b
> --- /dev/null
> +++ b/arch/x86/cpu/intel_common/Kconfig
> @@ -0,0 +1,18 @@
> +config INTEL_PMC
> +       bool "Intel Power-management Controller"
> +       select POWER_MGR
> +       help
> +         Enable support for the common Intel power-management controller which
> +         provides features including checking whether the system started from
> +         resume, powering off the system and enabling/disabling the reset
> +         mechanism.
> +
> +config SPL_INTEL_PMC
> +       bool "Intel Power-management Controller in SPL"
> +       default y if SPL && INTEL_PMC
> +       select SPL_POWER_MGR
> +       help
> +         Enable support for the common Intel power-management controller which
> +         provides features including checking whether the system started from
> +         resume, powering off the system and enabling/disabling the reset
> +         mechanism.

I think the above two options (INTEL_PMC and SPL_INTEL_PMC) should not be in this patch.

> diff --git a/arch/x86/cpu/intel_common/Makefile b/arch/x86/cpu/intel_common/Makefile
> index 2de567dd9fe..f620747a7d2 100644
> --- a/arch/x86/cpu/intel_common/Makefile
> +++ b/arch/x86/cpu/intel_common/Makefile
> @@ -8,6 +8,14 @@ obj-$(CONFIG_$(SPL_TPL_)X86_32BIT_INIT) += me_status.o
>  obj-$(CONFIG_$(SPL_TPL_)X86_32BIT_INIT) += report_platform.o
>  obj-$(CONFIG_$(SPL_TPL_)X86_32BIT_INIT) += mrc.o
>  endif
> +
> +ifdef CONFIG_FSP_VERSION2
> +obj-$(CONFIG_TPL_BUILD) += car2.o
> +ifndef CONFIG_SPL_BUILD
> +obj-y += car2_uninit.o
> +endif
> +endif
> +
>  obj-y += cpu.o
>  obj-$(CONFIG_SPI_FLASH_INTEL_FAST) += fast_spi.o
>  obj-y += lpc.o
> diff --git a/arch/x86/cpu/intel_common/car2.S b/arch/x86/cpu/intel_common/car2.S
> new file mode 100644
> index 00000000000..ac07fe5ea6a
> --- /dev/null
> +++ b/arch/x86/cpu/intel_common/car2.S
> @@ -0,0 +1,490 @@
> +/* SPDX-License-Identifier: GPL-2.0+ */
> +/*
> + * This file is part of the coreboot project.
> + *
> + * Copyright (C) 2015-2016 Intel Corp.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; version 2 of the License.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *

nit: there is already an SPDX license identifier, so the license boilerplate above is redundant

> + */
> +
> +#include <config.h>
> +#include <asm/msr-index.h>
> +#include <asm/mtrr.h>
> +#include <asm/post.h>
> +#include <asm/processor-flags.h>
> +
> +#define KiB 1024
> +
> +.global car_init
> +car_init:
> +       post_code(0x20)
> +
> +       /*
> +        * Use the MTRR default type MSR as a proxy for detecting INIT#.
> +        * Reset the system if any known bits are set in that MSR. That is
> +        * an indication of the CPU not being properly reset.
> +        */
> +check_for_clean_reset:
> +       mov     $MTRR_DEF_TYPE_MSR, %ecx
> +       rdmsr
> +       and     $(MTRR_DEF_TYPE_EN | MTRR_DEF_TYPE_FIX_EN), %eax
> +       cmp     $0, %eax
> +       jz      no_reset
> +       /* perform warm reset */
> +       movw    $0xcf9, %dx
> +       movb    $0x06, %al
> +       outb    %al, %dx
> +
> +no_reset:
> +       post_code(0x21)

Can we use values from post.h?

> +
> +       /* Clear/disable fixed MTRRs */
> +       mov     $fixed_mtrr_list_size, %ebx
> +       xor     %eax, %eax
> +       xor     %edx, %edx
> +
> +clear_fixed_mtrr:
> +       add     $-2, %ebx
> +       movzwl  fixed_mtrr_list(%ebx), %ecx
> +       wrmsr
> +       jnz     clear_fixed_mtrr
> +
> +       post_code(0x22)
> +
> +       /* Figure put how many MTRRs we have, and clear them out */
> +       mov     $MTRR_CAP_MSR, %ecx
> +       rdmsr
> +       movzb   %al, %ebx               /* Number of variable MTRRs */
> +       mov     $MTRR_PHYS_BASE_MSR(0), %ecx
> +       xor     %eax, %eax
> +       xor     %edx, %edx
> +
> +clear_var_mtrr:
> +       wrmsr
> +       inc     %ecx
> +       wrmsr
> +       inc     %ecx
> +       dec     %ebx
> +       jnz     clear_var_mtrr
> +
> +       post_code(0x23)
> +
> +       /* Configure default memory type to uncacheable (UC) */
> +       mov     $MTRR_DEF_TYPE_MSR, %ecx
> +       rdmsr
> +       /* Clear enable bits and set default type to UC. */
> +       and     $~(MTRR_DEF_TYPE_MASK | MTRR_DEF_TYPE_EN | \
> +                MTRR_DEF_TYPE_FIX_EN), %eax
> +       wrmsr
> +
> +       /* Configure MTRR_PHYS_MASK_HIGH for proper addressing above 4GB

nit: wrong multi-line comment format

> +        * based on the physical address size supported for this processor
> +        * This is based on read from CPUID EAX = 080000008h, EAX bits [7:0]
> +        *
> +        * Examples:
> +        *  MTRR_PHYS_MASK_HIGH = 00000000Fh  For 36 bit addressing
> +        *  MTRR_PHYS_MASK_HIGH = 0000000FFh  For 40 bit addressing
> +        */
> +
> +       movl    $0x80000008, %eax       /* Address sizes leaf */
> +       cpuid
> +       sub     $32, %al
> +       movzx   %al, %eax
> +       xorl    %esi, %esi
> +       bts     %eax, %esi
> +       dec     %esi                    /* esi <- MTRR_PHYS_MASK_HIGH */
> +
> +       post_code(0x24)
> +
> +#if ((CONFIG_DCACHE_RAM_SIZE & (CONFIG_DCACHE_RAM_SIZE - 1)) == 0)
> +       /* Configure CAR region as write-back (WB) */
> +       mov     $MTRR_PHYS_BASE_MSR(0), %ecx
> +       mov     $CONFIG_DCACHE_RAM_BASE, %eax
> +       or      $MTRR_TYPE_WRBACK, %eax
> +       xor     %edx,%edx
> +       wrmsr
> +
> +       /* Configure the MTRR mask for the size region */
> +       mov     $MTRR_PHYS_MASK(0), %ecx
> +       mov     $CONFIG_DCACHE_RAM_SIZE, %eax   /* size mask */
> +       dec     %eax
> +       not     %eax
> +       or      $MTRR_PHYS_MASK_VALID, %eax
> +       movl    %esi, %edx      /* edx <- MTRR_PHYS_MASK_HIGH */
> +       wrmsr
> +#elif (CONFIG_DCACHE_RAM_SIZE == 768 * KiB) /* 768 KiB */
> +       /* Configure CAR region as write-back (WB) */
> +       mov     $MTRR_PHYS_BASE_MSR(0), %ecx
> +       mov     $CONFIG_DCACHE_RAM_BASE, %eax
> +       or      $MTRR_TYPE_WRBACK, %eax
> +       xor     %edx,%edx
> +       wrmsr
> +
> +       mov     $MTRR_PHYS_MASK_MSR(0), %ecx
> +       mov     $(512 * KiB), %eax      /* size mask */
> +       dec     %eax
> +       not     %eax
> +       or      $MTRR_PHYS_MASK_VALID, %eax
> +       movl    %esi, %edx      /* edx <- MTRR_PHYS_MASK_HIGH */
> +       wrmsr
> +
> +       mov     $MTRR_PHYS_BASE_MSR(1), %ecx
> +       mov     $(CONFIG_DCACHE_RAM_BASE + 512 * KiB), %eax
> +       or      $MTRR_TYPE_WRBACK, %eax
> +       xor     %edx,%edx
> +       wrmsr
> +
> +       mov     $MTRR_PHYS_MASK_MSR(1), %ecx
> +       mov     $(256 * KiB), %eax      /* size mask */
> +       dec     %eax
> +       not     %eax
> +       or      $MTRR_PHYS_MASK_VALID, %eax
> +       movl    %esi, %edx      /* edx <- MTRR_PHYS_MASK_HIGH */
> +       wrmsr
> +#else
> +#error "DCACHE_RAM_SIZE is not a power of 2 and setup code is missing"
> +#endif
> +       post_code(0x25)
> +
> +       /* start */
> +/*     mov     $0xffff80a8, %ebx */
> +/*     jmp     *%ebx */
> +.globl _from_bb
> +_from_bb:
> +/*     jmp     car_init_ret */
> +       /* end */
> +
> +       /* Enable variable MTRRs */
> +       mov     $MTRR_DEF_TYPE_MSR, %ecx
> +       rdmsr
> +       or      $MTRR_DEF_TYPE_EN, %eax
> +       wrmsr
> +
> +       /* Enable caching */
> +       mov     %cr0, %eax
> +       and     $~(X86_CR0_CD | X86_CR0_NW), %eax
> +       invd
> +       mov     %eax, %cr0
> +
> +#if IS_ENABLED(CONFIG_INTEL_CAR_NEM)
> +       jmp car_nem
> +#elif IS_ENABLED(CONFIG_INTEL_CAR_CQOS)
> +       jmp car_cqos
> +#elif IS_ENABLED(CONFIG_INTEL_CAR_NEM_ENHANCED)
> +       jmp car_nem_enhanced
> +#else
> +#error "No CAR mechanism selected:
> +#endif
> +       jmp     car_init_ret
> +
> +#if 0
> +.global car_init_done
> +car_init_done:
> +
> +       post_code(0x29)
> +
> +       /* Setup bootblock stack */
> +       mov     $_car_stack_end, %esp
> +
> +       /* Need to align stack to 16 bytes at call instruction. Account for
> +          the two pushes below. */
> +       andl    $0xfffffff0, %esp
> +       sub     $8, %esp
> +
> +       /*push TSC value to stack*/
> +       movd    %mm2, %eax
> +       pushl   %eax    /* tsc[63:32] */
> +       movd    %mm1, %eax
> +       pushl   %eax    /* tsc[31:0] */
> +
> +before_carstage:
> +       post_code(0x2A)
> +
> +       call    bootblock_c_entry

where is this function?

> +       /* Never reached */
> +#endif
> +
> +fixed_mtrr_list:
> +       .word   MTRR_FIX_64K_00000_MSR
> +       .word   MTRR_FIX_16K_80000_MSR
> +       .word   MTRR_FIX_16K_A0000_MSR
> +       .word   MTRR_FIX_4K_C0000_MSR
> +       .word   MTRR_FIX_4K_C8000_MSR
> +       .word   MTRR_FIX_4K_D0000_MSR
> +       .word   MTRR_FIX_4K_D8000_MSR
> +       .word   MTRR_FIX_4K_E0000_MSR
> +       .word   MTRR_FIX_4K_E8000_MSR
> +       .word   MTRR_FIX_4K_F0000_MSR
> +       .word   MTRR_FIX_4K_F8000_MSR
> +fixed_mtrr_list_size = . - fixed_mtrr_list
> +
> +#if IS_ENABLED(CONFIG_INTEL_CAR_NEM)
> +.global car_nem
> +car_nem:
> +       /* Disable cache eviction (setup stage) */
> +       mov     $MSR_EVICT_CTL, %ecx
> +       rdmsr
> +       or      $0x1, %eax
> +       wrmsr
> +
> +       post_code(0x26)
> +
> +       /* Clear the cache memory region. This will also fill up the cache */
> +       movl    $CONFIG_DCACHE_RAM_BASE, %edi
> +       movl    $CONFIG_DCACHE_RAM_SIZE, %ecx
> +       shr     $0x02, %ecx
> +       xor     %eax, %eax
> +       cld
> +       rep     stosl
> +
> +       post_code(0x27)
> +
> +       /* Disable cache eviction (run stage) */
> +       mov     $MSR_EVICT_CTL, %ecx
> +       rdmsr
> +       or      $0x2, %eax
> +       wrmsr
> +
> +       post_code(0x28)
> +
> +       jmp car_init_done
> +
> +#elif IS_ENABLED(CONFIG_INTEL_CAR_CQOS)
> +.global car_cqos
> +car_cqos:
> +       /*
> +        * Create CBM_LEN_MASK based on CBM_LEN
> +        * Get CPUID.(EAX=10H, ECX=2H):EAX.CBM_LEN[bits 4:0]
> +        */
> +       mov $0x10, %eax
> +       mov $0x2,  %ecx
> +       cpuid
> +       and $0x1F, %eax
> +       add $1, %al
> +
> +       mov $1, %ebx
> +       mov %al, %cl
> +       shl %cl, %ebx
> +       sub $1, %ebx
> +
> +       /* Store the CBM_LEN_MASK in mm3 for later use. */
> +       movd %ebx, %mm3
> +
> +       /*
> +        * Disable both L1 and L2 prefetcher. For yet-to-understood reason,
> +        * prefetchers slow down filling cache with rep stos in CQOS mode.
> +        */
> +       mov     $MSR_PREFETCH_CTL, %ecx
> +       rdmsr
> +       or      $(PREFETCH_L1_DISABLE | PREFETCH_L2_DISABLE), %eax
> +       wrmsr
> +
> +#if (CONFIG_DCACHE_RAM_SIZE == CONFIG_L2_CACHE_SIZE)
> +/*
> + * If CAR size is set to full L2 size, mask is calculated as all-zeros.
> + * This is not supported by the CPU/uCode.
> + */
> +#error "CQOS CAR may not use whole L2 cache area"
> +#endif
> +
> +       /* Calculate how many bits to be used for CAR */
> +       xor     %edx, %edx
> +       mov     $CONFIG_DCACHE_RAM_SIZE, %eax   /* dividend */
> +       mov     $CONFIG_CACHE_QOS_SIZE_PER_BIT, %ecx    /* divisor */
> +       div     %ecx            /* result is in eax */
> +       mov     %eax, %ecx      /* save to ecx */
> +       mov     $1, %ebx
> +       shl     %cl, %ebx
> +       sub     $1, %ebx        /* resulting mask is is in ebx */
> +
> +       /* Set this mask for initial cache fill */
> +       mov     $MSR_L2_QOS_MASK(0), %ecx
> +       rdmsr
> +       mov     %ebx, %eax
> +       wrmsr
> +
> +       /* Set CLOS selector to 0 */
> +       mov     $MSR_IA32_PQR_ASSOC, %ecx
> +       rdmsr
> +       and     $~MSR_IA32_PQR_ASSOC_MASK, %edx /* select mask 0 */
> +       wrmsr
> +
> +       /* We will need to block CAR region from evicts */
> +       mov     $MSR_L2_QOS_MASK(1), %ecx
> +       rdmsr
> +       /* Invert bits that are to be used for cache */
> +       mov     %ebx, %eax
> +       xor     $~0, %eax                       /* invert 32 bits */
> +
> +       /*
> +        * Use CBM_LEN_MASK stored in mm3 to set bits based on Capacity Bit
> +        * Mask Length.
> +        */
> +       movd    %mm3, %ebx
> +       and     %ebx, %eax
> +       wrmsr
> +
> +       post_code(0x26)
> +
> +       /* Clear the cache memory region. This will also fill up the cache */
> +       movl    $CONFIG_DCACHE_RAM_BASE, %edi
> +       movl    $CONFIG_DCACHE_RAM_SIZE, %ecx
> +       shr     $0x02, %ecx
> +       xor     %eax, %eax
> +       cld
> +       rep     stosl
> +
> +       post_code(0x27)
> +
> +       /* Cache is populated. Use mask 1 that will block evicts */
> +       mov     $MSR_IA32_PQR_ASSOC, %ecx
> +       rdmsr
> +       and     $~MSR_IA32_PQR_ASSOC_MASK, %edx /* clear index bits first */
> +       or      $1, %edx                        /* select mask 1 */
> +       wrmsr
> +
> +       /* Enable prefetchers */
> +       mov     $MSR_PREFETCH_CTL, %ecx
> +       rdmsr
> +       and     $~(PREFETCH_L1_DISABLE | PREFETCH_L2_DISABLE), %eax
> +       wrmsr
> +
> +       post_code(0x28)
> +
> +/*     jmp car_init_done */
> +       jmp     car_init_ret
> +
> +#elif IS_ENABLED(CONFIG_INTEL_CAR_NEM_ENHANCED)
> +.global car_nem_enhanced
> +car_nem_enhanced:
> +       /* Disable cache eviction (setup stage) */
> +       mov     $MSR_EVICT_CTL, %ecx
> +       rdmsr
> +       or      $0x1, %eax
> +       wrmsr
> +       post_code(0x26)
> +
> +       /* Create n-way set associativity of cache */
> +       xorl    %edi, %edi
> +find_llc_subleaf:
> +       movl    %edi, %ecx
> +       movl    $0x04, %eax
> +       cpuid
> +       inc     %edi
> +       and     $0xe0, %al      /* EAX[7:5] = Cache Level */
> +       cmp     $0x60, %al      /* Check to see if it is LLC */
> +       jnz     find_llc_subleaf
> +
> +       /*
> +        * Set MSR 0xC91 IA32_L3_MASK_! = 0xE/0xFE/0xFFE/0xFFFE
> +        * for 4/8/16 way of LLC
> +       */
> +       shr     $22, %ebx
> +       inc     %ebx
> +       /* Calculate n-way associativity of LLC */
> +       mov     %bl, %cl
> +
> +       /*
> +        * Maximizing RO cacheability while locking in the CAR to a
> +        * single way since that particular way won't be victim candidate
> +        * for evictions.
> +        * This has been done after programing LLC_WAY_MASK_1 MSR
> +        * with desired LLC way as mentioned below.
> +        *
> +        * Hence create Code and Data Size as per request
> +        * Code Size (RO) : Up to 16M
> +        * Data Size (RW) : Up to 256K
> +        */
> +       movl    $0x01, %eax
> +       /*
> +        * LLC Ways -> LLC_WAY_MASK_1:
> +        *  4: 0x000E
> +        *  8: 0x00FE
> +        * 12: 0x0FFE
> +        * 16: 0xFFFE
> +        *
> +        * These MSRs contain one bit per each way of LLC
> +        * - If this bit is '0' - the way is protected from eviction
> +        * - If this bit is '1' - the way is not protected from eviction
> +        */
> +       shl     %cl, %eax
> +       subl    $0x02, %eax
> +       movl    $MSR_IA32_L3_MASK_1, %ecx
> +       xorl    %edx, %edx
> +       wrmsr
> +       /*
> +        * Set MSR 0xC92 IA32_L3_MASK_2 = 0x1
> +        *
> +        * For SKL SOC, data size remains 256K consistently.
> +        * Hence, creating 1-way associative cache for Data
> +       */
> +       mov     $MSR_IA32_L3_MASK_2, %ecx
> +       mov     $0x01, %eax
> +       xorl    %edx, %edx
> +       wrmsr
> +       /*
> +        * Set MSR_IA32_PQR_ASSOC = 0x02
> +        *
> +        * Possible values:
> +        * 0: Default value, no way mask should be applied
> +        * 1: Apply way mask 1 to LLC
> +        * 2: Apply way mask 2 to LLC
> +        * 3: Shouldn't be use in NEM Mode
> +        */
> +       movl    $MSR_IA32_PQR_ASSOC, %ecx
> +       movl    $0x02, %eax
> +       xorl    %edx, %edx
> +       wrmsr
> +
> +       movl    $CONFIG_DCACHE_RAM_BASE, %edi
> +       movl    $CONFIG_DCACHE_RAM_SIZE, %ecx
> +       shr     $0x02, %ecx
> +       xor     %eax, %eax
> +       cld
> +       rep     stosl
> +       /*
> +        * Set MSR_IA32_PQR_ASSOC = 0x01
> +        * At this stage we apply LLC_WAY_MASK_1 to the cache.
> +        * i.e. way 0 is protected from eviction.
> +       */
> +       movl    $MSR_IA32_PQR_ASSOC, %ecx
> +       movl    $0x01, %eax
> +       xorl    %edx, %edx
> +       wrmsr
> +
> +       post_code(0x27)
> +       /*
> +        * Enable No-Eviction Mode Run State by setting
> +        * NO_EVICT_MODE MSR 2E0h bit [1] = '1'.
> +        */
> +
> +       movl    $MSR_EVICT_CTL, %ecx
> +       rdmsr
> +       orl     $0x02, %eax
> +       wrmsr
> +
> +       post_code(0x28)
> +
> +       jmp car_init_done
> +#endif
> +
> +#if CONFIG_IS_ENABLED(X86_16BIT_INIT)
> +_dt_ucode_base_size:
> +       /* These next two fields are filled in by binman */
> +.globl ucode_base
> +ucode_base:    /* Declared in microcode.h */
> +       .long   0                       /* microcode base */
> +.globl ucode_size
> +ucode_size:    /* Declared in microcode.h */
> +       .long   0                       /* microcode size */
> +       .long   CONFIG_SYS_MONITOR_BASE /* code region base */
> +       .long   CONFIG_SYS_MONITOR_LEN  /* code region size */
> +#endif
> diff --git a/arch/x86/cpu/intel_common/car2_uninit.S b/arch/x86/cpu/intel_common/car2_uninit.S
> new file mode 100644
> index 00000000000..4797ac04279
> --- /dev/null
> +++ b/arch/x86/cpu/intel_common/car2_uninit.S
> @@ -0,0 +1,87 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright 2017 Intel Corp.
> + * Copyright 2019 Google LLC
> + * Taken from coreboot file exit_car.S
> + */
> +
> +#include <config.h>
> +#include <asm/msr-index.h>
> +#include <asm/mtrr.h>
> +
> +.text
> +.global car_uninit
> +car_uninit:
> +
> +       /*
> +        * Retrieve return address from stack as it will get trashed below if
> +        * execution is utilizing the cache-as-ram stack.
> +        */
> +       pop     %ebx
> +
> +       /* Disable MTRRs. */
> +       mov     $(MTRR_DEF_TYPE_MSR), %ecx
> +       rdmsr
> +       and     $(~(MTRR_DEF_TYPE_EN | MTRR_DEF_TYPE_FIX_EN)), %eax
> +       wrmsr
> +
> +#ifdef CONFIG_INTEL_CAR_NEM
> +.global car_nem_teardown
> +car_nem_teardown:
> +
> +       /* invalidate cache contents. */
> +       invd
> +
> +       /* Knock down bit 1 then bit 0 of NEM control not combining steps. */
> +       mov     $(MSR_EVICT_CTL), %ecx
> +       rdmsr
> +       and     $(~(1 << 1)), %eax
> +       wrmsr
> +       and     $(~(1 << 0)), %eax
> +       wrmsr
> +
> +#elif IS_ENABLED(CONFIG_INTEL_CAR_CQOS)
> +.global car_cqos_teardown
> +car_cqos_teardown:
> +
> +       /* Go back to all-evicting mode, set both masks to all-1s */
> +       mov     $MSR_L2_QOS_MASK(0), %ecx
> +       rdmsr
> +       mov     $~0, %al
> +       wrmsr
> +
> +       mov     $MSR_L2_QOS_MASK(1), %ecx
> +       rdmsr
> +       mov     $~0, %al
> +       wrmsr
> +
> +       /* Reset CLOS selector to 0 */
> +       mov     $MSR_IA32_PQR_ASSOC, %ecx
> +       rdmsr
> +       and     $~MSR_IA32_PQR_ASSOC_MASK, %edx
> +       wrmsr
> +
> +#elif IS_ENABLED(CONFIG_INTEL_CAR_NEM_ENHANCED)
> +.global car_nem_enhanced_teardown
> +car_nem_enhanced_teardown:
> +
> +       /* invalidate cache contents. */
> +       invd
> +
> +       /* Knock down bit 1 then bit 0 of NEM control not combining steps. */
> +       mov     $(MSR_EVICT_CTL), %ecx
> +       rdmsr
> +       and     $(~(1 << 1)), %eax
> +       wrmsr
> +       and     $(~(1 << 0)), %eax
> +       wrmsr
> +
> +       /* Reset CLOS selector to 0 */
> +       mov     $IA32_PQR_ASSOC, %ecx
> +       rdmsr
> +       and     $~IA32_PQR_ASSOC_MASK, %edx
> +       wrmsr
> +#endif
> +
> +       /* Return to caller. */
> +       jmp     *%ebx
> --

I am not fully convinced that we need this in U-Boot if we are using FSP.

Regards,
Bin
Simon Glass Oct. 12, 2019, 3:37 a.m. UTC | #2
Hi Bin,

On Thu, 10 Oct 2019 at 03:50, Bin Meng <bmeng.cn@gmail.com> wrote:
>
> Hi Simon,
>
> On Wed, Sep 25, 2019 at 10:59 PM Simon Glass <sjg@chromium.org> wrote:
> >
> > Newer Intel SoCs have different ways of setting up cache-as-ram (CAR).
> > Add support for these along with suitable configuration options.
> >
>
> I wonder why do we need do this in U-Boot. Isn't FSP-T doing the CAR for us?

Well actually I have not tried using FSP-T yet on apollolake. I'll see
how it looks.

Regards,
Simon
Bin Meng Oct. 12, 2019, 4:47 a.m. UTC | #3
Hi Simon,

On Sat, Oct 12, 2019 at 11:38 AM Simon Glass <sjg@chromium.org> wrote:
>
> Hi Bin,
>
> On Thu, 10 Oct 2019 at 03:50, Bin Meng <bmeng.cn@gmail.com> wrote:
> >
> > Hi Simon,
> >
> > On Wed, Sep 25, 2019 at 10:59 PM Simon Glass <sjg@chromium.org> wrote:
> > >
> > > Newer Intel SoCs have different ways of setting up cache-as-ram (CAR).
> > > Add support for these along with suitable configuration options.
> > >
> >
> > I wonder why do we need do this in U-Boot. Isn't FSP-T doing the CAR for us?
>
> Well actually I have not tried using FSP-T yet on apollolake. I'll see
> how it looks.

It looks like only FSP-M is used so far in your apollolake port.

What I'd like to see is complete FSP 2.0 support in U-Boot, which
means we need FSP-T for the CAR and FSP-S for the silicon-specific
initialization. With FSP-S, I believe most of the platform support
code in this patch series is no longer needed.

Regards,
Bin
Simon Glass Oct. 12, 2019, 5:53 p.m. UTC | #4
Hi Bin,

On Fri, 11 Oct 2019 at 22:48, Bin Meng <bmeng.cn@gmail.com> wrote:
>
> Hi Simon,
>
> On Sat, Oct 12, 2019 at 11:38 AM Simon Glass <sjg@chromium.org> wrote:
> >
> > Hi Bin,
> >
> > On Thu, 10 Oct 2019 at 03:50, Bin Meng <bmeng.cn@gmail.com> wrote:
> > >
> > > Hi Simon,
> > >
> > > On Wed, Sep 25, 2019 at 10:59 PM Simon Glass <sjg@chromium.org> wrote:
> > > >
> > > > Newer Intel SoCs have different ways of setting up cache-as-ram (CAR).
> > > > Add support for these along with suitable configuration options.
> > > >
> > >
> > > I wonder why do we need do this in U-Boot. Isn't FSP-T doing the CAR for us?
> >
> > Well actually I have not tried using FSP-T yet on apollolake. I'll see
> > how it looks.
>
> It looks so far only FSP-M is used on your apollolake port.
>
> What I'd like to see is a complete FSP 2.0 support in U-Boot, which
> means we need FSP-T for the CAR and FSP-S for the silicon-specific
> initialization. With FSP-S, I believe most of your platform support
> codes in this patch series are no longer needed.

I have actually got FSP-S running - see u-boot-dm/coral2-working,
along with display, MMC, etc. There is very little init in U-Boot
itself and my feeling is that most of the TPL/SPL init is actually
needed. We cannot run FSP-S until the CAR is turned off, so it has to
run in U-Boot.

I also just got an Up board so can give that a try one day assuming I
have the SPI adaptor. But I think you might have some apollolake
boards too?

Regards,
Simon
Bin Meng Oct. 14, 2019, 1:58 a.m. UTC | #5
Hi Simon,

On Sun, Oct 13, 2019 at 1:53 AM Simon Glass <sjg@chromium.org> wrote:
>
> Hi Bin,
>
> On Fri, 11 Oct 2019 at 22:48, Bin Meng <bmeng.cn@gmail.com> wrote:
> >
> > Hi Simon,
> >
> > On Sat, Oct 12, 2019 at 11:38 AM Simon Glass <sjg@chromium.org> wrote:
> > >
> > > Hi Bin,
> > >
> > > On Thu, 10 Oct 2019 at 03:50, Bin Meng <bmeng.cn@gmail.com> wrote:
> > > >
> > > > Hi Simon,
> > > >
> > > > On Wed, Sep 25, 2019 at 10:59 PM Simon Glass <sjg@chromium.org> wrote:
> > > > >
> > > > > Newer Intel SoCs have different ways of setting up cache-as-ram (CAR).
> > > > > Add support for these along with suitable configuration options.
> > > > >
> > > >
> > > > I wonder why do we need do this in U-Boot. Isn't FSP-T doing the CAR for us?
> > >
> > > Well actually I have not tried using FSP-T yet on apollolake. I'll see
> > > how it looks.
> >
> > It looks so far only FSP-M is used on your apollolake port.
> >
> > What I'd like to see is a complete FSP 2.0 support in U-Boot, which
> > means we need FSP-T for the CAR and FSP-S for the silicon-specific
> > initialization. With FSP-S, I believe most of your platform support
> > codes in this patch series are no longer needed.
>
> I have actually got FSP-S running - see u-boot-dm/coral2-working,
> along with display, MMC, etc. There is very little init in U-Boot
> itself and my feeling is that most of the TPL/SPL init is actually
> needed. We cannot run FSP-S until the CAR is turned off, so it has to
> run in U-Boot.

OK, so why do we want to introduce TPL on ApolloLake? I think there is
no size limitation, so you can just run U-Boot directly on
ApolloLake, no?

>
> I also just got an Up board so can give that a try one day assuming I
> have the SPI adaptor. But I think you might have some apollolake
> boards too?

Yes, I have one of the ApolloLake CRBs from Intel and plan to try it at
some point, but some other stuff has been occupying my time :)

Regards,
Bin
Simon Glass Oct. 14, 2019, 8:51 p.m. UTC | #6
Hi Bin,

On Sun, 13 Oct 2019 at 19:58, Bin Meng <bmeng.cn@gmail.com> wrote:
>
> Hi Simon,
>
> On Sun, Oct 13, 2019 at 1:53 AM Simon Glass <sjg@chromium.org> wrote:
> >
> > Hi Bin,
> >
> > On Fri, 11 Oct 2019 at 22:48, Bin Meng <bmeng.cn@gmail.com> wrote:
> > >
> > > Hi Simon,
> > >
> > > On Sat, Oct 12, 2019 at 11:38 AM Simon Glass <sjg@chromium.org> wrote:
> > > >
> > > > Hi Bin,
> > > >
> > > > On Thu, 10 Oct 2019 at 03:50, Bin Meng <bmeng.cn@gmail.com> wrote:
> > > > >
> > > > > Hi Simon,
> > > > >
> > > > > On Wed, Sep 25, 2019 at 10:59 PM Simon Glass <sjg@chromium.org> wrote:
> > > > > >
> > > > > > Newer Intel SoCs have different ways of setting up cache-as-ram (CAR).
> > > > > > Add support for these along with suitable configuration options.
> > > > > >
> > > > >
> > > > > I wonder why do we need do this in U-Boot. Isn't FSP-T doing the CAR for us?
> > > >
> > > > Well actually I have not tried using FSP-T yet on apollolake. I'll see
> > > > how it looks.
> > >
> > > It looks so far only FSP-M is used on your apollolake port.
> > >
> > > What I'd like to see is a complete FSP 2.0 support in U-Boot, which
> > > means we need FSP-T for the CAR and FSP-S for the silicon-specific
> > > initialization. With FSP-S, I believe most of your platform support
> > > codes in this patch series are no longer needed.
> >
> > I have actually got FSP-S running - see u-boot-dm/coral2-working,
> > along with display, MMC, etc. There is very little init in U-Boot
> > itself and my feeling is that most of the TPL/SPL init is actually
> > needed. We cannot run FSP-S until the CAR is turned off, so it has to
> > run in U-Boot.
>
> OK, so why do we want to introduce TPL on ApolloLake? I think there is
> no size limitation that you can just run U-Boot directly on
> ApolloLake, no?

More details here:

https://gitlab.denx.de/u-boot/custodians/u-boot-dm/blob/coral2-working/doc/board/google/chromebook_coral.rst

In short, APL only supports booting a 30KB image to start. Then we
need to load something else that can set up DRAM, which is SPL. After
that we can load U-Boot. So we have to have TPL on APL (and perhaps
later) CPUs.

>
> >
> > I also just got an Up board so can give that a try one day assuming I
> > have the SPI adaptor. But I think you might have some apollolake
> > boards too?
>
> Yes, I have one of the ApolloLake CRB from Intel and planned to try at
> some point but some other stuff has been occupying my time :)

Sounds good! I am not sure I will be able to get FSP-T going so may
ask for help on that.

- Simon
diff mbox series

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 556e26080de..e34c71ec4cb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -876,4 +876,20 @@  config HIGH_TABLE_SIZE
 	  Increse it if the default size does not fit the board's needs.
 	  This is most likely due to a large ACPI DSDT table is used.
 
+config INTEL_CAR_CQOS
+	bool "Support Intel Cache Quality of Service"
+	help
+	  Cache Quality of Service allows more fine-grained control of cache
+	  usage. As result, it is possible to set up a portion of L2 cache for
+	  CAR and use the remainder for actual caching.
+
+#
+# Each bit in QOS mask controls this many bytes. This is calculated as:
+# (CACHE_WAYS / CACHE_BITS_PER_MASK) * CACHE_LINE_SIZE * CACHE_SETS
+#
+config CACHE_QOS_SIZE_PER_BIT
+	hex
+	depends on INTEL_CAR_CQOS
+	default 0x20000 # 128 KB
+
 endmenu
diff --git a/arch/x86/cpu/intel_common/Kconfig b/arch/x86/cpu/intel_common/Kconfig
new file mode 100644
index 00000000000..a4f46b1108b
--- /dev/null
+++ b/arch/x86/cpu/intel_common/Kconfig
@@ -0,0 +1,18 @@ 
+config INTEL_PMC
+	bool "Intel Power-management Controller"
+	select POWER_MGR
+	help
+	  Enable support for the common Intel power-management controller which
+	  provides features including checking whether the system started from
+	  resume, powering off the system and enabling/disabling the reset
+	  mechanism.
+
+config SPL_INTEL_PMC
+	bool "Intel Power-management Controller in SPL"
+	default y if SPL && INTEL_PMC
+	select SPL_POWER_MGR
+	help
+	  Enable support for the common Intel power-management controller which
+	  provides features including checking whether the system started from
+	  resume, powering off the system and enabling/disabling the reset
+	  mechanism.
diff --git a/arch/x86/cpu/intel_common/Makefile b/arch/x86/cpu/intel_common/Makefile
index 2de567dd9fe..f620747a7d2 100644
--- a/arch/x86/cpu/intel_common/Makefile
+++ b/arch/x86/cpu/intel_common/Makefile
@@ -8,6 +8,14 @@  obj-$(CONFIG_$(SPL_TPL_)X86_32BIT_INIT) += me_status.o
 obj-$(CONFIG_$(SPL_TPL_)X86_32BIT_INIT) += report_platform.o
 obj-$(CONFIG_$(SPL_TPL_)X86_32BIT_INIT) += mrc.o
 endif
+
+ifdef CONFIG_FSP_VERSION2
+obj-$(CONFIG_TPL_BUILD) += car2.o
+ifndef CONFIG_SPL_BUILD
+obj-y += car2_uninit.o
+endif
+endif
+
 obj-y += cpu.o
 obj-$(CONFIG_SPI_FLASH_INTEL_FAST) += fast_spi.o
 obj-y += lpc.o
diff --git a/arch/x86/cpu/intel_common/car2.S b/arch/x86/cpu/intel_common/car2.S
new file mode 100644
index 00000000000..ac07fe5ea6a
--- /dev/null
+++ b/arch/x86/cpu/intel_common/car2.S
@@ -0,0 +1,490 @@ 
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * This file is part of the coreboot project.
+ *
+ * Copyright (C) 2015-2016 Intel Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <config.h>
+#include <asm/msr-index.h>
+#include <asm/mtrr.h>
+#include <asm/post.h>
+#include <asm/processor-flags.h>
+
+#define KiB 1024
+
+.global car_init
+car_init:
+	post_code(0x20)
+
+	/*
+	 * Use the MTRR default type MSR as a proxy for detecting INIT#.
+	 * Reset the system if any known bits are set in that MSR. That is
+	 * an indication of the CPU not being properly reset.
+	 *
+	 * Only the two enable bits (MTRR_DEF_TYPE_EN and
+	 * MTRR_DEF_TYPE_FIX_EN) are tested; both clear means a clean reset.
+	 */
+check_for_clean_reset:
+	mov	$MTRR_DEF_TYPE_MSR, %ecx
+	rdmsr
+	and	$(MTRR_DEF_TYPE_EN | MTRR_DEF_TYPE_FIX_EN), %eax
+	cmp	$0, %eax
+	jz	no_reset
+	/*
+	 * Perform warm reset by writing 0x06 to I/O port 0xcf9. There is no
+	 * halt after the write, so execution continues at no_reset until the
+	 * reset takes effect.
+	 */
+	movw	$0xcf9, %dx
+	movb	$0x06, %al
+	outb	%al, %dx
+
+no_reset:
+	post_code(0x21)
+
+	/*
+	 * Clear/disable fixed MTRRs: walk fixed_mtrr_list from its last
+	 * 16-bit entry down to the first, writing 0 (edx:eax) to each MSR.
+	 * ebx is the byte offset into the table.
+	 */
+	mov	$fixed_mtrr_list_size, %ebx
+	xor	%eax, %eax
+	xor	%edx, %edx
+
+clear_fixed_mtrr:
+	add	$-2, %ebx	/* step back one entry; ZF set at offset 0 */
+	movzwl	fixed_mtrr_list(%ebx), %ecx
+	wrmsr
+	jnz	clear_fixed_mtrr	/* tests ZF from add: movzwl/wrmsr keep flags */
+
+	post_code(0x22)
+
+	/* Figure out how many MTRRs we have, and clear them out */
+	mov	$MTRR_CAP_MSR, %ecx
+	rdmsr
+	movzb	%al, %ebx		/* Number of variable MTRRs */
+	mov	$MTRR_PHYS_BASE_MSR(0), %ecx
+	xor	%eax, %eax
+	xor	%edx, %edx
+
+clear_var_mtrr:
+	wrmsr			/* zero the base MSR of this variable MTRR */
+	inc	%ecx
+	wrmsr			/* zero the following MSR (its mask) */
+	inc	%ecx
+	dec	%ebx		/* one variable MTRR done */
+	jnz	clear_var_mtrr
+
+	post_code(0x23)
+
+	/* Configure default memory type to uncacheable (UC) */
+	mov	$MTRR_DEF_TYPE_MSR, %ecx
+	rdmsr
+	/* Clear enable bits and set default type to UC. */
+	and	$~(MTRR_DEF_TYPE_MASK | MTRR_DEF_TYPE_EN | \
+		 MTRR_DEF_TYPE_FIX_EN), %eax
+	wrmsr
+
+	/*
+	 * Configure MTRR_PHYS_MASK_HIGH for proper addressing above 4GB
+	 * based on the physical address size supported by this processor.
+	 * The size is read from CPUID EAX = 080000008h, EAX bits [7:0], and
+	 * the value computed below is (1 << (address bits - 32)) - 1.
+	 *
+	 * Examples:
+	 *  MTRR_PHYS_MASK_HIGH = 00000000Fh  For 36 bit addressing
+	 *  MTRR_PHYS_MASK_HIGH = 0000000FFh  For 40 bit addressing
+	 */
+
+	movl	$0x80000008, %eax 	/* Address sizes leaf */
+	cpuid
+	sub	$32, %al		/* address bits above bit 31 */
+	movzx	%al, %eax
+	xorl	%esi, %esi
+	bts	%eax, %esi		/* esi = 1 << (address bits - 32) */
+	dec	%esi			/* esi <- MTRR_PHYS_MASK_HIGH */
+
+	post_code(0x24)
+
+#if ((CONFIG_DCACHE_RAM_SIZE & (CONFIG_DCACHE_RAM_SIZE - 1)) == 0)
+	/*
+	 * CAR size is a power of two, so a single variable MTRR (0) covers it.
+	 * Configure CAR region as write-back (WB)
+	 */
+	mov	$MTRR_PHYS_BASE_MSR(0), %ecx
+	mov	$CONFIG_DCACHE_RAM_BASE, %eax
+	or	$MTRR_TYPE_WRBACK, %eax
+	xor	%edx,%edx
+	wrmsr
+
+	/*
+	 * Configure the MTRR mask for the size region. Use the
+	 * MTRR_PHYS_MASK_MSR() macro, the counterpart of MTRR_PHYS_BASE_MSR()
+	 * that the 768 KiB case below also uses.
+	 */
+	mov	$MTRR_PHYS_MASK_MSR(0), %ecx
+	mov	$CONFIG_DCACHE_RAM_SIZE, %eax	/* size mask */
+	dec	%eax
+	not	%eax			/* eax = ~(size - 1) */
+	or	$MTRR_PHYS_MASK_VALID, %eax
+	movl	%esi, %edx	/* edx <- MTRR_PHYS_MASK_HIGH */
+	wrmsr
+#elif (CONFIG_DCACHE_RAM_SIZE == 768 * KiB) /* 768 KiB */
+	/*
+	 * 768 KiB is not a power of two: cover it with two variable MTRRs,
+	 * 512 KiB at the CAR base followed by 256 KiB.
+	 * Configure CAR region as write-back (WB)
+	 */
+	mov	$MTRR_PHYS_BASE_MSR(0), %ecx
+	mov	$CONFIG_DCACHE_RAM_BASE, %eax
+	or	$MTRR_TYPE_WRBACK, %eax
+	xor	%edx,%edx
+	wrmsr
+
+	mov	$MTRR_PHYS_MASK_MSR(0), %ecx
+	mov	$(512 * KiB), %eax	/* size mask */
+	dec	%eax
+	not	%eax			/* eax = ~(512 KiB - 1) */
+	or	$MTRR_PHYS_MASK_VALID, %eax
+	movl	%esi, %edx	/* edx <- MTRR_PHYS_MASK_HIGH */
+	wrmsr
+
+	mov	$MTRR_PHYS_BASE_MSR(1), %ecx
+	mov	$(CONFIG_DCACHE_RAM_BASE + 512 * KiB), %eax
+	or	$MTRR_TYPE_WRBACK, %eax
+	xor	%edx,%edx
+	wrmsr
+
+	mov	$MTRR_PHYS_MASK_MSR(1), %ecx
+	mov	$(256 * KiB), %eax	/* size mask */
+	dec	%eax
+	not	%eax			/* eax = ~(256 KiB - 1) */
+	or	$MTRR_PHYS_MASK_VALID, %eax
+	movl	%esi, %edx	/* edx <- MTRR_PHYS_MASK_HIGH */
+	wrmsr
+#else
+#error "DCACHE_RAM_SIZE is not a power of 2 and setup code is missing"
+#endif
+	post_code(0x25)
+
+	/* start - NOTE(review): disabled jumps, look like debug leftovers */
+/* 	mov	$0xffff80a8, %ebx */
+/* 	jmp	*%ebx */
+.globl _from_bb
+_from_bb:
+/* 	jmp	car_init_ret */
+	/* end */
+
+	/* Enable variable MTRRs (the fixed-range enable bit stays clear) */
+	mov	$MTRR_DEF_TYPE_MSR, %ecx
+	rdmsr
+	or	$MTRR_DEF_TYPE_EN, %eax
+	wrmsr
+
+	/*
+	 * Enable caching: clear CD and NW in CR0. The cache is invalidated
+	 * (invd) just before the new CR0 value is loaded.
+	 */
+	mov	%cr0, %eax
+	and	$~(X86_CR0_CD | X86_CR0_NW), %eax
+	invd
+	mov	%eax, %cr0
+
+	/*
+	 * Continue with the CAR mechanism selected at build time. Exactly one
+	 * of the three options must be enabled, otherwise the build fails.
+	 */
+#if IS_ENABLED(CONFIG_INTEL_CAR_NEM)
+	jmp	car_nem
+#elif IS_ENABLED(CONFIG_INTEL_CAR_CQOS)
+	jmp	car_cqos
+#elif IS_ENABLED(CONFIG_INTEL_CAR_NEM_ENHANCED)
+	jmp	car_nem_enhanced
+#else
+#error "No CAR mechanism selected"
+#endif
+	/* Not reached: every branch above jumps away unconditionally */
+	jmp	car_init_ret
+
+#if 0	/* Disabled: stack setup and call to bootblock_c_entry, not built */
+.global car_init_done
+car_init_done:
+
+	post_code(0x29)
+
+	/* Setup bootblock stack */
+	mov	$_car_stack_end, %esp
+
+	/*
+	 * Need to align stack to 16 bytes at call instruction. Account for
+	 * the two pushes below.
+	 */
+	andl	$0xfffffff0, %esp
+	sub	$8, %esp
+
+	/* Push TSC value to stack; it is taken from mm2 (high) and mm1 (low) */
+	movd	%mm2, %eax
+	pushl	%eax	/* tsc[63:32] */
+	movd	%mm1, %eax
+	pushl	%eax 	/* tsc[31:0] */
+
+before_carstage:
+	post_code(0x2A)
+
+	call	bootblock_c_entry
+	/* Never reached */
+#endif
+
+fixed_mtrr_list:	/* 16-bit MSR numbers zeroed by clear_fixed_mtrr */
+	.word	MTRR_FIX_64K_00000_MSR
+	.word	MTRR_FIX_16K_80000_MSR
+	.word	MTRR_FIX_16K_A0000_MSR
+	.word	MTRR_FIX_4K_C0000_MSR
+	.word	MTRR_FIX_4K_C8000_MSR
+	.word	MTRR_FIX_4K_D0000_MSR
+	.word	MTRR_FIX_4K_D8000_MSR
+	.word	MTRR_FIX_4K_E0000_MSR
+	.word	MTRR_FIX_4K_E8000_MSR
+	.word	MTRR_FIX_4K_F0000_MSR
+	.word	MTRR_FIX_4K_F8000_MSR
+fixed_mtrr_list_size = . - fixed_mtrr_list	/* table size in bytes */
+
+#if IS_ENABLED(CONFIG_INTEL_CAR_NEM)
+/*
+ * car_nem - set up cache-as-RAM using No-Eviction Mode (NEM)
+ *
+ * Entered by a jump from car_init once the MTRRs and CR0 are programmed.
+ * Sets the NEM setup bit, fills the CAR region with zeroes so that it is
+ * resident in the cache, then sets the NEM run bit. Clobbers eax, ecx, edx
+ * and edi. Leaves by jumping to car_init_ret, as the CQOS path does.
+ */
+.global car_nem
+car_nem:
+	/* Disable cache eviction (setup stage) */
+	mov	$MSR_EVICT_CTL, %ecx
+	rdmsr
+	or	$0x1, %eax
+	wrmsr
+
+	post_code(0x26)
+
+	/* Clear the cache memory region. This will also fill up the cache */
+	movl	$CONFIG_DCACHE_RAM_BASE, %edi
+	movl	$CONFIG_DCACHE_RAM_SIZE, %ecx
+	shr	$0x02, %ecx		/* byte count -> dword count */
+	xor	%eax, %eax
+	cld
+	rep	stosl
+
+	post_code(0x27)
+
+	/* Disable cache eviction (run stage) */
+	mov	$MSR_EVICT_CTL, %ecx
+	rdmsr
+	or	$0x2, %eax
+	wrmsr
+
+	post_code(0x28)
+
+	/*
+	 * Return through car_init_ret: car_init_done is compiled out
+	 * (#if 0) in this file, so jumping there cannot link.
+	 */
+	jmp	car_init_ret
+
+#elif IS_ENABLED(CONFIG_INTEL_CAR_CQOS)
+.global car_cqos
+car_cqos:
+	/*
+	 * Create CBM_LEN_MASK based on CBM_LEN
+	 * Get CPUID.(EAX=10H, ECX=2H):EAX.CBM_LEN[bits 4:0]
+	 *
+	 * The field is incremented by one and turned into a mask of that
+	 * many low bits: CBM_LEN_MASK = (1 << (field + 1)) - 1.
+	 */
+	mov $0x10, %eax
+	mov $0x2,  %ecx
+	cpuid
+	and $0x1F, %eax
+	add $1, %al
+
+	mov $1, %ebx
+	mov %al, %cl
+	shl %cl, %ebx
+	sub $1, %ebx
+
+	/* Store the CBM_LEN_MASK in mm3 for later use. */
+	movd %ebx, %mm3
+
+	/*
+	 * Disable both L1 and L2 prefetcher. For a not-yet-understood reason,
+	 * prefetchers slow down filling cache with rep stos in CQOS mode.
+	 */
+	mov	$MSR_PREFETCH_CTL, %ecx
+	rdmsr
+	or	$(PREFETCH_L1_DISABLE | PREFETCH_L2_DISABLE), %eax
+	wrmsr
+
+#if (CONFIG_DCACHE_RAM_SIZE == CONFIG_L2_CACHE_SIZE)
+/*
+ * If CAR size is set to full L2 size, mask is calculated as all-zeros.
+ * This is not supported by the CPU/uCode.
+ */
+#error "CQOS CAR may not use whole L2 cache area"
+#endif
+
+	/*
+	 * Calculate how many bits to be used for CAR:
+	 * bits = CONFIG_DCACHE_RAM_SIZE / CONFIG_CACHE_QOS_SIZE_PER_BIT,
+	 * mask = (1 << bits) - 1
+	 */
+	xor	%edx, %edx
+	mov	$CONFIG_DCACHE_RAM_SIZE, %eax	/* dividend */
+	mov	$CONFIG_CACHE_QOS_SIZE_PER_BIT, %ecx	/* divisor */
+	div	%ecx		/* result is in eax */
+	mov	%eax, %ecx	/* save to ecx */
+	mov	$1, %ebx
+	shl	%cl, %ebx
+	sub	$1, %ebx	/* resulting mask is in ebx */
+
+	/* Set this mask for initial cache fill */
+	mov	$MSR_L2_QOS_MASK(0), %ecx
+	rdmsr
+	mov	%ebx, %eax
+	wrmsr
+
+	/* Set CLOS selector to 0; the selector is in the high dword (edx) */
+	mov	$MSR_IA32_PQR_ASSOC, %ecx
+	rdmsr
+	and	$~MSR_IA32_PQR_ASSOC_MASK, %edx	/* select mask 0 */
+	wrmsr
+
+	/* We will need to block CAR region from evicts */
+	mov	$MSR_L2_QOS_MASK(1), %ecx
+	rdmsr
+	/* Invert bits that are to be used for cache */
+	mov	%ebx, %eax
+	xor	$~0, %eax			/* invert 32 bits */
+
+	/*
+	 * Use CBM_LEN_MASK stored in mm3 to set bits based on Capacity Bit
+	 * Mask Length, i.e. drop inverted bits above the mask length.
+	 */
+	movd	%mm3, %ebx
+	and	%ebx, %eax
+	wrmsr
+
+	post_code(0x26)
+
+	/* Clear the cache memory region. This will also fill up the cache */
+	movl	$CONFIG_DCACHE_RAM_BASE, %edi
+	movl	$CONFIG_DCACHE_RAM_SIZE, %ecx
+	shr	$0x02, %ecx		/* byte count -> dword count */
+	xor	%eax, %eax
+	cld
+	rep	stosl
+
+	post_code(0x27)
+
+	/* Cache is populated. Use mask 1 that will block evicts */
+	mov	$MSR_IA32_PQR_ASSOC, %ecx
+	rdmsr
+	and	$~MSR_IA32_PQR_ASSOC_MASK, %edx	/* clear index bits first */
+	or	$1, %edx			/* select mask 1 */
+	wrmsr
+
+	/* Enable prefetchers */
+	mov	$MSR_PREFETCH_CTL, %ecx
+	rdmsr
+	and	$~(PREFETCH_L1_DISABLE | PREFETCH_L2_DISABLE), %eax
+	wrmsr
+
+	post_code(0x28)
+
+/* 	jmp car_init_done */
+	jmp	car_init_ret
+
+#elif IS_ENABLED(CONFIG_INTEL_CAR_NEM_ENHANCED)
+/*
+ * car_nem_enhanced - set up cache-as-RAM using enhanced No-Eviction Mode
+ *
+ * Entered by a jump from car_init once the MTRRs and CR0 are programmed.
+ * Programs the two LLC way masks, fills the CAR region with zeroes while
+ * way mask 2 is selected, switches to way mask 1 and finally sets the NEM
+ * run bit. Clobbers eax, ebx, ecx, edx and edi. Leaves by jumping to
+ * car_init_ret, as the CQOS path does.
+ */
+.global car_nem_enhanced
+car_nem_enhanced:
+	/* Disable cache eviction (setup stage) */
+	mov	$MSR_EVICT_CTL, %ecx
+	rdmsr
+	or	$0x1, %eax
+	wrmsr
+	post_code(0x26)
+
+	/*
+	 * Create n-way set associativity of cache: scan the CPUID leaf 4
+	 * subleaves (index in edi) until the level-3 cache is found
+	 */
+	xorl	%edi, %edi
+find_llc_subleaf:
+	movl	%edi, %ecx
+	movl	$0x04, %eax
+	cpuid
+	inc	%edi
+	and	$0xe0, %al	/* EAX[7:5] = Cache Level */
+	cmp	$0x60, %al	/* Check to see if it is LLC */
+	jnz	find_llc_subleaf
+
+	/*
+	 * Set MSR 0xC91 IA32_L3_MASK_1 = 0xE/0xFE/0xFFE/0xFFFE
+	 * for 4/8/12/16 way of LLC
+	 */
+	shr	$22, %ebx	/* EBX[31:22] = ways of associativity - 1 */
+	inc	%ebx
+	/* Calculate n-way associativity of LLC */
+	mov	%bl, %cl
+
+	/*
+	 * Maximizing RO cacheability while locking in the CAR to a
+	 * single way since that particular way won't be victim candidate
+	 * for evictions.
+	 * This has been done after programming LLC_WAY_MASK_1 MSR
+	 * with desired LLC way as mentioned below.
+	 *
+	 * Hence create Code and Data Size as per request
+	 * Code Size (RO) : Up to 16M
+	 * Data Size (RW) : Up to 256K
+	 */
+	movl	$0x01, %eax
+	/*
+	 * LLC Ways -> LLC_WAY_MASK_1:
+	 *  4: 0x000E
+	 *  8: 0x00FE
+	 * 12: 0x0FFE
+	 * 16: 0xFFFE
+	 *
+	 * These MSRs contain one bit per each way of LLC
+	 * - If this bit is '0' - the way is protected from eviction
+	 * - If this bit is '1' - the way is not protected from eviction
+	 */
+	shl	%cl, %eax
+	subl	$0x02, %eax	/* (1 << ways) - 2: all ways except way 0 */
+	movl	$MSR_IA32_L3_MASK_1, %ecx
+	xorl	%edx, %edx
+	wrmsr
+	/*
+	 * Set MSR 0xC92 IA32_L3_MASK_2 = 0x1
+	 *
+	 * For SKL SOC, data size remains 256K consistently.
+	 * Hence, creating 1-way associative cache for Data
+	 */
+	mov	$MSR_IA32_L3_MASK_2, %ecx
+	mov	$0x01, %eax
+	xorl	%edx, %edx
+	wrmsr
+	/*
+	 * Set MSR_IA32_PQR_ASSOC = 0x02
+	 *
+	 * Possible values:
+	 * 0: Default value, no way mask should be applied
+	 * 1: Apply way mask 1 to LLC
+	 * 2: Apply way mask 2 to LLC
+	 * 3: Shouldn't be used in NEM Mode
+	 */
+	movl	$MSR_IA32_PQR_ASSOC, %ecx
+	movl	$0x02, %eax
+	xorl	%edx, %edx
+	wrmsr
+
+	/* Clear the CAR region; this also fills up the cache */
+	movl	$CONFIG_DCACHE_RAM_BASE, %edi
+	movl	$CONFIG_DCACHE_RAM_SIZE, %ecx
+	shr	$0x02, %ecx		/* byte count -> dword count */
+	xor	%eax, %eax
+	cld
+	rep	stosl
+	/*
+	 * Set MSR_IA32_PQR_ASSOC = 0x01
+	 * At this stage we apply LLC_WAY_MASK_1 to the cache.
+	 * i.e. way 0 is protected from eviction.
+	 */
+	movl	$MSR_IA32_PQR_ASSOC, %ecx
+	movl	$0x01, %eax
+	xorl	%edx, %edx
+	wrmsr
+
+	post_code(0x27)
+	/*
+	 * Enable No-Eviction Mode Run State by setting
+	 * NO_EVICT_MODE MSR 2E0h bit [1] = '1'.
+	 */
+
+	movl	$MSR_EVICT_CTL, %ecx
+	rdmsr
+	orl	$0x02, %eax
+	wrmsr
+
+	post_code(0x28)
+
+	/*
+	 * Return through car_init_ret: car_init_done is compiled out
+	 * (#if 0) in this file, so jumping there cannot link.
+	 */
+	jmp	car_init_ret
+#endif
+
+#if CONFIG_IS_ENABLED(X86_16BIT_INIT)
+_dt_ucode_base_size:
+	/*
+	 * These next two fields are filled in by binman. They are followed by
+	 * two more longs giving the code region base and size, taken from
+	 * CONFIG_SYS_MONITOR_BASE and CONFIG_SYS_MONITOR_LEN.
+	 */
+.globl ucode_base
+ucode_base:	/* Declared in microcode.h */
+	.long	0			/* microcode base */
+.globl ucode_size
+ucode_size:	/* Declared in microcode.h */
+	.long	0			/* microcode size */
+	.long	CONFIG_SYS_MONITOR_BASE	/* code region base */
+	.long	CONFIG_SYS_MONITOR_LEN	/* code region size */
+#endif
diff --git a/arch/x86/cpu/intel_common/car2_uninit.S b/arch/x86/cpu/intel_common/car2_uninit.S
new file mode 100644
index 00000000000..4797ac04279
--- /dev/null
+++ b/arch/x86/cpu/intel_common/car2_uninit.S
@@ -0,0 +1,87 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2017 Intel Corp.
+ * Copyright 2019 Google LLC
+ * Taken from coreboot file exit_car.S
+ */
+
+#include <config.h>
+#include <asm/msr-index.h>
+#include <asm/mtrr.h>
+
+.text
+.global car_uninit
+car_uninit:
+
+	/*
+	 * Retrieve return address from stack as it will get trashed below if
+	 * execution is utilizing the cache-as-ram stack. It is kept in ebx
+	 * and used by the final indirect jump of this routine.
+	 */
+	pop	%ebx
+
+	/*
+	 * Disable MTRRs: clear both the variable and the fixed enable bits
+	 * in the default-type MSR.
+	 */
+	mov	$(MTRR_DEF_TYPE_MSR), %ecx
+	rdmsr
+	and	$(~(MTRR_DEF_TYPE_EN | MTRR_DEF_TYPE_FIX_EN)), %eax
+	wrmsr
+
+#ifdef CONFIG_INTEL_CAR_NEM
+.global car_nem_teardown
+car_nem_teardown:
+
+	/* Invalidate cache contents (invd does not write them back). */
+	invd
+
+	/*
+	 * Knock down bit 1 then bit 0 of NEM control, not combining steps:
+	 * each bit is cleared by its own wrmsr.
+	 */
+	mov	$(MSR_EVICT_CTL), %ecx
+	rdmsr
+	and	$(~(1 << 1)), %eax
+	wrmsr
+	and	$(~(1 << 0)), %eax
+	wrmsr
+
+#elif IS_ENABLED(CONFIG_INTEL_CAR_CQOS)
+.global car_cqos_teardown
+car_cqos_teardown:
+
+	/*
+	 * Go back to all-evicting mode, set both masks to all-1s.
+	 * NOTE(review): only the low byte (al) is set to ones, the other
+	 * bits keep the value just read - presumably the capacity mask is
+	 * at most 8 bits wide here; confirm for the target SoC.
+	 */
+	mov	$MSR_L2_QOS_MASK(0), %ecx
+	rdmsr
+	mov	$~0, %al
+	wrmsr
+
+	mov	$MSR_L2_QOS_MASK(1), %ecx
+	rdmsr
+	mov	$~0, %al
+	wrmsr
+
+	/* Reset CLOS selector to 0 (the field is masked out of edx) */
+	mov	$MSR_IA32_PQR_ASSOC, %ecx
+	rdmsr
+	and	$~MSR_IA32_PQR_ASSOC_MASK, %edx
+	wrmsr
+
+#elif IS_ENABLED(CONFIG_INTEL_CAR_NEM_ENHANCED)
+/*
+ * car_nem_enhanced_teardown - leave enhanced No-Eviction Mode
+ *
+ * Reached by falling through from car_uninit after the MTRRs have been
+ * disabled. Invalidates the cache, clears the NEM run and setup bits and
+ * resets the CLOS selector. Clobbers eax, ecx and edx; ebx (the return
+ * address) is left untouched.
+ */
+.global car_nem_enhanced_teardown
+car_nem_enhanced_teardown:
+
+	/* Invalidate cache contents (invd does not write them back). */
+	invd
+
+	/*
+	 * Knock down bit 1 then bit 0 of NEM control, not combining steps:
+	 * each bit is cleared by its own wrmsr.
+	 */
+	mov	$(MSR_EVICT_CTL), %ecx
+	rdmsr
+	and	$(~(1 << 1)), %eax
+	wrmsr
+	and	$(~(1 << 0)), %eax
+	wrmsr
+
+	/*
+	 * Reset CLOS selector to 0. Use the MSR_-prefixed names, as the
+	 * CQOS teardown in this file does.
+	 */
+	mov	$MSR_IA32_PQR_ASSOC, %ecx
+	rdmsr
+	and	$~MSR_IA32_PQR_ASSOC_MASK, %edx
+	wrmsr
+#endif
+
+	/* Return to caller: ebx holds the return address popped on entry. */
+	jmp	*%ebx