Message ID | 20191021033913.220758-64-sjg@chromium.org |
---|---|
State | Superseded |
Delegated to: | Bin Meng |
Headers | show |
Series | x86: Add initial support for apollolake | expand |
On Mon, Oct 21, 2019 at 7:01 AM Simon Glass <sjg@chromium.org> wrote: > > Newer Intel SoCs have different ways of setting up cache-as-ram (CAR). > Add support for these along with suitable configuration options. > +#if ((CONFIG_DCACHE_RAM_SIZE & (CONFIG_DCACHE_RAM_SIZE - 1)) == 0) Perhaps it would be useful to have a generic macro #define CONFIG_VALUE_IS_POWER_OF_2(_x) (CONFIG_##_x ...) > +#elif (CONFIG_DCACHE_RAM_SIZE == 768 * KiB) /* 768 KiB */ > +#else > +#error "DCACHE_RAM_SIZE is not a power of 2 and setup code is missing" > +#endif
Hi Simon, On Mon, Oct 21, 2019 at 11:40 AM Simon Glass <sjg@chromium.org> wrote: > > Newer Intel SoCs have different ways of setting up cache-as-ram (CAR). > Add support for these along with suitable configuration options. > > Signed-off-by: Simon Glass <sjg@chromium.org> > --- > > Changes in v3: > - Drop unneeded Kconfig file > > Changes in v2: None > > arch/x86/Kconfig | 16 + > arch/x86/cpu/intel_common/Makefile | 8 + > arch/x86/cpu/intel_common/car2.S | 490 ++++++++++++++++++++++++ > arch/x86/cpu/intel_common/car2_uninit.S | 87 +++++ > 4 files changed, 601 insertions(+) > create mode 100644 arch/x86/cpu/intel_common/car2.S > create mode 100644 arch/x86/cpu/intel_common/car2_uninit.S > > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig > index 54f51e002b8..69327bd746a 100644 > --- a/arch/x86/Kconfig > +++ b/arch/x86/Kconfig > @@ -889,4 +889,20 @@ config HIGH_TABLE_SIZE > Increse it if the default size does not fit the board's needs. > This is most likely due to a large ACPI DSDT table is used. > > +config INTEL_CAR_CQOS > + bool "Support Intel Cache Quality of Service" > + help > + Cache Quality of Service allows more fine-grained control of cache > + usage. As result, it is possible to set up a portion of L2 cache for > + CAR and use the remainder for actual caching. > + > +# > +# Each bit in QOS mask controls this many bytes. 
This is calculated as: > +# (CACHE_WAYS / CACHE_BITS_PER_MASK) * CACHE_LINE_SIZE * CACHE_SETS > +# > +config CACHE_QOS_SIZE_PER_BIT > + hex > + depends on INTEL_CAR_CQOS > + default 0x20000 # 128 KB > + > endmenu > diff --git a/arch/x86/cpu/intel_common/Makefile b/arch/x86/cpu/intel_common/Makefile > index dfbc29f0475..4c733f46067 100644 > --- a/arch/x86/cpu/intel_common/Makefile > +++ b/arch/x86/cpu/intel_common/Makefile > @@ -8,6 +8,14 @@ obj-$(CONFIG_$(SPL_TPL_)X86_32BIT_INIT) += me_status.o > obj-$(CONFIG_$(SPL_TPL_)X86_32BIT_INIT) += report_platform.o > obj-$(CONFIG_$(SPL_TPL_)X86_32BIT_INIT) += mrc.o > endif > + > +ifdef CONFIG_FSP_VERSION2 The new CAR code should not depend on FSP_VERSION2. It's OK for us to implement a native U-Boot port without FSP2. Besides, when FSP-T from FSP2 is used, car2 is not needed either. > +obj-$(CONFIG_TPL_BUILD) += car2.o > +ifndef CONFIG_SPL_BUILD > +obj-y += car2_uninit.o > +endif > +endif > + > obj-y += cpu.o > obj-y += fast_spi.o > obj-y += lpc.o > diff --git a/arch/x86/cpu/intel_common/car2.S b/arch/x86/cpu/intel_common/car2.S > new file mode 100644 > index 00000000000..bf01b0da849 > --- /dev/null > +++ b/arch/x86/cpu/intel_common/car2.S > @@ -0,0 +1,490 @@ > +/* SPDX-License-Identifier: GPL-2.0+ */ > +/* > + * This file is part of the coreboot project. > + * > + * Copyright (C) 2015-2016 Intel Corp. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; version 2 of the License. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. The license boilerplate above is not needed, since the SPDX identifier is already present.
> + * > + */ > + > +#include <config.h> > +#include <asm/msr-index.h> > +#include <asm/mtrr.h> > +#include <asm/post.h> > +#include <asm/processor-flags.h> > + > +#define KiB 1024 > + > +.global car_init > +car_init: > + post_code(0x20) Looks 0x20 is occupied by POST_CAR_SIPI. Can we use a macro, and a different value? Please fix this globally in this file. > + > + /* > + * Use the MTRR default type MSR as a proxy for detecting INIT#. > + * Reset the system if any known bits are set in that MSR. That is > + * an indication of the CPU not being properly reset. > + */ > +check_for_clean_reset: > + mov $MTRR_DEF_TYPE_MSR, %ecx > + rdmsr > + and $(MTRR_DEF_TYPE_EN | MTRR_DEF_TYPE_FIX_EN), %eax > + cmp $0, %eax > + jz no_reset > + /* perform warm reset */ > + movw $0xcf9, %dx Use IO_PORT_RESET > + movb $0x06, %al Use (SYS_RST | RST_CPU) > + outb %al, %dx > + > +no_reset: > + post_code(0x21) > + > + /* Clear/disable fixed MTRRs */ > + mov $fixed_mtrr_list_size, %ebx > + xor %eax, %eax > + xor %edx, %edx > + > +clear_fixed_mtrr: > + add $-2, %ebx > + movzwl fixed_mtrr_list(%ebx), %ecx > + wrmsr > + jnz clear_fixed_mtrr > + > + post_code(0x22) Ditto > + > + /* Figure put how many MTRRs we have, and clear them out */ > + mov $MTRR_CAP_MSR, %ecx > + rdmsr > + movzb %al, %ebx /* Number of variable MTRRs */ > + mov $MTRR_PHYS_BASE_MSR(0), %ecx > + xor %eax, %eax > + xor %edx, %edx > + > +clear_var_mtrr: > + wrmsr > + inc %ecx > + wrmsr > + inc %ecx > + dec %ebx > + jnz clear_var_mtrr > + > + post_code(0x23) > + > + /* Configure default memory type to uncacheable (UC) */ > + mov $MTRR_DEF_TYPE_MSR, %ecx > + rdmsr > + /* Clear enable bits and set default type to UC */ > + and $~(MTRR_DEF_TYPE_MASK | MTRR_DEF_TYPE_EN | \ > + MTRR_DEF_TYPE_FIX_EN), %eax > + wrmsr > + > + /* Configure MTRR_PHYS_MASK_HIGH for proper addressing above 4GB nits: wrong multi-line comment format > + * based on the physical address size supported for this processor > + * This is based on read from CPUID 
EAX = 080000008h, EAX bits [7:0] > + * > + * Examples: > + * MTRR_PHYS_MASK_HIGH = 00000000Fh For 36 bit addressing > + * MTRR_PHYS_MASK_HIGH = 0000000FFh For 40 bit addressing > + */ > + > + movl $0x80000008, %eax /* Address sizes leaf */ > + cpuid > + sub $32, %al > + movzx %al, %eax > + xorl %esi, %esi > + bts %eax, %esi > + dec %esi /* esi <- MTRR_PHYS_MASK_HIGH */ > + > + post_code(0x24) > + > +#if ((CONFIG_DCACHE_RAM_SIZE & (CONFIG_DCACHE_RAM_SIZE - 1)) == 0) > + /* Configure CAR region as write-back (WB) */ > + mov $MTRR_PHYS_BASE_MSR(0), %ecx > + mov $CONFIG_DCACHE_RAM_BASE, %eax > + or $MTRR_TYPE_WRBACK, %eax > + xor %edx,%edx > + wrmsr > + > + /* Configure the MTRR mask for the size region */ > + mov $MTRR_PHYS_MASK(0), %ecx > + mov $CONFIG_DCACHE_RAM_SIZE, %eax /* size mask */ > + dec %eax > + not %eax > + or $MTRR_PHYS_MASK_VALID, %eax > + movl %esi, %edx /* edx <- MTRR_PHYS_MASK_HIGH */ > + wrmsr > +#elif (CONFIG_DCACHE_RAM_SIZE == 768 * KiB) /* 768 KiB */ > + /* Configure CAR region as write-back (WB) */ > + mov $MTRR_PHYS_BASE_MSR(0), %ecx > + mov $CONFIG_DCACHE_RAM_BASE, %eax > + or $MTRR_TYPE_WRBACK, %eax > + xor %edx,%edx > + wrmsr > + > + mov $MTRR_PHYS_MASK_MSR(0), %ecx > + mov $(512 * KiB), %eax /* size mask */ > + dec %eax > + not %eax > + or $MTRR_PHYS_MASK_VALID, %eax > + movl %esi, %edx /* edx <- MTRR_PHYS_MASK_HIGH */ > + wrmsr > + > + mov $MTRR_PHYS_BASE_MSR(1), %ecx > + mov $(CONFIG_DCACHE_RAM_BASE + 512 * KiB), %eax > + or $MTRR_TYPE_WRBACK, %eax > + xor %edx,%edx > + wrmsr > + > + mov $MTRR_PHYS_MASK_MSR(1), %ecx > + mov $(256 * KiB), %eax /* size mask */ > + dec %eax > + not %eax > + or $MTRR_PHYS_MASK_VALID, %eax > + movl %esi, %edx /* edx <- MTRR_PHYS_MASK_HIGH */ > + wrmsr > +#else > +#error "DCACHE_RAM_SIZE is not a power of 2 and setup code is missing" > +#endif > + post_code(0x25) > + > + /* start */ > +/* mov $0xffff80a8, %ebx */ > +/* jmp *%ebx */ > +.globl _from_bb > +_from_bb: > +/* jmp car_init_ret */ > + /* end */ > + > + 
/* Enable variable MTRRs */ > + mov $MTRR_DEF_TYPE_MSR, %ecx > + rdmsr > + or $MTRR_DEF_TYPE_EN, %eax > + wrmsr > + > + /* Enable caching */ > + mov %cr0, %eax > + and $~(X86_CR0_CD | X86_CR0_NW), %eax > + invd > + mov %eax, %cr0 > + > +#if IS_ENABLED(CONFIG_INTEL_CAR_NEM) > + jmp car_nem nit: the indentation of "car_nem" is not correct > +#elif IS_ENABLED(CONFIG_INTEL_CAR_CQOS) > + jmp car_cqos ditto (indentation of "car_cqos") > +#elif IS_ENABLED(CONFIG_INTEL_CAR_NEM_ENHANCED) > + jmp car_nem_enhanced ditto (indentation of "car_nem_enhanced") > +#else > +#error "No CAR mechanism selected: > +#endif > + jmp car_init_ret > + > +#if 0 Is this dead code? If so, please remove the whole #if 0 block. > +.global car_init_done > +car_init_done: > + > + post_code(0x29) > + > + /* Setup bootblock stack */ > + mov $_car_stack_end, %esp > + > + /* Need to align stack to 16 bytes at call instruction. Account for nit: wrong multi-line comment format > + the two pushes below */ > + andl $0xfffffff0, %esp > + sub $8, %esp > + > + /*push TSC value to stack*/ > + movd %mm2, %eax > + pushl %eax /* tsc[63:32] */ > + movd %mm1, %eax > + pushl %eax /* tsc[31:0] */ > + > +before_carstage: > + post_code(0x2A) > + > + call bootblock_c_entry > + /* Never reached */ > +#endif > + > +fixed_mtrr_list: > + .word MTRR_FIX_64K_00000_MSR > + .word MTRR_FIX_16K_80000_MSR > + .word MTRR_FIX_16K_A0000_MSR > + .word MTRR_FIX_4K_C0000_MSR > + .word MTRR_FIX_4K_C8000_MSR > + .word MTRR_FIX_4K_D0000_MSR > + .word MTRR_FIX_4K_D8000_MSR > + .word MTRR_FIX_4K_E0000_MSR > + .word MTRR_FIX_4K_E8000_MSR > + .word MTRR_FIX_4K_F0000_MSR > + .word MTRR_FIX_4K_F8000_MSR > +fixed_mtrr_list_size = . - fixed_mtrr_list > + > +#if IS_ENABLED(CONFIG_INTEL_CAR_NEM) > +.global car_nem > +car_nem: > + /* Disable cache eviction (setup stage) */ > + mov $MSR_EVICT_CTL, %ecx > + rdmsr > + or $0x1, %eax > + wrmsr > + > + post_code(0x26) > + > + /* Clear the cache memory region.
This will also fill up the cache */ > + movl $CONFIG_DCACHE_RAM_BASE, %edi > + movl $CONFIG_DCACHE_RAM_SIZE, %ecx > + shr $0x02, %ecx > + xor %eax, %eax > + cld > + rep stosl > + > + post_code(0x27) > + > + /* Disable cache eviction (run stage) */ > + mov $MSR_EVICT_CTL, %ecx > + rdmsr > + or $0x2, %eax > + wrmsr > + > + post_code(0x28) > + > + jmp car_init_done > + > +#elif IS_ENABLED(CONFIG_INTEL_CAR_CQOS) > +.global car_cqos > +car_cqos: > + /* > + * Create CBM_LEN_MASK based on CBM_LEN > + * Get CPUID.(EAX=10H, ECX=2H):EAX.CBM_LEN[bits 4:0] > + */ > + mov $0x10, %eax > + mov $0x2, %ecx > + cpuid > + and $0x1F, %eax > + add $1, %al > + > + mov $1, %ebx > + mov %al, %cl > + shl %cl, %ebx > + sub $1, %ebx > + > + /* Store the CBM_LEN_MASK in mm3 for later use */ > + movd %ebx, %mm3 The indentation style inside the block above is not consistent with the rest of the file. > + > + /* > + * Disable both L1 and L2 prefetcher. For yet-to-understood reason, > + * prefetchers slow down filling cache with rep stos in CQOS mode. > + */ > + mov $MSR_PREFETCH_CTL, %ecx > + rdmsr > + or $(PREFETCH_L1_DISABLE | PREFETCH_L2_DISABLE), %eax > + wrmsr > + > +#if (CONFIG_DCACHE_RAM_SIZE == CONFIG_L2_CACHE_SIZE) > +/* > + * If CAR size is set to full L2 size, mask is calculated as all-zeros. > + * This is not supported by the CPU/uCode.
> + */ > +#error "CQOS CAR may not use whole L2 cache area" > +#endif > + > + /* Calculate how many bits to be used for CAR */ > + xor %edx, %edx > + mov $CONFIG_DCACHE_RAM_SIZE, %eax /* dividend */ > + mov $CONFIG_CACHE_QOS_SIZE_PER_BIT, %ecx /* divisor */ > + div %ecx /* result is in eax */ > + mov %eax, %ecx /* save to ecx */ > + mov $1, %ebx > + shl %cl, %ebx > + sub $1, %ebx /* resulting mask is is in ebx */ > + > + /* Set this mask for initial cache fill */ > + mov $MSR_L2_QOS_MASK(0), %ecx > + rdmsr > + mov %ebx, %eax > + wrmsr > + > + /* Set CLOS selector to 0 */ > + mov $MSR_IA32_PQR_ASSOC, %ecx > + rdmsr > + and $~MSR_IA32_PQR_ASSOC_MASK, %edx /* select mask 0 */ > + wrmsr > + > + /* We will need to block CAR region from evicts */ > + mov $MSR_L2_QOS_MASK(1), %ecx > + rdmsr > + /* Invert bits that are to be used for cache */ > + mov %ebx, %eax > + xor $~0, %eax /* invert 32 bits */ > + > + /* > + * Use CBM_LEN_MASK stored in mm3 to set bits based on Capacity Bit > + * Mask Length. > + */ > + movd %mm3, %ebx > + and %ebx, %eax > + wrmsr > + > + post_code(0x26) > + > + /* Clear the cache memory region. This will also fill up the cache */ > + movl $CONFIG_DCACHE_RAM_BASE, %edi > + movl $CONFIG_DCACHE_RAM_SIZE, %ecx > + shr $0x02, %ecx > + xor %eax, %eax > + cld > + rep stosl > + > + post_code(0x27) > + > + /* Cache is populated. Use mask 1 that will block evicts */ > + mov $MSR_IA32_PQR_ASSOC, %ecx > + rdmsr > + and $~MSR_IA32_PQR_ASSOC_MASK, %edx /* clear index bits first */ > + or $1, %edx /* select mask 1 */ > + wrmsr > + > + /* Enable prefetchers */ > + mov $MSR_PREFETCH_CTL, %ecx > + rdmsr > + and $~(PREFETCH_L1_DISABLE | PREFETCH_L2_DISABLE), %eax > + wrmsr > + > + post_code(0x28) > + > +/* jmp car_init_done */ Is this really not used and commented out? 
> + jmp car_init_ret > + > +#elif IS_ENABLED(CONFIG_INTEL_CAR_NEM_ENHANCED) > +.global car_nem_enhanced > +car_nem_enhanced: > + /* Disable cache eviction (setup stage) */ > + mov $MSR_EVICT_CTL, %ecx > + rdmsr > + or $0x1, %eax > + wrmsr > + post_code(0x26) > + > + /* Create n-way set associativity of cache */ > + xorl %edi, %edi > +find_llc_subleaf: > + movl %edi, %ecx > + movl $0x04, %eax > + cpuid > + inc %edi > + and $0xe0, %al /* EAX[7:5] = Cache Level */ > + cmp $0x60, %al /* Check to see if it is LLC */ > + jnz find_llc_subleaf > + > + /* > + * Set MSR 0xC91 IA32_L3_MASK_! = 0xE/0xFE/0xFFE/0xFFFE > + * for 4/8/16 way of LLC > + */ > + shr $22, %ebx > + inc %ebx > + /* Calculate n-way associativity of LLC */ > + mov %bl, %cl > + > + /* > + * Maximizing RO cacheability while locking in the CAR to a > + * single way since that particular way won't be victim candidate > + * for evictions. > + * This has been done after programing LLC_WAY_MASK_1 MSR > + * with desired LLC way as mentioned below. > + * > + * Hence create Code and Data Size as per request > + * Code Size (RO) : Up to 16M > + * Data Size (RW) : Up to 256K > + */ > + movl $0x01, %eax > + /* > + * LLC Ways -> LLC_WAY_MASK_1: > + * 4: 0x000E > + * 8: 0x00FE > + * 12: 0x0FFE > + * 16: 0xFFFE > + * > + * These MSRs contain one bit per each way of LLC > + * - If this bit is '0' - the way is protected from eviction > + * - If this bit is '1' - the way is not protected from eviction > + */ > + shl %cl, %eax > + subl $0x02, %eax > + movl $MSR_IA32_L3_MASK_1, %ecx > + xorl %edx, %edx > + wrmsr > + /* > + * Set MSR 0xC92 IA32_L3_MASK_2 = 0x1 > + * > + * For SKL SOC, data size remains 256K consistently. 
> + * Hence, creating 1-way associative cache for Data > + */ > + mov $MSR_IA32_L3_MASK_2, %ecx > + mov $0x01, %eax > + xorl %edx, %edx > + wrmsr > + /* > + * Set MSR_IA32_PQR_ASSOC = 0x02 > + * > + * Possible values: > + * 0: Default value, no way mask should be applied > + * 1: Apply way mask 1 to LLC > + * 2: Apply way mask 2 to LLC > + * 3: Shouldn't be use in NEM Mode > + */ > + movl $MSR_IA32_PQR_ASSOC, %ecx > + movl $0x02, %eax > + xorl %edx, %edx > + wrmsr > + > + movl $CONFIG_DCACHE_RAM_BASE, %edi > + movl $CONFIG_DCACHE_RAM_SIZE, %ecx > + shr $0x02, %ecx > + xor %eax, %eax > + cld > + rep stosl > + /* > + * Set MSR_IA32_PQR_ASSOC = 0x01 > + * At this stage we apply LLC_WAY_MASK_1 to the cache. > + * i.e. way 0 is protected from eviction. > + */ > + movl $MSR_IA32_PQR_ASSOC, %ecx > + movl $0x01, %eax > + xorl %edx, %edx > + wrmsr > + > + post_code(0x27) > + /* > + * Enable No-Eviction Mode Run State by setting > + * NO_EVICT_MODE MSR 2E0h bit [1] = '1'. > + */ > + > + movl $MSR_EVICT_CTL, %ecx > + rdmsr > + orl $0x02, %eax > + wrmsr > + > + post_code(0x28) > + > + jmp car_init_done nits: incorrect indentation of car_init_done > +#endif > + > +#if CONFIG_IS_ENABLED(X86_16BIT_INIT) > +_dt_ucode_base_size: > + /* These next two fields are filled in by binman */ > +.globl ucode_base > +ucode_base: /* Declared in microcode.h */ > + .long 0 /* microcode base */ > +.globl ucode_size > +ucode_size: /* Declared in microcode.h */ > + .long 0 /* microcode size */ > + .long CONFIG_SYS_MONITOR_BASE /* code region base */ > + .long CONFIG_SYS_MONITOR_LEN /* code region size */ > +#endif > diff --git a/arch/x86/cpu/intel_common/car2_uninit.S b/arch/x86/cpu/intel_common/car2_uninit.S > new file mode 100644 > index 00000000000..aba3a5381e5 > --- /dev/null > +++ b/arch/x86/cpu/intel_common/car2_uninit.S > @@ -0,0 +1,87 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Copyright 2017 Intel Corp. 
> + * Copyright 2019 Google LLC > + * Taken from coreboot file exit_car.S > + */ > + > +#include <config.h> > +#include <asm/msr-index.h> > +#include <asm/mtrr.h> > + > +.text > +.global car_uninit > +car_uninit: > + > + /* > + * Retrieve return address from stack as it will get trashed below if > + * execution is utilizing the cache-as-ram stack. > + */ > + pop %ebx > + > + /* Disable MTRRs */ > + mov $(MTRR_DEF_TYPE_MSR), %ecx > + rdmsr > + and $(~(MTRR_DEF_TYPE_EN | MTRR_DEF_TYPE_FIX_EN)), %eax > + wrmsr > + > +#ifdef CONFIG_INTEL_CAR_NEM > +.global car_nem_teardown > +car_nem_teardown: > + > + /* invalidate cache contents */ > + invd > + > + /* Knock down bit 1 then bit 0 of NEM control not combining steps */ > + mov $(MSR_EVICT_CTL), %ecx > + rdmsr > + and $(~(1 << 1)), %eax > + wrmsr > + and $(~(1 << 0)), %eax > + wrmsr > + > +#elif IS_ENABLED(CONFIG_INTEL_CAR_CQOS) > +.global car_cqos_teardown > +car_cqos_teardown: > + > + /* Go back to all-evicting mode, set both masks to all-1s */ > + mov $MSR_L2_QOS_MASK(0), %ecx > + rdmsr > + mov $~0, %al > + wrmsr > + > + mov $MSR_L2_QOS_MASK(1), %ecx > + rdmsr > + mov $~0, %al > + wrmsr > + > + /* Reset CLOS selector to 0 */ > + mov $MSR_IA32_PQR_ASSOC, %ecx > + rdmsr > + and $~MSR_IA32_PQR_ASSOC_MASK, %edx > + wrmsr > + > +#elif IS_ENABLED(CONFIG_INTEL_CAR_NEM_ENHANCED) > +.global car_nem_enhanced_teardown > +car_nem_enhanced_teardown: > + > + /* invalidate cache contents */ > + invd > + > + /* Knock down bit 1 then bit 0 of NEM control not combining steps */ > + mov $(MSR_EVICT_CTL), %ecx > + rdmsr > + and $(~(1 << 1)), %eax > + wrmsr > + and $(~(1 << 0)), %eax > + wrmsr > + > + /* Reset CLOS selector to 0 */ > + mov $IA32_PQR_ASSOC, %ecx > + rdmsr > + and $~IA32_PQR_ASSOC_MASK, %edx > + wrmsr > +#endif > + > + /* Return to caller */ > + jmp *%ebx > -- Regards, Bin
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 54f51e002b8..69327bd746a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -889,4 +889,20 @@ config HIGH_TABLE_SIZE Increse it if the default size does not fit the board's needs. This is most likely due to a large ACPI DSDT table is used. +config INTEL_CAR_CQOS + bool "Support Intel Cache Quality of Service" + help + Cache Quality of Service allows more fine-grained control of cache + usage. As result, it is possible to set up a portion of L2 cache for + CAR and use the remainder for actual caching. + +# +# Each bit in QOS mask controls this many bytes. This is calculated as: +# (CACHE_WAYS / CACHE_BITS_PER_MASK) * CACHE_LINE_SIZE * CACHE_SETS +# +config CACHE_QOS_SIZE_PER_BIT + hex + depends on INTEL_CAR_CQOS + default 0x20000 # 128 KB + endmenu diff --git a/arch/x86/cpu/intel_common/Makefile b/arch/x86/cpu/intel_common/Makefile index dfbc29f0475..4c733f46067 100644 --- a/arch/x86/cpu/intel_common/Makefile +++ b/arch/x86/cpu/intel_common/Makefile @@ -8,6 +8,14 @@ obj-$(CONFIG_$(SPL_TPL_)X86_32BIT_INIT) += me_status.o obj-$(CONFIG_$(SPL_TPL_)X86_32BIT_INIT) += report_platform.o obj-$(CONFIG_$(SPL_TPL_)X86_32BIT_INIT) += mrc.o endif + +ifdef CONFIG_FSP_VERSION2 +obj-$(CONFIG_TPL_BUILD) += car2.o +ifndef CONFIG_SPL_BUILD +obj-y += car2_uninit.o +endif +endif + obj-y += cpu.o obj-y += fast_spi.o obj-y += lpc.o diff --git a/arch/x86/cpu/intel_common/car2.S b/arch/x86/cpu/intel_common/car2.S new file mode 100644 index 00000000000..bf01b0da849 --- /dev/null +++ b/arch/x86/cpu/intel_common/car2.S @@ -0,0 +1,490 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * This file is part of the coreboot project. + * + * Copyright (C) 2015-2016 Intel Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <config.h> +#include <asm/msr-index.h> +#include <asm/mtrr.h> +#include <asm/post.h> +#include <asm/processor-flags.h> + +#define KiB 1024 + +.global car_init +car_init: + post_code(0x20) + + /* + * Use the MTRR default type MSR as a proxy for detecting INIT#. + * Reset the system if any known bits are set in that MSR. That is + * an indication of the CPU not being properly reset. + */ +check_for_clean_reset: + mov $MTRR_DEF_TYPE_MSR, %ecx + rdmsr + and $(MTRR_DEF_TYPE_EN | MTRR_DEF_TYPE_FIX_EN), %eax + cmp $0, %eax + jz no_reset + /* perform warm reset */ + movw $0xcf9, %dx + movb $0x06, %al + outb %al, %dx + +no_reset: + post_code(0x21) + + /* Clear/disable fixed MTRRs */ + mov $fixed_mtrr_list_size, %ebx + xor %eax, %eax + xor %edx, %edx + +clear_fixed_mtrr: + add $-2, %ebx + movzwl fixed_mtrr_list(%ebx), %ecx + wrmsr + jnz clear_fixed_mtrr + + post_code(0x22) + + /* Figure put how many MTRRs we have, and clear them out */ + mov $MTRR_CAP_MSR, %ecx + rdmsr + movzb %al, %ebx /* Number of variable MTRRs */ + mov $MTRR_PHYS_BASE_MSR(0), %ecx + xor %eax, %eax + xor %edx, %edx + +clear_var_mtrr: + wrmsr + inc %ecx + wrmsr + inc %ecx + dec %ebx + jnz clear_var_mtrr + + post_code(0x23) + + /* Configure default memory type to uncacheable (UC) */ + mov $MTRR_DEF_TYPE_MSR, %ecx + rdmsr + /* Clear enable bits and set default type to UC */ + and $~(MTRR_DEF_TYPE_MASK | MTRR_DEF_TYPE_EN | \ + MTRR_DEF_TYPE_FIX_EN), %eax + wrmsr + + /* Configure MTRR_PHYS_MASK_HIGH for proper addressing above 4GB + * based on the physical address size supported for this processor + * This is based on read from CPUID EAX = 080000008h, EAX bits [7:0] + * + * Examples: + * MTRR_PHYS_MASK_HIGH = 00000000Fh For 36 bit addressing 
+ * MTRR_PHYS_MASK_HIGH = 0000000FFh For 40 bit addressing + */ + + movl $0x80000008, %eax /* Address sizes leaf */ + cpuid + sub $32, %al + movzx %al, %eax + xorl %esi, %esi + bts %eax, %esi + dec %esi /* esi <- MTRR_PHYS_MASK_HIGH */ + + post_code(0x24) + +#if ((CONFIG_DCACHE_RAM_SIZE & (CONFIG_DCACHE_RAM_SIZE - 1)) == 0) + /* Configure CAR region as write-back (WB) */ + mov $MTRR_PHYS_BASE_MSR(0), %ecx + mov $CONFIG_DCACHE_RAM_BASE, %eax + or $MTRR_TYPE_WRBACK, %eax + xor %edx,%edx + wrmsr + + /* Configure the MTRR mask for the size region */ + mov $MTRR_PHYS_MASK(0), %ecx + mov $CONFIG_DCACHE_RAM_SIZE, %eax /* size mask */ + dec %eax + not %eax + or $MTRR_PHYS_MASK_VALID, %eax + movl %esi, %edx /* edx <- MTRR_PHYS_MASK_HIGH */ + wrmsr +#elif (CONFIG_DCACHE_RAM_SIZE == 768 * KiB) /* 768 KiB */ + /* Configure CAR region as write-back (WB) */ + mov $MTRR_PHYS_BASE_MSR(0), %ecx + mov $CONFIG_DCACHE_RAM_BASE, %eax + or $MTRR_TYPE_WRBACK, %eax + xor %edx,%edx + wrmsr + + mov $MTRR_PHYS_MASK_MSR(0), %ecx + mov $(512 * KiB), %eax /* size mask */ + dec %eax + not %eax + or $MTRR_PHYS_MASK_VALID, %eax + movl %esi, %edx /* edx <- MTRR_PHYS_MASK_HIGH */ + wrmsr + + mov $MTRR_PHYS_BASE_MSR(1), %ecx + mov $(CONFIG_DCACHE_RAM_BASE + 512 * KiB), %eax + or $MTRR_TYPE_WRBACK, %eax + xor %edx,%edx + wrmsr + + mov $MTRR_PHYS_MASK_MSR(1), %ecx + mov $(256 * KiB), %eax /* size mask */ + dec %eax + not %eax + or $MTRR_PHYS_MASK_VALID, %eax + movl %esi, %edx /* edx <- MTRR_PHYS_MASK_HIGH */ + wrmsr +#else +#error "DCACHE_RAM_SIZE is not a power of 2 and setup code is missing" +#endif + post_code(0x25) + + /* start */ +/* mov $0xffff80a8, %ebx */ +/* jmp *%ebx */ +.globl _from_bb +_from_bb: +/* jmp car_init_ret */ + /* end */ + + /* Enable variable MTRRs */ + mov $MTRR_DEF_TYPE_MSR, %ecx + rdmsr + or $MTRR_DEF_TYPE_EN, %eax + wrmsr + + /* Enable caching */ + mov %cr0, %eax + and $~(X86_CR0_CD | X86_CR0_NW), %eax + invd + mov %eax, %cr0 + +#if IS_ENABLED(CONFIG_INTEL_CAR_NEM) + jmp 
car_nem +#elif IS_ENABLED(CONFIG_INTEL_CAR_CQOS) + jmp car_cqos +#elif IS_ENABLED(CONFIG_INTEL_CAR_NEM_ENHANCED) + jmp car_nem_enhanced +#else +#error "No CAR mechanism selected: +#endif + jmp car_init_ret + +#if 0 +.global car_init_done +car_init_done: + + post_code(0x29) + + /* Setup bootblock stack */ + mov $_car_stack_end, %esp + + /* Need to align stack to 16 bytes at call instruction. Account for + the two pushes below */ + andl $0xfffffff0, %esp + sub $8, %esp + + /*push TSC value to stack*/ + movd %mm2, %eax + pushl %eax /* tsc[63:32] */ + movd %mm1, %eax + pushl %eax /* tsc[31:0] */ + +before_carstage: + post_code(0x2A) + + call bootblock_c_entry + /* Never reached */ +#endif + +fixed_mtrr_list: + .word MTRR_FIX_64K_00000_MSR + .word MTRR_FIX_16K_80000_MSR + .word MTRR_FIX_16K_A0000_MSR + .word MTRR_FIX_4K_C0000_MSR + .word MTRR_FIX_4K_C8000_MSR + .word MTRR_FIX_4K_D0000_MSR + .word MTRR_FIX_4K_D8000_MSR + .word MTRR_FIX_4K_E0000_MSR + .word MTRR_FIX_4K_E8000_MSR + .word MTRR_FIX_4K_F0000_MSR + .word MTRR_FIX_4K_F8000_MSR +fixed_mtrr_list_size = . - fixed_mtrr_list + +#if IS_ENABLED(CONFIG_INTEL_CAR_NEM) +.global car_nem +car_nem: + /* Disable cache eviction (setup stage) */ + mov $MSR_EVICT_CTL, %ecx + rdmsr + or $0x1, %eax + wrmsr + + post_code(0x26) + + /* Clear the cache memory region. 
This will also fill up the cache */ + movl $CONFIG_DCACHE_RAM_BASE, %edi + movl $CONFIG_DCACHE_RAM_SIZE, %ecx + shr $0x02, %ecx + xor %eax, %eax + cld + rep stosl + + post_code(0x27) + + /* Disable cache eviction (run stage) */ + mov $MSR_EVICT_CTL, %ecx + rdmsr + or $0x2, %eax + wrmsr + + post_code(0x28) + + jmp car_init_done + +#elif IS_ENABLED(CONFIG_INTEL_CAR_CQOS) +.global car_cqos +car_cqos: + /* + * Create CBM_LEN_MASK based on CBM_LEN + * Get CPUID.(EAX=10H, ECX=2H):EAX.CBM_LEN[bits 4:0] + */ + mov $0x10, %eax + mov $0x2, %ecx + cpuid + and $0x1F, %eax + add $1, %al + + mov $1, %ebx + mov %al, %cl + shl %cl, %ebx + sub $1, %ebx + + /* Store the CBM_LEN_MASK in mm3 for later use */ + movd %ebx, %mm3 + + /* + * Disable both L1 and L2 prefetcher. For yet-to-understood reason, + * prefetchers slow down filling cache with rep stos in CQOS mode. + */ + mov $MSR_PREFETCH_CTL, %ecx + rdmsr + or $(PREFETCH_L1_DISABLE | PREFETCH_L2_DISABLE), %eax + wrmsr + +#if (CONFIG_DCACHE_RAM_SIZE == CONFIG_L2_CACHE_SIZE) +/* + * If CAR size is set to full L2 size, mask is calculated as all-zeros. + * This is not supported by the CPU/uCode. 
+ */ +#error "CQOS CAR may not use whole L2 cache area" +#endif + + /* Calculate how many bits to be used for CAR */ + xor %edx, %edx + mov $CONFIG_DCACHE_RAM_SIZE, %eax /* dividend */ + mov $CONFIG_CACHE_QOS_SIZE_PER_BIT, %ecx /* divisor */ + div %ecx /* result is in eax */ + mov %eax, %ecx /* save to ecx */ + mov $1, %ebx + shl %cl, %ebx + sub $1, %ebx /* resulting mask is is in ebx */ + + /* Set this mask for initial cache fill */ + mov $MSR_L2_QOS_MASK(0), %ecx + rdmsr + mov %ebx, %eax + wrmsr + + /* Set CLOS selector to 0 */ + mov $MSR_IA32_PQR_ASSOC, %ecx + rdmsr + and $~MSR_IA32_PQR_ASSOC_MASK, %edx /* select mask 0 */ + wrmsr + + /* We will need to block CAR region from evicts */ + mov $MSR_L2_QOS_MASK(1), %ecx + rdmsr + /* Invert bits that are to be used for cache */ + mov %ebx, %eax + xor $~0, %eax /* invert 32 bits */ + + /* + * Use CBM_LEN_MASK stored in mm3 to set bits based on Capacity Bit + * Mask Length. + */ + movd %mm3, %ebx + and %ebx, %eax + wrmsr + + post_code(0x26) + + /* Clear the cache memory region. This will also fill up the cache */ + movl $CONFIG_DCACHE_RAM_BASE, %edi + movl $CONFIG_DCACHE_RAM_SIZE, %ecx + shr $0x02, %ecx + xor %eax, %eax + cld + rep stosl + + post_code(0x27) + + /* Cache is populated. 
Use mask 1 that will block evicts */ + mov $MSR_IA32_PQR_ASSOC, %ecx + rdmsr + and $~MSR_IA32_PQR_ASSOC_MASK, %edx /* clear index bits first */ + or $1, %edx /* select mask 1 */ + wrmsr + + /* Enable prefetchers */ + mov $MSR_PREFETCH_CTL, %ecx + rdmsr + and $~(PREFETCH_L1_DISABLE | PREFETCH_L2_DISABLE), %eax + wrmsr + + post_code(0x28) + +/* jmp car_init_done */ + jmp car_init_ret + +#elif IS_ENABLED(CONFIG_INTEL_CAR_NEM_ENHANCED) +.global car_nem_enhanced +car_nem_enhanced: + /* Disable cache eviction (setup stage) */ + mov $MSR_EVICT_CTL, %ecx + rdmsr + or $0x1, %eax + wrmsr + post_code(0x26) + + /* Create n-way set associativity of cache */ + xorl %edi, %edi +find_llc_subleaf: + movl %edi, %ecx + movl $0x04, %eax + cpuid + inc %edi + and $0xe0, %al /* EAX[7:5] = Cache Level */ + cmp $0x60, %al /* Check to see if it is LLC */ + jnz find_llc_subleaf + + /* + * Set MSR 0xC91 IA32_L3_MASK_! = 0xE/0xFE/0xFFE/0xFFFE + * for 4/8/16 way of LLC + */ + shr $22, %ebx + inc %ebx + /* Calculate n-way associativity of LLC */ + mov %bl, %cl + + /* + * Maximizing RO cacheability while locking in the CAR to a + * single way since that particular way won't be victim candidate + * for evictions. + * This has been done after programing LLC_WAY_MASK_1 MSR + * with desired LLC way as mentioned below. + * + * Hence create Code and Data Size as per request + * Code Size (RO) : Up to 16M + * Data Size (RW) : Up to 256K + */ + movl $0x01, %eax + /* + * LLC Ways -> LLC_WAY_MASK_1: + * 4: 0x000E + * 8: 0x00FE + * 12: 0x0FFE + * 16: 0xFFFE + * + * These MSRs contain one bit per each way of LLC + * - If this bit is '0' - the way is protected from eviction + * - If this bit is '1' - the way is not protected from eviction + */ + shl %cl, %eax + subl $0x02, %eax + movl $MSR_IA32_L3_MASK_1, %ecx + xorl %edx, %edx + wrmsr + /* + * Set MSR 0xC92 IA32_L3_MASK_2 = 0x1 + * + * For SKL SOC, data size remains 256K consistently. 
+ * Hence, creating 1-way associative cache for Data + */ + mov $MSR_IA32_L3_MASK_2, %ecx + mov $0x01, %eax + xorl %edx, %edx + wrmsr + /* + * Set MSR_IA32_PQR_ASSOC = 0x02 + * + * Possible values: + * 0: Default value, no way mask should be applied + * 1: Apply way mask 1 to LLC + * 2: Apply way mask 2 to LLC + * 3: Shouldn't be use in NEM Mode + */ + movl $MSR_IA32_PQR_ASSOC, %ecx + movl $0x02, %eax + xorl %edx, %edx + wrmsr + + movl $CONFIG_DCACHE_RAM_BASE, %edi + movl $CONFIG_DCACHE_RAM_SIZE, %ecx + shr $0x02, %ecx + xor %eax, %eax + cld + rep stosl + /* + * Set MSR_IA32_PQR_ASSOC = 0x01 + * At this stage we apply LLC_WAY_MASK_1 to the cache. + * i.e. way 0 is protected from eviction. + */ + movl $MSR_IA32_PQR_ASSOC, %ecx + movl $0x01, %eax + xorl %edx, %edx + wrmsr + + post_code(0x27) + /* + * Enable No-Eviction Mode Run State by setting + * NO_EVICT_MODE MSR 2E0h bit [1] = '1'. + */ + + movl $MSR_EVICT_CTL, %ecx + rdmsr + orl $0x02, %eax + wrmsr + + post_code(0x28) + + jmp car_init_done +#endif + +#if CONFIG_IS_ENABLED(X86_16BIT_INIT) +_dt_ucode_base_size: + /* These next two fields are filled in by binman */ +.globl ucode_base +ucode_base: /* Declared in microcode.h */ + .long 0 /* microcode base */ +.globl ucode_size +ucode_size: /* Declared in microcode.h */ + .long 0 /* microcode size */ + .long CONFIG_SYS_MONITOR_BASE /* code region base */ + .long CONFIG_SYS_MONITOR_LEN /* code region size */ +#endif diff --git a/arch/x86/cpu/intel_common/car2_uninit.S b/arch/x86/cpu/intel_common/car2_uninit.S new file mode 100644 index 00000000000..aba3a5381e5 --- /dev/null +++ b/arch/x86/cpu/intel_common/car2_uninit.S @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2017 Intel Corp. 
+ * Copyright 2019 Google LLC + * Taken from coreboot file exit_car.S + */ + +#include <config.h> +#include <asm/msr-index.h> +#include <asm/mtrr.h> + +.text +.global car_uninit +car_uninit: + + /* + * Retrieve return address from stack as it will get trashed below if + * execution is utilizing the cache-as-ram stack. + */ + pop %ebx + + /* Disable MTRRs */ + mov $(MTRR_DEF_TYPE_MSR), %ecx + rdmsr + and $(~(MTRR_DEF_TYPE_EN | MTRR_DEF_TYPE_FIX_EN)), %eax + wrmsr + +#ifdef CONFIG_INTEL_CAR_NEM +.global car_nem_teardown +car_nem_teardown: + + /* invalidate cache contents */ + invd + + /* Knock down bit 1 then bit 0 of NEM control not combining steps */ + mov $(MSR_EVICT_CTL), %ecx + rdmsr + and $(~(1 << 1)), %eax + wrmsr + and $(~(1 << 0)), %eax + wrmsr + +#elif IS_ENABLED(CONFIG_INTEL_CAR_CQOS) +.global car_cqos_teardown +car_cqos_teardown: + + /* Go back to all-evicting mode, set both masks to all-1s */ + mov $MSR_L2_QOS_MASK(0), %ecx + rdmsr + mov $~0, %al + wrmsr + + mov $MSR_L2_QOS_MASK(1), %ecx + rdmsr + mov $~0, %al + wrmsr + + /* Reset CLOS selector to 0 */ + mov $MSR_IA32_PQR_ASSOC, %ecx + rdmsr + and $~MSR_IA32_PQR_ASSOC_MASK, %edx + wrmsr + +#elif IS_ENABLED(CONFIG_INTEL_CAR_NEM_ENHANCED) +.global car_nem_enhanced_teardown +car_nem_enhanced_teardown: + + /* invalidate cache contents */ + invd + + /* Knock down bit 1 then bit 0 of NEM control not combining steps */ + mov $(MSR_EVICT_CTL), %ecx + rdmsr + and $(~(1 << 1)), %eax + wrmsr + and $(~(1 << 0)), %eax + wrmsr + + /* Reset CLOS selector to 0 */ + mov $IA32_PQR_ASSOC, %ecx + rdmsr + and $~IA32_PQR_ASSOC_MASK, %edx + wrmsr +#endif + + /* Return to caller */ + jmp *%ebx
Newer Intel SoCs have different ways of setting up cache-as-ram (CAR). Add support for these along with suitable configuration options. Signed-off-by: Simon Glass <sjg@chromium.org> --- Changes in v3: - Drop unneeded Kconfig file Changes in v2: None arch/x86/Kconfig | 16 + arch/x86/cpu/intel_common/Makefile | 8 + arch/x86/cpu/intel_common/car2.S | 490 ++++++++++++++++++++++++ arch/x86/cpu/intel_common/car2_uninit.S | 87 +++++ 4 files changed, 601 insertions(+) create mode 100644 arch/x86/cpu/intel_common/car2.S create mode 100644 arch/x86/cpu/intel_common/car2_uninit.S