Message ID | 1422963927-32421-5-git-send-email-bmeng.cn@gmail.com |
---|---|
State | Superseded |
Delegated to: | Simon Glass |
Headers | show |
Hi Bin, On 3 February 2015 at 04:45, Bin Meng <bmeng.cn@gmail.com> wrote: > Add various utility codes needed for Quark MRC. > > Signed-off-by: Bin Meng <bmeng.cn@gmail.com> > > --- > There are 12 checkpatch warnings in this patch, which are: > > warning: arch/x86/cpu/quark/mrc_util.c,1446: Too many leading tabs - consider code refactoring > warning: arch/x86/cpu/quark/mrc_util.c,1450: line over 80 characters > ... > > Fixing 'Too many leading tabs ...' will be very dangerous, as I don't have > all the details on how Intel's MRC codes are actually written to play with > the hardware. Trying to refactor them may lead to a non-working MRC codes. > For the 'line over 80 characters' issue, we have to leave them as is now > due to the 'Too many leading tabs ...', sigh. The code looks fine for the most part - I only have nits. I'm not keen on BIT though. See my comments and what improvements you can make. It would be great to drop BIT. Re the debug macros, I suppose they are OK to keep. U-Boot doesn't have the concept of debug() for different categories or levels of verbosity. > > arch/x86/cpu/quark/hte.c | 398 +++++++++++ > arch/x86/cpu/quark/hte.h | 44 ++ > arch/x86/cpu/quark/mrc_util.c | 1499 +++++++++++++++++++++++++++++++++++++++++ > arch/x86/cpu/quark/mrc_util.h | 153 +++++ > 4 files changed, 2094 insertions(+) > create mode 100644 arch/x86/cpu/quark/hte.c > create mode 100644 arch/x86/cpu/quark/hte.h > create mode 100644 arch/x86/cpu/quark/mrc_util.c > create mode 100644 arch/x86/cpu/quark/mrc_util.h > > diff --git a/arch/x86/cpu/quark/hte.c b/arch/x86/cpu/quark/hte.c > new file mode 100644 > index 0000000..d813c9c > --- /dev/null > +++ b/arch/x86/cpu/quark/hte.c > @@ -0,0 +1,398 @@ > +/* > + * Copyright (C) 2013, Intel Corporation > + * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com> > + * > + * Ported from Intel released Quark UEFI BIOS > + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ Remove training slash? > + * > + * SPDX-License-Identifier: Intel > + */ > + > +#include <common.h> > +#include <asm/arch/mrc.h> > +#include <asm/arch/msg_port.h> > +#include "mrc_util.h" > +#include "hte.h" > + > +/** > + * This function enables HTE to detect all possible errors for s/This function// globally I'd suggest present tense, like "enable HTE to detect all possible errors for... > + * the given training parameters (per-bit or full byte lane). > + */ > +static void hte_enable_all_errors(void) > +{ > + msg_port_write(HTE, 0x000200A2, 0xFFFFFFFF); > + msg_port_write(HTE, 0x000200A3, 0x000000FF); > + msg_port_write(HTE, 0x000200A4, 0x00000000); Lower case hex again. > +} > + > +/** > + * This function goes and reads the HTE register in order to find any error > + * > + * @return: The errors detected in the HTE status register > + */ > +static u32 hte_check_errors(void) > +{ > + return msg_port_read(HTE, 0x000200A7); > +} > + > +/** > + * This function waits until HTE finishes > + */ > +static void hte_wait_for_complete(void) > +{ > + u32 tmp; > + > + ENTERFN(); > + > + do {} while ((msg_port_read(HTE, 0x00020012) & BIT30) != 0); > + > + tmp = msg_port_read(HTE, 0x00020011); > + tmp |= BIT9; > + tmp &= ~(BIT12 | BIT13); > + msg_port_write(HTE, 0x00020011, tmp); > + > + LEAVEFN(); > +} > + > +/** > + * This function clears registers related with errors in the HTE > + */ > +static void hte_clear_error_regs(void) > +{ > + u32 tmp; > + > + /* > + * Clear all HTE errors and enable error checking > + * for burst and chunk. > + */ > + tmp = msg_port_read(HTE, 0x000200A1); > + tmp |= BIT8; > + msg_port_write(HTE, 0x000200A1, tmp); > +} > + > +/** > + * This function executes basic single cache line memory write/read/verify > + * test using simple constant pattern, different for READ_RAIN and REAS_TRAIN? > + * WRITE_TRAIN modes. > + * > + * See hte_basic_write_read() which is external visible wrapper. the external (fix below also) > + * > + * @mrc_params: host struture for all MRC global data > + * @addr: memory adress being tested (must hit specific channel/rank) > + * @first_run: if set then hte registers are configured, otherwise it is the hte? > + * assumed configuration is done and just re-run the test assumed configuration is done the we just re-run the test , (fix below also) > + * @mode: READ_TRAIN or WRITE_TRAIN (the difference is in the pattern) > + * > + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) > + */ > +static u16 hte_basic_data_cmp(struct mrc_params *mrc_params, u32 addr, > + u8 first_run, u8 mode) > +{ > + u32 pattern; > + u32 offset; > + > + if (first_run) { > + msg_port_write(HTE, 0x00020020, 0x01B10021); > + msg_port_write(HTE, 0x00020021, 0x06000000); > + msg_port_write(HTE, 0x00020022, addr >> 6); > + msg_port_write(HTE, 0x00020062, 0x00800015); > + msg_port_write(HTE, 0x00020063, 0xAAAAAAAA); > + msg_port_write(HTE, 0x00020064, 0xCCCCCCCC); > + msg_port_write(HTE, 0x00020065, 0xF0F0F0F0); > + msg_port_write(HTE, 0x00020061, 0x00030008); > + > + if (mode == WRITE_TRAIN) > + pattern = 0xC33C0000; > + else /* READ_TRAIN */ > + pattern = 0xAA5555AA; > + > + for (offset = 0x80; offset <= 0x8F; offset++) > + msg_port_write(HTE, offset, pattern); > + } > + > + msg_port_write(HTE, 0x000200A1, 0xFFFF1000); > + msg_port_write(HTE, 0x00020011, 0x00011000); > + msg_port_write(HTE, 0x00020011, 0x00011100); > + > + hte_wait_for_complete(); > + > + /* > + * Return bits 15:8 of HTE_CH0_ERR_XSTAT to check for > + * any bytelane errors. > + */ > + return (hte_check_errors() >> 8) & 0xFF; > +} > + > +/** > + * This function examines single cache line memory with write/read/verify > + * test using multiple data patterns (victim-aggressor algorithm). > + * > + * See hte_write_stress_bit_lanes() which is external visible wrapper. > + * > + * @mrc_params: host struture for all MRC global data structure > + * @addr: memory adress being tested (must hit specific channel/rank) > + * @loop_cnt: number of test iterations > + * @seed_victim: victim data pattern seed > + * @seed_aggressor: aggressor data pattern seed > + * @victim_bit: should be 0 as auto rotate feature is in use auto-rotate > + * @first_run: if set then hte registers are configured, otherwise it is Actually I wonder if HTE would be better than hte, which looks like a 'the' typo, particularly if you leave out 'the'. Also can you please comment at the top of the file (first function) what HTE stands for)? > + * assumed configuration is done and just re-run the test > + * > + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) > + */ > +static u16 hte_rw_data_cmp(struct mrc_params *mrc_params, u32 addr, > + u8 loop_cnt, u32 seed_victim, u32 seed_aggressor, > + u8 victim_bit, u8 first_run) > +{ > + u32 offset; > + u32 tmp; > + > + if (first_run) { > + msg_port_write(HTE, 0x00020020, 0x00910024); > + msg_port_write(HTE, 0x00020023, 0x00810024); > + msg_port_write(HTE, 0x00020021, 0x06070000); > + msg_port_write(HTE, 0x00020024, 0x06070000); > + msg_port_write(HTE, 0x00020022, addr >> 6); > + msg_port_write(HTE, 0x00020025, addr >> 6); > + msg_port_write(HTE, 0x00020062, 0x0000002A); > + msg_port_write(HTE, 0x00020063, seed_victim); > + msg_port_write(HTE, 0x00020064, seed_aggressor); > + msg_port_write(HTE, 0x00020065, seed_victim); > + > + /* > + * Write the pattern buffers to select the victim bit > + * > + * Start with bit0 > + */ > + for (offset = 0x80; offset <= 0x8F; offset++) { > + if ((offset % 8) == victim_bit) > + msg_port_write(HTE, offset, 0x55555555); > + else > + msg_port_write(HTE, offset, 0xCCCCCCCC); > + } > + > + msg_port_write(HTE, 0x00020061, 0x00000000); > + msg_port_write(HTE, 0x00020066, 0x03440000); > + msg_port_write(HTE, 0x000200A1, 0xFFFF1000); > + } > + > + tmp = 0x10001000 | (loop_cnt << 16); > + msg_port_write(HTE, 0x00020011, tmp); > + msg_port_write(HTE, 0x00020011, tmp | BIT8); > + > + hte_wait_for_complete(); > + > + /* > + * Return bits 15:8 of HTE_CH0_ERR_XSTAT to check for > + * any bytelane errors. > + */ > + return (hte_check_errors() >> 8) & 0xFF; > +} > + > +/** > + * This function uses HW HTE engine to initialize or test all memory attached > + * to a given DUNIT. If flag is MRC_MEM_INIT, this routine writes 0s to all > + * memory locations to initialize ECC. If flag is MRC_MEM_TEST, this routine > + * will send an 5AA55AA5 pattern to all memory locations on the RankMask and > + * then read it back. Then it sends an A55AA55A pattern to all memory locations > + * on the RankMask and reads it back. > + * > + * @mrc_params: host struture for all MRC global data > + * @flag: MRC_MEM_INIT or MRC_MEM_TEST > + * > + * @return: errors register showing HTE failures. Also prints out which rank > + * failed the HTE test if failure occurs. For rank detection to work, > + * the address map must be left in its default state. If MRC changes > + * the address map, this function must be modified to change it back > + * to default at the beginning, then restore it at the end. > + */ > +u32 hte_mem_init(struct mrc_params *mrc_params, u8 flag) > +{ > + u32 offset; > + int test_num; > + int i; > + > + /* > + * Clear out the error registers at the start of each memory > + * init or memory test run. > + */ > + hte_clear_error_regs(); > + > + msg_port_write(HTE, 0x00020062, 0x00000015); > + > + for (offset = 0x80; offset <= 0x8F; offset++) > + msg_port_write(HTE, offset, ((offset & 1) ? 0xA55A : 0x5AA5)); > + > + msg_port_write(HTE, 0x00020021, 0x00000000); > + msg_port_write(HTE, 0x00020022, (mrc_params->mem_size >> 6) - 1); > + msg_port_write(HTE, 0x00020063, 0xAAAAAAAA); > + msg_port_write(HTE, 0x00020064, 0xCCCCCCCC); > + msg_port_write(HTE, 0x00020065, 0xF0F0F0F0); > + msg_port_write(HTE, 0x00020066, 0x03000000); > + > + switch (flag) { > + case MRC_MEM_INIT: > + /* > + * Only 1 write pass through memory is needed > + * to initialize ECC > + */ > + test_num = 1; > + break; > + case MRC_MEM_TEST: > + /* Write/read then write/read with inverted pattern */ > + test_num = 4; > + break; > + default: > + DPF(D_INFO, "Unknown parameter for flag: %d\n", flag); > + return 0xFFFFFFFF; > + } > + > + DPF(D_INFO, "hte_mem_init"); debug() > + > + for (i = 0; i < test_num; i++) { > + DPF(D_INFO, "."); > + > + if (i == 0) { > + msg_port_write(HTE, 0x00020061, 0x00000000); > + msg_port_write(HTE, 0x00020020, 0x00110010); > + } else if (i == 1) { > + msg_port_write(HTE, 0x00020061, 0x00000000); > + msg_port_write(HTE, 0x00020020, 0x00010010); > + } else if (i == 2) { > + msg_port_write(HTE, 0x00020061, 0x00010100); > + msg_port_write(HTE, 0x00020020, 0x00110010); > + } else { > + msg_port_write(HTE, 0x00020061, 0x00010100); > + msg_port_write(HTE, 0x00020020, 0x00010010); > + } > + > + msg_port_write(HTE, 0x00020011, 0x00111000); > + msg_port_write(HTE, 0x00020011, 0x00111100); > + > + hte_wait_for_complete(); > + > + /* If this is a READ pass, check for errors at the end */ > + if ((i % 2) == 1) { > + /* Return immediately if error */ > + if (hte_check_errors()) > + break; > + } > + } > + > + DPF(D_INFO, "done\n"); > + > + return hte_check_errors(); > +} > + > +/** > + * This function executes basic single cache line memory write/read/verify 'executes a basic' > + * test using simple constant pattern, different for READ_RAIN and > + * WRITE_TRAIN modes. > + * > + * @mrc_params: host struture for all MRC global data structure, please fix globally > + * @addr: memory adress being tested (must hit specific channel/rank) > + * @first_run: if set then hte registers are configured, otherwise it is > + * assumed configuration is done and just re-run the test > + * @mode: READ_TRAIN or WRITE_TRAIN (the difference is in the pattern) > + * > + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) > + */ > +u16 hte_basic_write_read(struct mrc_params *mrc_params, u32 addr, > + u8 first_run, u8 mode) Why do we use u8 for these? Would uint be good enough? Just a suggestion. > +{ > + u16 errors; > + > + ENTERFN(); > + > + /* Enable all error reporting in preparation for HTE test */ > + hte_enable_all_errors(); > + hte_clear_error_regs(); > + > + errors = hte_basic_data_cmp(mrc_params, addr, first_run, mode); > + > + LEAVEFN(); > + > + return errors; > +} > + > +/** > + * This function examines single cache line memory with write/read/verify examines a single-cache-line memory (at least I think this is what it is saying) > + * test using multiple data patterns (victim-aggressor algorithm). > + * > + * @mrc_params: host struture for all MRC global data > + * @addr: memory adress being tested (must hit specific channel/rank) > + * @first_run: if set then hte registers are configured, otherwise it is > + * assumed configuration is done and just re-run the test > + * > + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) > + */ > +u16 hte_write_stress_bit_lanes(struct mrc_params *mrc_params, > + u32 addr, u8 first_run) > +{ > + u16 errors; > + u8 victim_bit = 0; > + > + ENTERFN(); > + > + /* Enable all error reporting in preparation for HTE test */ > + hte_enable_all_errors(); > + hte_clear_error_regs(); > + > + /* > + * Loop through each bit in the bytelane. > + * > + * Each pass creates a victim bit while keeping all other bits the same > + * as aggressors. AVN HTE adds an auto-rotate feature which allows us > + * to program the entire victim/aggressor sequence in 1 step. What is AVN? > + * > + * The victim bit rotates on each pass so no need to have software > + * implement a victim bit loop like on VLV. VLV? I think it is sometimes better to write these out and put the abbreviation in brackets after it, at least once in the file. > + */ > + errors = hte_rw_data_cmp(mrc_params, addr, HTE_LOOP_CNT, > + HTE_LFSR_VICTIM_SEED, HTE_LFSR_AGRESSOR_SEED, > + victim_bit, first_run); > + > + LEAVEFN(); > + > + return errors; > +} > + > +/** > + * This function execute basic single cache line memory write or read. as above > + * This is just for receive enable / fine write levelling purpose. write-levelling (I think that's what you mean) > + * > + * @addr: memory adress being tested (must hit specific channel/rank) > + * @first_run: if set then hte registers are configured, otherwise it is > + * assumed configuration is done and just re-run the test > + * @is_write: when non-zero memory write operation executed, otherwise read > + */ > +void hte_mem_op(u32 addr, u8 first_run, u8 is_write) > +{ > + u32 offset; > + u32 tmp; > + > + hte_enable_all_errors(); > + hte_clear_error_regs(); > + > + if (first_run) { > + tmp = is_write ? 0x01110021 : 0x01010021; > + msg_port_write(HTE, 0x00020020, tmp); > + > + msg_port_write(HTE, 0x00020021, 0x06000000); > + msg_port_write(HTE, 0x00020022, addr >> 6); > + msg_port_write(HTE, 0x00020062, 0x00800015); > + msg_port_write(HTE, 0x00020063, 0xAAAAAAAA); > + msg_port_write(HTE, 0x00020064, 0xCCCCCCCC); > + msg_port_write(HTE, 0x00020065, 0xF0F0F0F0); > + msg_port_write(HTE, 0x00020061, 0x00030008); > + > + for (offset = 0x80; offset <= 0x8F; offset++) > + msg_port_write(HTE, offset, 0xC33C0000); > + } > + > + msg_port_write(HTE, 0x000200A1, 0xFFFF1000); > + msg_port_write(HTE, 0x00020011, 0x00011000); > + msg_port_write(HTE, 0x00020011, 0x00011100); > + > + hte_wait_for_complete(); > +} > diff --git a/arch/x86/cpu/quark/hte.h b/arch/x86/cpu/quark/hte.h > new file mode 100644 > index 0000000..3a173ea > --- /dev/null > +++ b/arch/x86/cpu/quark/hte.h > @@ -0,0 +1,44 @@ > +/* > + * Copyright (C) 2013, Intel Corporation > + * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com> > + * > + * Ported from Intel released Quark UEFI BIOS > + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ > + * > + * SPDX-License-Identifier: Intel > + */ > + > +#ifndef _HTE_H_ > +#define _HTE_H_ > + > +enum { > + MRC_MEM_INIT, > + MRC_MEM_TEST > +}; > + > +enum { > + READ_TRAIN, > + WRITE_TRAIN > +}; > + > +/* > + * EXP_LOOP_CNT field of HTE_CMD_CTL > + * > + * This CANNOT be less than 4! > + */ > +#define HTE_LOOP_CNT 5 > + > +/* random seed for victim */ > +#define HTE_LFSR_VICTIM_SEED 0xF294BA21 > + > +/* random seed for aggressor */ > +#define HTE_LFSR_AGRESSOR_SEED 0xEBA7492D > + > +u32 hte_mem_init(struct mrc_params *mrc_params, u8 flag); > +u16 hte_basic_write_read(struct mrc_params *mrc_params, u32 addr, > + u8 first_run, u8 mode); > +u16 hte_write_stress_bit_lanes(struct mrc_params *mrc_params, > + u32 addr, u8 first_run); > +void hte_mem_op(u32 addr, u8 first_run, u8 is_write); Can you move the comments from the .c to the .h for these exported functions? > + > +#endif /* _HTE_H_ */ > diff --git a/arch/x86/cpu/quark/mrc_util.c b/arch/x86/cpu/quark/mrc_util.c > new file mode 100644 > index 0000000..1ae42d6 > --- /dev/null > +++ b/arch/x86/cpu/quark/mrc_util.c > @@ -0,0 +1,1499 @@ > +/* > + * Copyright (C) 2013, Intel Corporation > + * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com> > + * > + * Ported from Intel released Quark UEFI BIOS > + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ > + * > + * SPDX-License-Identifier: Intel > + */ > + > +#include <common.h> > +#include <asm/arch/device.h> > +#include <asm/arch/mrc.h> > +#include <asm/arch/msg_port.h> > +#include "mrc_util.h" > +#include "hte.h" > +#include "smc.h" > + > +static const uint8_t vref_codes[64] = { > + /* lowest to highest */ > + 0x3F, 0x3E, 0x3D, 0x3C, 0x3B, 0x3A, 0x39, 0x38, > + 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, > + 0x2F, 0x2E, 0x2D, 0x2C, 0x2B, 0x2A, 0x29, 0x28, > + 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, > + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, > + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, > + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, > + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F > +}; > + > +void mrc_write_mask(u32 unit, u32 addr, u32 data, u32 mask) > +{ > + msg_port_write(unit, addr, > + (msg_port_read(unit, addr) & ~(mask)) | > + ((data) & (mask))); > +} > + > +void mrc_alt_write_mask(u32 unit, u32 addr, u32 data, u32 mask) > +{ > + msg_port_alt_write(unit, addr, > + (msg_port_alt_read(unit, addr) & ~(mask)) | > + ((data) & (mask))); > +} > + > +void mrc_post_code(uint8_t major, uint8_t minor) > +{ > + /* send message to UART */ > + DPF(D_INFO, "POST: 0x%01x%02x\n", major, minor); > + > + /* error check */ > + if (major == 0xEE) > + hang(); > +} > + > +/* Delay number of nanoseconds */ > +void delay_n(uint32_t ns) > +{ > + /* 1000 MHz clock has 1ns period --> no conversion required */ > + uint64_t final_tsc = rdtsc(); blank line here after declarations end > + final_tsc += ((get_tbclk_mhz() * ns) / 1000); > + > + while (rdtsc() < final_tsc) > + ; > +} > + > +/* Delay number of microseconds */ > +void delay_u(uint32_t ms) > +{ > + /* 64-bit math is not an option, just use loops */ > + while (ms--) > + delay_n(1000); > +} Some day I suspect these could be pulled out into general x86 functions. Let's see if anything else needs them first. > + > +/* Select Memory Manager as the source for PRI interface */ > +void select_mem_mgr(void) > +{ > + u32 dco; > + > + ENTERFN(); > + > + dco = msg_port_read(MEM_CTLR, DCO); > + dco &= ~BIT28; ~(1 << 28) Ah but I see you are using this everywhere. U-Boot tries to avoid defining this sort of thing. See some comments below about this. > + msg_port_write(MEM_CTLR, DCO, dco); > + > + LEAVEFN(); > +} > + > +/* Select HTE as the source for PRI interface */ > +void select_hte(void) > +{ > + u32 dco; > + > + ENTERFN(); > + > + dco = msg_port_read(MEM_CTLR, DCO); > + dco |= BIT28; > + msg_port_write(MEM_CTLR, DCO, dco); > + > + LEAVEFN(); > +} > + > +/* > + * Send DRAM command > + * data should be formated using DCMD_Xxxx macro or emrsXCommand structure > + */ > +void dram_init_command(uint32_t data) > +{ > + pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_DATA_REG, data); > + pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_CTRL_EXT_REG, 0); > + msg_port_setup(MSG_OP_DRAM_INIT, MEM_CTLR, 0); > + > + DPF(D_REGWR, "WR32 %03X %08X %08X\n", MEM_CTLR, 0, data); > +} > + > +/* Send DRAM wake command using special MCU side-band WAKE opcode */ > +void dram_wake_command(void) > +{ > + ENTERFN(); > + > + msg_port_setup(MSG_OP_DRAM_WAKE, MEM_CTLR, 0); > + > + LEAVEFN(); > +} > + > +void training_message(uint8_t channel, uint8_t rank, uint8_t byte_lane) > +{ > + /* send message to UART */ > + DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane); > +} > + > +/* > + * This function will program the RCVEN delays > + * > + * (currently doesn't comprehend rank) > + */ > +void set_rcvn(uint8_t channel, uint8_t rank, > + uint8_t byte_lane, uint32_t pi_count) reformat to 80cols. Should this or any other function in this file be static? > +{ > + uint32_t reg; > + uint32_t msk; > + uint32_t temp; > + > + ENTERFN(); > + > + DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n", > + channel, rank, byte_lane, pi_count); > + > + /* > + * RDPTR (1/2 MCLK, 64 PIs) > + * BL0 -> B01PTRCTL0[11:08] (0x0-0xF) > + * BL1 -> B01PTRCTL0[23:20] (0x0-0xF) > + */ > + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET); > + msk = (byte_lane & BIT0) ? (BIT23 | BIT22 | BIT21 | BIT20) : > + (BIT11 | BIT10 | BIT9 | BIT8); Would this be better as: (0xf << 20) | (0xf << 8) It might be more meaningful also. I really don't think these long strings of | are nice. > + temp = (byte_lane & BIT0) ? ((pi_count / HALF_CLK) << 20) : > + ((pi_count / HALF_CLK) << 8); > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* Adjust PI_COUNT */ > + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; > + > + /* > + * PI (1/64 MCLK, 1 PIs) > + * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F) > + * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F) lower case hex again > + */ > + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); > + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET)); > + msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24); > + temp = pi_count << 24; > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* > + * DEADBAND > + * BL0/1 -> B01DBCTL1[08/11] (+1 select) > + * BL0/1 -> B01DBCTL1[02/05] (enable) > + */ > + reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET); > + msk = 0x00; > + temp = 0x00; > + > + /* enable */ > + msk |= (byte_lane & BIT0) ? (BIT5) : (BIT2); Remove () around BIT5 > + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) > + temp |= msk; > + > + /* select */ > + msk |= (byte_lane & BIT0) ? (BIT11) : (BIT8); > + if (pi_count < EARLY_DB) > + temp |= msk; These uses of BIT seem more useful to me. Still it would be better to have #defines for the bits which actually describe their meaning. Maybe you don't know the meaning though... > + > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* error check */ > + if (pi_count > 0x3F) { > + training_message(channel, rank, byte_lane); > + mrc_post_code(0xEE, 0xE0); > + } > + > + LEAVEFN(); > +} > + > +/* > + * This function will return the current RCVEN delay on the given > + * channel, rank, byte_lane as an absolute PI count. > + * > + * (currently doesn't comprehend rank) > + */ > +uint32_t get_rcvn(uint8_t channel, uint8_t rank, uint8_t byte_lane) > +{ > + uint32_t reg; > + uint32_t temp; > + uint32_t pi_count; > + > + ENTERFN(); > + > + /* > + * RDPTR (1/2 MCLK, 64 PIs) > + * BL0 -> B01PTRCTL0[11:08] (0x0-0xF) > + * BL1 -> B01PTRCTL0[23:20] (0x0-0xF) > + */ > + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET); > + temp = msg_port_alt_read(DDRPHY, reg); > + temp >>= (byte_lane & BIT0) ? (20) : (8); > + temp &= 0xF; > + > + /* Adjust PI_COUNT */ > + pi_count = temp * HALF_CLK; > + > + /* > + * PI (1/64 MCLK, 1 PIs) > + * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F) > + * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F) > + */ > + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); Please avoid () around simple constants. Put them in the #define/enum if needed. > + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET)); > + temp = msg_port_alt_read(DDRPHY, reg); > + temp >>= 24; > + temp &= 0x3F; > + > + /* Adjust PI_COUNT */ > + pi_count += temp; > + > + LEAVEFN(); > + > + return pi_count; > +} > + > +/* > + * This function will program the RDQS delays based on an absolute > + * amount of PIs. > + * > + * (currently doesn't comprehend rank) > + */ > +void set_rdqs(uint8_t channel, uint8_t rank, > + uint8_t byte_lane, uint32_t pi_count) > +{ > + uint32_t reg; > + uint32_t msk; > + uint32_t temp; > + > + ENTERFN(); > + DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n", > + channel, rank, byte_lane, pi_count); > + > + /* > + * PI (1/128 MCLK) > + * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47) > + * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47) > + */ > + reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE); > + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET)); > + msk = (BIT6 | BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0); > + temp = pi_count << 0; > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* error check (shouldn't go above 0x3F) */ > + if (pi_count > 0x47) { > + training_message(channel, rank, byte_lane); > + mrc_post_code(0xEE, 0xE1); > + } > + > + LEAVEFN(); > +} > + > +/* > + * This function will return the current RDQS delay on the given > + * channel, rank, byte_lane as an absolute PI count. > + * > + * (currently doesn't comprehend rank) > + */ > +uint32_t get_rdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane) > +{ > + uint32_t reg; > + uint32_t temp; > + uint32_t pi_count; > + > + ENTERFN(); > + > + /* > + * PI (1/128 MCLK) > + * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47) > + * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47) > + */ > + reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE); > + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET)); > + temp = msg_port_alt_read(DDRPHY, reg); > + > + /* Adjust PI_COUNT */ > + pi_count = temp & 0x7F; > + > + LEAVEFN(); > + > + return pi_count; > +} > + > +/* > + * This function will program the WDQS delays based on an absolute > + * amount of PIs. > + * > + * (currently doesn't comprehend rank) > + */ > +void set_wdqs(uint8_t channel, uint8_t rank, > + uint8_t byte_lane, uint32_t pi_count) > +{ > + uint32_t reg; > + uint32_t msk; > + uint32_t temp; > + > + ENTERFN(); > + > + DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n", > + channel, rank, byte_lane, pi_count); > + > + /* > + * RDPTR (1/2 MCLK, 64 PIs) > + * BL0 -> B01PTRCTL0[07:04] (0x0-0xF) > + * BL1 -> B01PTRCTL0[19:16] (0x0-0xF) > + */ > + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET); > + msk = (byte_lane & BIT0) ? (BIT19 | BIT18 | BIT17 | BIT16) : > + (BIT7 | BIT6 | BIT5 | BIT4); > + temp = pi_count / HALF_CLK; > + temp <<= (byte_lane & BIT0) ? (16) : (4); > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* Adjust PI_COUNT */ > + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; > + > + /* > + * PI (1/64 MCLK, 1 PIs) > + * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F) > + * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F) > + */ > + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); > + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET)); > + msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16); > + temp = pi_count << 16; > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* > + * DEADBAND > + * BL0/1 -> B01DBCTL1[07/10] (+1 select) > + * BL0/1 -> B01DBCTL1[01/04] (enable) > + */ > + reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET); > + msk = 0x00; > + temp = 0x00; > + > + /* enable */ > + msk |= (byte_lane & BIT0) ? (BIT4) : (BIT1); > + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) > + temp |= msk; > + > + /* select */ > + msk |= (byte_lane & BIT0) ? (BIT10) : (BIT7); > + if (pi_count < EARLY_DB) > + temp |= msk; > + > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* error check */ > + if (pi_count > 0x3F) { > + training_message(channel, rank, byte_lane); > + mrc_post_code(0xEE, 0xE2); > + } > + > + LEAVEFN(); > +} > + > +/* > + * This function will return the amount of WDQS delay on the given > + * channel, rank, byte_lane as an absolute PI count. > + * > + * (currently doesn't comprehend rank) > + */ > +uint32_t get_wdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane) > +{ > + uint32_t reg; > + uint32_t temp; > + uint32_t pi_count; > + > + ENTERFN(); > + > + /* > + * RDPTR (1/2 MCLK, 64 PIs) > + * BL0 -> B01PTRCTL0[07:04] (0x0-0xF) > + * BL1 -> B01PTRCTL0[19:16] (0x0-0xF) > + */ > + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET); > + temp = msg_port_alt_read(DDRPHY, reg); > + temp >>= (byte_lane & BIT0) ? (16) : (4); > + temp &= 0xF; > + > + /* Adjust PI_COUNT */ > + pi_count = (temp * HALF_CLK); > + > + /* > + * PI (1/64 MCLK, 1 PIs) > + * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F) > + * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F) > + */ > + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); > + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET)); > + temp = msg_port_alt_read(DDRPHY, reg); > + temp >>= 16; > + temp &= 0x3F; > + > + /* Adjust PI_COUNT */ > + pi_count += temp; > + > + LEAVEFN(); > + > + return pi_count; > +} > + > +/* > + * This function will program the WDQ delays based on an absolute > + * number of PIs. > + * > + * (currently doesn't comprehend rank) > + */ > +void set_wdq(uint8_t channel, uint8_t rank, > + uint8_t byte_lane, uint32_t pi_count) > +{ > + uint32_t reg; > + uint32_t msk; > + uint32_t temp; > + > + ENTERFN(); > + > + DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n", > + channel, rank, byte_lane, pi_count); > + > + /* > + * RDPTR (1/2 MCLK, 64 PIs) > + * BL0 -> B01PTRCTL0[03:00] (0x0-0xF) > + * BL1 -> B01PTRCTL0[15:12] (0x0-0xF) > + */ > + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET); > + msk = (byte_lane & BIT0) ? (BIT15 | BIT14 | BIT13 | BIT12) : > + (BIT3 | BIT2 | BIT1 | BIT0); > + temp = pi_count / HALF_CLK; > + temp <<= (byte_lane & BIT0) ? (12) : (0); > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* Adjust PI_COUNT */ > + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; > + > + /* > + * PI (1/64 MCLK, 1 PIs) > + * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F) > + * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F) > + */ > + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); > + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET)); > + msk = (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8); > + temp = pi_count << 8; > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* > + * DEADBAND > + * BL0/1 -> B01DBCTL1[06/09] (+1 select) > + * BL0/1 -> B01DBCTL1[00/03] (enable) > + */ > + reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET); > + msk = 0x00; > + temp = 0x00; > + > + /* enable */ > + msk |= (byte_lane & BIT0) ? (BIT3) : (BIT0); > + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) > + temp |= msk; > + > + /* select */ > + msk |= (byte_lane & BIT0) ? (BIT9) : (BIT6); > + if (pi_count < EARLY_DB) > + temp |= msk; > + > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* error check */ > + if (pi_count > 0x3F) { > + training_message(channel, rank, byte_lane); > + mrc_post_code(0xEE, 0xE3); > + } > + > + LEAVEFN(); > +} > + > +/* > + * This function will return the amount of WDQ delay on the given > + * channel, rank, byte_lane as an absolute PI count. > + * > + * (currently doesn't comprehend rank) > + */ > +uint32_t get_wdq(uint8_t channel, uint8_t rank, uint8_t byte_lane) > +{ > + uint32_t reg; > + uint32_t temp; > + uint32_t pi_count; > + > + ENTERFN(); > + > + /* > + * RDPTR (1/2 MCLK, 64 PIs) > + * BL0 -> B01PTRCTL0[03:00] (0x0-0xF) > + * BL1 -> B01PTRCTL0[15:12] (0x0-0xF) > + */ > + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET); > + temp = msg_port_alt_read(DDRPHY, reg); > + temp >>= (byte_lane & BIT0) ? (12) : (0); > + temp &= 0xF; > + > + /* Adjust PI_COUNT */ > + pi_count = (temp * HALF_CLK); > + > + /* > + * PI (1/64 MCLK, 1 PIs) > + * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F) > + * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F) > + */ > + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); > + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET)); > + temp = msg_port_alt_read(DDRPHY, reg); > + temp >>= 8; > + temp &= 0x3F; > + > + /* Adjust PI_COUNT */ > + pi_count += temp; > + > + LEAVEFN(); > + > + return pi_count; > +} > + > +/* > + * This function will program the WCMD delays based on an absolute > + * number of PIs. > + */ > +void set_wcmd(uint8_t channel, uint32_t pi_count) > +{ > + uint32_t reg; > + uint32_t msk; > + uint32_t temp; > + > + ENTERFN(); > + > + /* > + * RDPTR (1/2 MCLK, 64 PIs) > + * CMDPTRREG[11:08] (0x0-0xF) > + */ > + reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET); > + msk = (BIT11 | BIT10 | BIT9 | BIT8); > + temp = pi_count / HALF_CLK; > + temp <<= 8; > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* Adjust PI_COUNT */ > + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; > + > + /* > + * PI (1/64 MCLK, 1 PIs) > + * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused) > + * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused) > + * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused) > + * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused) > + * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused) > + * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F) > + * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused) > + * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused) > + */ > + reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET); > + > + msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24 | > + BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16 | > + BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8 | > + BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0); > + > + temp = (pi_count << 24) | (pi_count << 16) | > + (pi_count << 8) | (pi_count << 0); > + > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + reg = CMDDLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); /* PO */ > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* > + * DEADBAND > + * CMDCFGREG0[17] (+1 select) > + * CMDCFGREG0[16] (enable) > + */ > + reg = CMDCFGREG0 + (channel * DDRIOCCC_CH_OFFSET); > + msk = 0x00; > + temp = 0x00; > + > + /* enable */ > + msk |= BIT16; > + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) > + temp |= msk; > + > + /* select */ > + msk |= BIT17; > + if (pi_count < EARLY_DB) > + temp |= msk; > + > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* error check */ > + if (pi_count > 0x3F) > + mrc_post_code(0xEE, 0xE4); > + > + LEAVEFN(); > +} > + > +/* > + * This function will return the amount of WCMD delay on the given > + * channel as an absolute PI count. > + */ > +uint32_t get_wcmd(uint8_t channel) > +{ > + uint32_t reg; > + uint32_t temp; > + uint32_t pi_count; > + > + ENTERFN(); > + > + /* > + * RDPTR (1/2 MCLK, 64 PIs) > + * CMDPTRREG[11:08] (0x0-0xF) > + */ > + reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET); > + temp = msg_port_alt_read(DDRPHY, reg); > + temp >>= 8; > + temp &= 0xF; > + > + /* Adjust PI_COUNT */ > + pi_count = temp * HALF_CLK; > + > + /* > + * PI (1/64 MCLK, 1 PIs) > + * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused) > + * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused) > + * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused) > + * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused) > + * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused) > + * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F) > + * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused) > + * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused) > + */ > + reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET); > + temp = msg_port_alt_read(DDRPHY, reg); > + temp >>= 16; > + temp &= 0x3F; > + > + /* Adjust PI_COUNT */ > + pi_count += temp; > + > + LEAVEFN(); > + > + return pi_count; > +} > + > +/* > + * This function will program the WCLK delays based on an absolute > + * number of PIs. > + */ > +void set_wclk(uint8_t channel, uint8_t rank, uint32_t pi_count) > +{ > + uint32_t reg; > + uint32_t msk; > + uint32_t temp; > + > + ENTERFN(); > + > + /* > + * RDPTR (1/2 MCLK, 64 PIs) > + * CCPTRREG[15:12] -> CLK1 (0x0-0xF) > + * CCPTRREG[11:08] -> CLK0 (0x0-0xF) > + */ > + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); > + msk = (BIT15 | BIT14 | BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8); mask = 0xff00 is much better, isn't it? > + temp = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8); > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* Adjust PI_COUNT */ > + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; > + > + /* > + * PI (1/64 MCLK, 1 PIs) > + * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F) > + * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F) > + */ > + reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0); > + reg += (channel * DDRIOCCC_CH_OFFSET); > + msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16 | > + BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8); Ick! > + temp = (pi_count << 16) | (pi_count << 8); > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + reg = (rank) ? (ECCB1DLLPICODER1) : (ECCB1DLLPICODER1); Remove all (), and below. Please fix globally. > + reg += (channel * DDRIOCCC_CH_OFFSET); > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + reg = (rank) ? (ECCB1DLLPICODER2) : (ECCB1DLLPICODER2); > + reg += (channel * DDRIOCCC_CH_OFFSET); > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + reg = (rank) ? (ECCB1DLLPICODER3) : (ECCB1DLLPICODER3); > + reg += (channel * DDRIOCCC_CH_OFFSET); > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* > + * DEADBAND > + * CCCFGREG1[11:08] (+1 select) > + * CCCFGREG1[03:00] (enable) > + */ > + reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET); > + msk = 0x00; > + temp = 0x00; > + > + /* enable */ > + msk |= (BIT3 | BIT2 | BIT1 | BIT0); > + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) > + temp |= msk; > + > + /* select */ > + msk |= (BIT11 | BIT10 | BIT9 | BIT8); > + if (pi_count < EARLY_DB) > + temp |= msk; > + > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* error check */ > + if (pi_count > 0x3F) > + mrc_post_code(0xEE, 0xE5); > + > + LEAVEFN(); > +} > + > +/* > + * This function will return the amout of WCLK delay on the given > + * channel, rank as an absolute PI count. > + */ > +uint32_t get_wclk(uint8_t channel, uint8_t rank) > +{ > + uint32_t reg; > + uint32_t temp; > + uint32_t pi_count; > + > + ENTERFN(); > + > + /* > + * RDPTR (1/2 MCLK, 64 PIs) > + * CCPTRREG[15:12] -> CLK1 (0x0-0xF) > + * CCPTRREG[11:08] -> CLK0 (0x0-0xF) > + */ > + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); > + temp = msg_port_alt_read(DDRPHY, reg); > + temp >>= (rank) ? (12) : (8); > + temp &= 0xF; > + > + /* Adjust PI_COUNT */ > + pi_count = temp * HALF_CLK; > + > + /* > + * PI (1/64 MCLK, 1 PIs) > + * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F) > + * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F) > + */ > + reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0); > + reg += (channel * DDRIOCCC_CH_OFFSET); > + temp = msg_port_alt_read(DDRPHY, reg); > + temp >>= (rank) ? (16) : (8); > + temp &= 0x3F; > + > + pi_count += temp; > + > + LEAVEFN(); > + > + return pi_count; > +} > + > +/* > + * This function will program the WCTL delays based on an absolute > + * number of PIs. > + * > + * (currently doesn't comprehend rank) > + */ > +void set_wctl(uint8_t channel, uint8_t rank, uint32_t pi_count) > +{ > + uint32_t reg; > + uint32_t msk; > + uint32_t temp; > + > + ENTERFN(); > + > + /* > + * RDPTR (1/2 MCLK, 64 PIs) > + * CCPTRREG[31:28] (0x0-0xF) > + * CCPTRREG[27:24] (0x0-0xF) > + */ > + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); > + msk = (BIT31 | BIT30 | BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24); > + temp = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24); > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* Adjust PI_COUNT */ > + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; > + > + /* > + * PI (1/64 MCLK, 1 PIs) > + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) > + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) > + */ > + reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); > + msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24); > + temp = (pi_count << 24); > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + reg = ECCB1DLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET); > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + reg = ECCB1DLLPICODER2 + (channel * DDRIOCCC_CH_OFFSET); > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + reg = ECCB1DLLPICODER3 + (channel * DDRIOCCC_CH_OFFSET); > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* > + * DEADBAND > + * CCCFGREG1[13:12] (+1 select) > + * CCCFGREG1[05:04] (enable) > + */ > + reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET); > + msk = 0x00; > + temp = 0x00; > + > + /* enable */ > + msk |= (BIT5 | BIT4); > + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) > + temp |= msk; > + > + /* select */ > + msk |= (BIT13 | BIT12); > + if (pi_count < EARLY_DB) > + temp |= msk; > + > + mrc_alt_write_mask(DDRPHY, reg, temp, msk); > + > + /* error check */ > + if (pi_count > 0x3F) > + mrc_post_code(0xEE, 0xE6); > + > + LEAVEFN(); > +} > + > +/* > + * This function will return the amount of WCTL delay on the given > + * channel, rank as an absolute PI count. > + * > + * (currently doesn't comprehend rank) > + */ > +uint32_t get_wctl(uint8_t channel, uint8_t rank) > +{ > + uint32_t reg; > + uint32_t temp; > + uint32_t pi_count; > + > + ENTERFN(); > + > + /* > + * RDPTR (1/2 MCLK, 64 PIs) > + * CCPTRREG[31:28] (0x0-0xF) > + * CCPTRREG[27:24] (0x0-0xF) > + */ > + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); > + temp = msg_port_alt_read(DDRPHY, reg); > + temp >>= 24; > + temp &= 0xF; > + > + /* Adjust PI_COUNT */ > + pi_count = temp * HALF_CLK; > + > + /* > + * PI (1/64 MCLK, 1 PIs) > + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) > + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) > + */ > + reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); > + temp = msg_port_alt_read(DDRPHY, reg); > + temp >>= 24; > + temp &= 0x3F; > + > + /* Adjust PI_COUNT */ > + pi_count += temp; > + > + LEAVEFN(); > + > + return pi_count; > +} > + > +/* > + * This function will program the internal Vref setting in a given > + * byte lane in a given channel. > + */ > +void set_vref(uint8_t channel, uint8_t byte_lane, uint32_t setting) > +{ > + uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL); > + > + ENTERFN(); > + > + DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n", > + channel, byte_lane, setting); > + > + mrc_alt_write_mask(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + > + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)), > + (vref_codes[setting] << 2), > + (BIT7 | BIT6 | BIT5 | BIT4 | BIT3 | BIT2)); > + > + /* > + * need to wait ~300ns for Vref to settle > + * (check that this is necessary) > + */ > + delay_n(300); > + > + /* ??? may need to clear pointers ??? */ > + > + LEAVEFN(); > +} > + > +/* > + * This function will return the internal Vref setting for the given > + * channel, byte_lane. > + */ > +uint32_t get_vref(uint8_t channel, uint8_t byte_lane) > +{ > + uint8_t j; > + uint32_t ret_val = sizeof(vref_codes) / 2; > + uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL); > + uint32_t temp; > + > + ENTERFN(); > + > + temp = msg_port_alt_read(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + > + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET))); > + temp >>= 2; > + temp &= 0x3F; > + > + for (j = 0; j < sizeof(vref_codes); j++) { > + if (vref_codes[j] == temp) { > + ret_val = j; > + break; > + } > + } > + > + LEAVEFN(); > + > + return ret_val; > +} > + > +/* > + * This function will return a 32 bit address in the desired 32-bit > + * channel and rank. > + */ > +uint32_t get_addr(uint8_t channel, uint8_t rank) > +{ > + uint32_t offset = 0x02000000; /* 32MB */ > + > + /* Begin product specific code */ > + if (channel > 0) { > + DPF(D_ERROR, "ILLEGAL CHANNEL\n"); > + DEAD_LOOP(); > + } > + > + if (rank > 1) { > + DPF(D_ERROR, "ILLEGAL RANK\n"); > + DEAD_LOOP(); > + } > + > + /* use 256MB lowest density as per DRP == 0x0003 */ > + offset += rank * (256 * 1024 * 1024); > + > + return offset; > +} > + > +/* > + * This function will sample the DQTRAINSTS registers in the given > + * channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'. > + * > + * It will return an encoded DWORD in which each bit corresponds to DWORD? > + * the sampled value on the byte lane. > + */ > +uint32_t sample_dqs(struct mrc_params *mrc_params, uint8_t channel, > + uint8_t rank, bool rcvn) > +{ > + uint8_t j; /* just a counter */ > + uint8_t bl; /* which BL in the module (always 2 per module) */ > + uint8_t bl_grp; /* which BL module */ > + /* byte lane divisor */ Maybe rename the variable so you can drop the comment? > + uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1; > + uint32_t msk[2]; /* BLx in module */ > + /* DQTRAINSTS register contents for each sample */ > + uint32_t sampled_val[SAMPLE_SIZE]; > + uint32_t num_0s; /* tracks the number of '0' samples */ > + uint32_t num_1s; /* tracks the number of '1' samples */ > + uint32_t ret_val = 0x00; /* assume all '0' samples */ > + uint32_t address = get_addr(channel, rank); > + > + /* initialise msk[] */ > + msk[0] = (rcvn) ? (BIT1) : (BIT9); /* BL0 */ > + msk[1] = (rcvn) ? (BIT0) : (BIT8); /* BL1 */ > + > + /* cycle through each byte lane group */ > + for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; bl_grp++) { > + /* take SAMPLE_SIZE samples */ > + for (j = 0; j < SAMPLE_SIZE; j++) { > + hte_mem_op(address, mrc_params->first_run, > + rcvn ? 0 : 1); > + mrc_params->first_run = 0; > + > + /* > + * record the contents of the proper > + * DQTRAINSTS register > + */ > + sampled_val[j] = msg_port_alt_read(DDRPHY, > + (DQTRAINSTS + > + (bl_grp * DDRIODQ_BL_OFFSET) + > + (channel * DDRIODQ_CH_OFFSET))); > + } > + > + /* > + * look for a majority value (SAMPLE_SIZE / 2) + 1 > + * on the byte lane and set that value in the corresponding > + * ret_val bit > + */ > + for (bl = 0; bl < 2; bl++) { > + num_0s = 0x00; /* reset '0' tracker for byte lane */ > + num_1s = 0x00; /* reset '1' tracker for byte lane */ > + for (j = 0; j < SAMPLE_SIZE; j++) { > + if (sampled_val[j] & msk[bl]) > + num_1s++; > + else > + num_0s++; > + } > + if (num_1s > num_0s) > + ret_val |= (1 << (bl + (bl_grp * 2))); > + } > + } > + > + /* > + * "ret_val.0" contains the status of BL0 > + * "ret_val.1" contains the status of BL1 > + * "ret_val.2" contains the status of BL2 > + * etc. This comment should go in @return in the function comment. > + */ > + return ret_val; > +} > + > +/* This function will find the rising edge transition on RCVN or WDQS */ > +void find_rising_edge(struct mrc_params *mrc_params, uint32_t delay[], > + uint8_t channel, uint8_t rank, bool rcvn) > +{ > + bool all_edges_found; /* determines stop condition */ > + bool direction[NUM_BYTE_LANES]; /* direction indicator */ > + uint8_t sample; /* sample counter */ > + uint8_t bl; /* byte lane counter */ > + /* byte lane divisor */ > + uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1; > + uint32_t sample_result[SAMPLE_CNT]; /* results of sample_dqs() */ > + uint32_t temp; > + uint32_t transition_pattern; > + > + ENTERFN(); > + > + /* select hte and request initial configuration */ > + select_hte(); > + mrc_params->first_run = 1; > + > + /* Take 3 sample points (T1,T2,T3) to obtain a transition pattern */ > + for (sample = 0; sample < SAMPLE_CNT; sample++) { > + /* program the desired delays for sample */ > + for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) { > + /* increase sample delay by 26 PI (0.2 CLK) */ > + if (rcvn) { > + set_rcvn(channel, rank, bl, > + delay[bl] + (sample * SAMPLE_DLY)); > + } else { > + set_wdqs(channel, rank, bl, > + delay[bl] + (sample * SAMPLE_DLY)); > + } > + } > + > + /* take samples (Tsample_i) */ > + sample_result[sample] = sample_dqs(mrc_params, > + channel, rank, rcvn); > + > + DPF(D_TRN, > + "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n", > + (rcvn ? "RCVN" : "WDQS"), channel, rank, sample, > + sample * SAMPLE_DLY, sample_result[sample]); > + } > + > + /* > + * This pattern will help determine where we landed and ultimately > + * how to place RCVEN/WDQS. > + */ > + for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) { > + /* build transition_pattern (MSB is 1st sample) */ > + transition_pattern = 0; > + for (sample = 0; sample < SAMPLE_CNT; sample++) { > + transition_pattern |= > + ((sample_result[sample] & (1 << bl)) >> bl) << > + (SAMPLE_CNT - 1 - sample); > + } > + > + DPF(D_TRN, "=== transition pattern %d\n", transition_pattern); > + > + /* > + * set up to look for rising edge based on > + * transition_pattern > + */ > + switch (transition_pattern) { > + case 0: /* sampled 0->0->0 */ > + /* move forward from T3 looking for 0->1 */ > + delay[bl] += 2 * SAMPLE_DLY; > + direction[bl] = FORWARD; > + break; > + case 1: /* sampled 0->0->1 */ > + case 5: /* sampled 1->0->1 (bad duty cycle) *HSD#237503* */ > + /* move forward from T2 looking for 0->1 */ > + delay[bl] += 1 * SAMPLE_DLY; > + direction[bl] = FORWARD; > + break; > + case 2: /* sampled 0->1->0 (bad duty cycle) *HSD#237503* */ > + case 3: /* sampled 0->1->1 */ > + /* move forward from T1 looking for 0->1 */ > + delay[bl] += 0 * SAMPLE_DLY; > + direction[bl] = FORWARD; > + break; > + case 4: /* sampled 1->0->0 (assumes BL8, HSD#234975) */ > + /* move forward from T3 looking for 0->1 */ > + delay[bl] += 2 * SAMPLE_DLY; > + direction[bl] = FORWARD; > + break; > + case 6: /* sampled 1->1->0 */ > + case 7: /* sampled 1->1->1 */ > + /* move backward from T1 looking for 1->0 */ > + delay[bl] += 0 * SAMPLE_DLY; > + direction[bl] = BACKWARD; > + break; > + default: > + mrc_post_code(0xEE, 0xEE); > + break; > + } > + > + /* program delays */ > + if (rcvn) > + set_rcvn(channel, rank, bl, delay[bl]); > + else > + set_wdqs(channel, rank, bl, delay[bl]); > + } > + > + /* > + * Based on the observed transition pattern on the byte lane, > + * begin looking for a rising edge with single PI granularity. > + */ > + do { > + all_edges_found = true; /* assume all byte lanes passed */ > + /* take a sample */ > + temp = sample_dqs(mrc_params, channel, rank, rcvn); > + /* check all each byte lane for proper edge */ > + for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) { > + if (temp & (1 << bl)) { > + /* sampled "1" */ > + if (direction[bl] == BACKWARD) { > + /* > + * keep looking for edge > + * on this byte lane > + */ > + all_edges_found = false; > + delay[bl] -= 1; > + if (rcvn) { > + set_rcvn(channel, rank, > + bl, delay[bl]); > + } else { > + set_wdqs(channel, rank, > + bl, delay[bl]); > + } > + } > + } else { > + /* sampled "0" */ > + if (direction[bl] == FORWARD) { > + /* > + * keep looking for edge > + * on this byte lane > + */ > + all_edges_found = false; > + delay[bl] += 1; > + if (rcvn) { > + set_rcvn(channel, rank, > + bl, delay[bl]); > + } else { > + set_wdqs(channel, rank, > + bl, delay[bl]); > + } > + } > + } > + } > + } while (!all_edges_found); > + > + /* restore DDR idle state */ > + dram_init_command(DCMD_PREA(rank)); > + > + DPF(D_TRN, "Delay %03X %03X %03X %03X\n", > + delay[0], delay[1], delay[2], delay[3]); > + > + LEAVEFN(); > +} > + > +/* > + * This function will return a 32 bit mask that will be used to > + * check for byte lane failures. > + */ > +uint32_t byte_lane_mask(struct mrc_params *mrc_params) > +{ > + uint32_t j; > + uint32_t ret_val = 0x00; > + > + /* > + * set ret_val based on NUM_BYTE_LANES such that you will check > + * only BL0 in result > + * > + * (each bit in result represents a byte lane) > + */ > + for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES) > + ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES)); > + > + /* > + * HSD#235037 > + * need to adjust the mask for 16-bit mode > + */ > + if (mrc_params->channel_width == X16) > + ret_val |= (ret_val << 2); > + > + return ret_val; > +} > + > +/* > + * Check memory executing simple write/read/verify at the specified address. > + * > + * Bits in the result indicate failure on specific byte lane. > + */ > +uint32_t check_rw_coarse(struct mrc_params *mrc_params, uint32_t address) > +{ > + uint32_t result = 0; > + uint8_t first_run = 0; > + > + if (mrc_params->hte_setup) { > + mrc_params->hte_setup = 0; > + first_run = 1; > + select_hte(); > + } > + > + result = hte_basic_write_read(mrc_params, address, > + first_run, WRITE_TRAIN); reformat to 80cols > + > + DPF(D_TRN, "check_rw_coarse result is %x\n", result); > + > + return result; > +} > + > +/* > + * Check memory executing write/read/verify of many data patterns > + * at the specified address. Bits in the result indicate failure > + * on specific byte lane. > + */ > +uint32_t check_bls_ex(struct mrc_params *mrc_params, uint32_t address) > +{ > + uint32_t result; > + uint8_t first_run = 0; > + > + if (mrc_params->hte_setup) { > + mrc_params->hte_setup = 0; > + first_run = 1; > + select_hte(); > + } > + > + result = hte_write_stress_bit_lanes(mrc_params, address, first_run); > + > + DPF(D_TRN, "check_bls_ex result is %x\n", result); > + > + return result; > +} > + > +/* > + * 32-bit LFSR with characteristic polynomial: X^32 + X^22 +X^2 + X^1 > + * > + * The function takes pointer to previous 32 bit value and > + * modifies it to next value. > + */ > +void lfsr32(uint32_t *lfsr_ptr) > +{ > + uint32_t bit; > + uint32_t lfsr; > + int i; > + > + lfsr = *lfsr_ptr; > + > + for (i = 0; i < 32; i++) { > + bit = 1 ^ (lfsr & BIT0); > + bit = bit ^ ((lfsr & BIT1) >> 1); > + bit = bit ^ ((lfsr & BIT2) >> 2); > + bit = bit ^ ((lfsr & BIT22) >> 22); > + > + lfsr = ((lfsr >> 1) | (bit << 31)); > + } > + > + *lfsr_ptr = lfsr; > +} > + > +/* Clear the pointers in a given byte lane in a given channel */ > +void clear_pointers(void) > +{ > + uint8_t channel; > + uint8_t bl; > + > + ENTERFN(); > + > + for (channel = 0; channel < NUM_CHANNELS; channel++) { > + for (bl = 0; bl < NUM_BYTE_LANES; bl++) { > + mrc_alt_write_mask(DDRPHY, > + (B01PTRCTL1 + > + (channel * DDRIODQ_CH_OFFSET) + > + ((bl >> 1) * DDRIODQ_BL_OFFSET)), > + ~BIT8, BIT8); > + > + mrc_alt_write_mask(DDRPHY, > + (B01PTRCTL1 + > + (channel * DDRIODQ_CH_OFFSET) + > + ((bl >> 1) * DDRIODQ_BL_OFFSET)), > + BIT8, BIT8); > + } > + } > + > + LEAVEFN(); > +} > + > +void print_timings(struct mrc_params *mrc_params) > +{ > + uint8_t algo; > + uint8_t channel; > + uint8_t rank; > + uint8_t bl; > + uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1; > + > + DPF(D_INFO, "\n---------------------------"); > + DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3"); > + DPF(D_INFO, "\n==========================="); > + > + for (algo = 0; algo < MAX_ALGOS; algo++) { > + for (channel = 0; channel < NUM_CHANNELS; channel++) { > + if (mrc_params->channel_enables & (1 << channel)) { > + for (rank = 0; rank < NUM_RANKS; rank++) { Can we put this block in its own function to fix the over-indenting? > + if (mrc_params->rank_enables & > + (1 << rank)) { > + switch (algo) { > + case RCVN: > + DPF(D_INFO, > + "\nRCVN[%02d:%02d]", > + channel, rank); > + break; > + case WDQS: > + DPF(D_INFO, > + "\nWDQS[%02d:%02d]", > + channel, rank); > + break; > + case WDQX: > + DPF(D_INFO, > + "\nWDQx[%02d:%02d]", > + channel, rank); > + break; > + case RDQS: > + DPF(D_INFO, > + "\nRDQS[%02d:%02d]", > + channel, rank); > + break; > + case VREF: > + DPF(D_INFO, > + "\nVREF[%02d:%02d]", > + channel, rank); > + break; > + case WCMD: > + DPF(D_INFO, > + "\nWCMD[%02d:%02d]", > + channel, rank); > + break; > + case WCTL: > + DPF(D_INFO, > + "\nWCTL[%02d:%02d]", > + channel, rank); > + break; > + case WCLK: > + DPF(D_INFO, > + "\nWCLK[%02d:%02d]", > + channel, rank); > + break; > + default: > + break; > + } > + > + for (bl = 0; > + bl < (NUM_BYTE_LANES / bl_divisor); > + bl++) { > + switch (algo) { > + case RCVN: > + DPF(D_INFO, > + " %03d", > + get_rcvn(channel, rank, bl)); > + break; > + case WDQS: > + DPF(D_INFO, > + " %03d", > + get_wdqs(channel, rank, bl)); > + break; > + case WDQX: > + DPF(D_INFO, > + " %03d", > + get_wdq(channel, rank, bl)); > + break; > + case RDQS: > + DPF(D_INFO, > + " %03d", > + get_rdqs(channel, rank, bl)); > + break; > + case VREF: > + DPF(D_INFO, > + " %03d", > + get_vref(channel, bl)); > + break; > + case WCMD: > + DPF(D_INFO, > + " %03d", > + get_wcmd(channel)); > + break; > + case WCTL: > + DPF(D_INFO, > + " %03d", > + get_wctl(channel, rank)); > + break; > + case WCLK: > + DPF(D_INFO, > + " %03d", > + get_wclk(channel, rank)); > + break; > + default: > + break; > + } > + } > + } > + } > + } > + } > + } > + > + DPF(D_INFO, "\n---------------------------"); > + DPF(D_INFO, "\n"); > +} > diff --git a/arch/x86/cpu/quark/mrc_util.h b/arch/x86/cpu/quark/mrc_util.h > new file mode 100644 > index 0000000..edbe219 > --- /dev/null > +++ b/arch/x86/cpu/quark/mrc_util.h > @@ -0,0 +1,153 @@ > +/* > + * Copyright (C) 2013, Intel Corporation > + * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com> > + * > + * Ported from Intel released Quark UEFI BIOS > + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ > + * > + * SPDX-License-Identifier: Intel > + */ > + > +#ifndef _MRC_UTIL_H_ > +#define _MRC_UTIL_H_ > + > +/* Turn on this macro to enable MRC debugging output */ > +#undef MRC_DEBUG > + > +/* MRC Debug Support */ > +#define DPF debug_cond > + > +/* debug print type */ > + > +#ifdef MRC_DEBUG > +#define D_ERROR 0x0001 > +#define D_INFO 0x0002 > +#define D_REGRD 0x0004 > +#define D_REGWR 0x0008 > +#define D_FCALL 0x0010 > +#define D_TRN 0x0020 > +#define D_TIME 0x0040 > +#else > +#define D_ERROR 0 > +#define D_INFO 0 > +#define D_REGRD 0 > +#define D_REGWR 0 > +#define D_FCALL 0 > +#define D_TRN 0 > +#define D_TIME 0 > +#endif > + > +#define ENTERFN(...) debug_cond(D_FCALL, "<%s>\n", __func__) > +#define LEAVEFN(...) debug_cond(D_FCALL, "</%s>\n", __func__) > +#define REPORTFN(...) debug_cond(D_FCALL, "<%s/>\n", __func__) > + > +/* Generic Register Bits */ > +#define BIT0 0x00000001 > +#define BIT1 0x00000002 > +#define BIT2 0x00000004 > +#define BIT3 0x00000008 > +#define BIT4 0x00000010 > +#define BIT5 0x00000020 > +#define BIT6 0x00000040 > +#define BIT7 0x00000080 > +#define BIT8 0x00000100 > +#define BIT9 0x00000200 > +#define BIT10 0x00000400 > +#define BIT11 0x00000800 > +#define BIT12 0x00001000 > +#define BIT13 0x00002000 > +#define BIT14 0x00004000 > +#define BIT15 0x00008000 > +#define BIT16 0x00010000 > +#define BIT17 0x00020000 > +#define BIT18 0x00040000 > +#define BIT19 0x00080000 > +#define BIT20 0x00100000 > +#define BIT21 0x00200000 > +#define BIT22 0x00400000 > +#define BIT23 0x00800000 > +#define BIT24 0x01000000 > +#define BIT25 0x02000000 > +#define BIT26 0x04000000 > +#define BIT27 0x08000000 > +#define BIT28 0x10000000 > +#define BIT29 0x20000000 > +#define BIT30 0x40000000 > +#define BIT31 0x80000000 > + > +/* Message Bus Port */ > +#define MEM_CTLR 0x01 > +#define HOST_BRIDGE 0x03 > +#define MEM_MGR 0x05 > +#define HTE 0x11 > +#define DDRPHY 0x12 > + > +/* number of sample points */ > +#define SAMPLE_CNT 3 > +/* number of PIs to increment per sample */ > +#define SAMPLE_DLY 26 > + > +enum { > + /* indicates to decrease delays when looking for edge */ > + BACKWARD, > + /* indicates to increase delays when looking for edge */ > + FORWARD > +}; > + > +enum { > + RCVN, > + WDQS, > + WDQX, > + RDQS, > + VREF, > + WCMD, > + WCTL, > + WCLK, > + MAX_ALGOS, > +}; > + > +void mrc_write_mask(u32 unit, u32 addr, u32 data, u32 mask); > +void mrc_alt_write_mask(u32 unit, u32 addr, u32 data, u32 mask); > +void mrc_post_code(uint8_t major, uint8_t minor); > +void delay_n(uint32_t ns); > +void delay_u(uint32_t ms); > +void select_mem_mgr(void); > +void select_hte(void); > +void dram_init_command(uint32_t data); > +void dram_wake_command(void); > +void training_message(uint8_t channel, uint8_t rank, uint8_t byte_lane); > + > +void set_rcvn(uint8_t channel, uint8_t rank, > + uint8_t byte_lane, uint32_t pi_count); > +uint32_t get_rcvn(uint8_t channel, uint8_t rank, uint8_t byte_lane); > +void set_rdqs(uint8_t channel, uint8_t rank, > + uint8_t byte_lane, uint32_t pi_count); > +uint32_t get_rdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane); > +void set_wdqs(uint8_t channel, uint8_t rank, > + uint8_t byte_lane, uint32_t pi_count); > +uint32_t get_wdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane); > +void set_wdq(uint8_t channel, uint8_t rank, > + uint8_t byte_lane, uint32_t pi_count); > +uint32_t get_wdq(uint8_t channel, uint8_t rank, uint8_t byte_lane); > +void set_wcmd(uint8_t channel, uint32_t pi_count); > +uint32_t get_wcmd(uint8_t channel); > +void set_wclk(uint8_t channel, uint8_t rank, uint32_t pi_count); > +uint32_t get_wclk(uint8_t channel, uint8_t rank); > +void set_wctl(uint8_t channel, uint8_t rank, uint32_t pi_count); > +uint32_t get_wctl(uint8_t channel, uint8_t rank); > +void set_vref(uint8_t channel, uint8_t byte_lane, uint32_t setting); > +uint32_t get_vref(uint8_t channel, uint8_t byte_lane); > + > +uint32_t get_addr(uint8_t channel, uint8_t rank); > +uint32_t sample_dqs(struct mrc_params *mrc_params, uint8_t channel, > + uint8_t rank, bool rcvn); > +void find_rising_edge(struct mrc_params *mrc_params, uint32_t delay[], > + uint8_t channel, uint8_t rank, bool rcvn); > +uint32_t byte_lane_mask(struct mrc_params *mrc_params); > +uint32_t check_rw_coarse(struct mrc_params *mrc_params, uint32_t address); > +uint32_t check_bls_ex(struct mrc_params *mrc_params, uint32_t address); > +void lfsr32(uint32_t *lfsr_ptr); > +void clear_pointers(void); > +void print_timings(struct mrc_params *mrc_params); If these are all truly exported, can we please put the function comments here in the header file? > + > +#endif /* _MRC_UTIL_H_ */ > -- > 1.8.2.1 > Regards, Simon
Hi Simon, On Thu, Feb 5, 2015 at 12:24 AM, Simon Glass <sjg@chromium.org> wrote: > Hi Bin, > > On 3 February 2015 at 04:45, Bin Meng <bmeng.cn@gmail.com> wrote: >> Add various utility codes needed for Quark MRC. >> >> Signed-off-by: Bin Meng <bmeng.cn@gmail.com> >> >> --- >> There are 12 checkpatch warnings in this patch, which are: >> >> warning: arch/x86/cpu/quark/mrc_util.c,1446: Too many leading tabs - consider code refactoring >> warning: arch/x86/cpu/quark/mrc_util.c,1450: line over 80 characters >> ... >> >> Fixing 'Too many leading tabs ...' will be very dangerous, as I don't have >> all the details on how Intel's MRC codes are actually written to play with >> the hardware. Trying to refactor them may lead to a non-working MRC codes. >> For the 'line over 80 characters' issue, we have to leave them as is now >> due to the 'Too many leading tabs ...', sigh. > > The code looks fine for the most part - I only have nits. > > I'm not keen on BIT though. See my comments and what improvements you > can make. It would be great to drop BIT. I found it is hard to replace BIT to something meaningful, as lots of registers are undocumented. > Re the debug macros, I suppose they are OK to keep. U-Boot doesn't > have the concept of debug() for different categories or levels of > verbosity. Yep, maybe we can enhance U-Boot's debug() in the future. >> >> arch/x86/cpu/quark/hte.c | 398 +++++++++++ >> arch/x86/cpu/quark/hte.h | 44 ++ >> arch/x86/cpu/quark/mrc_util.c | 1499 +++++++++++++++++++++++++++++++++++++++++ >> arch/x86/cpu/quark/mrc_util.h | 153 +++++ >> 4 files changed, 2094 insertions(+) >> create mode 100644 arch/x86/cpu/quark/hte.c >> create mode 100644 arch/x86/cpu/quark/hte.h >> create mode 100644 arch/x86/cpu/quark/mrc_util.c >> create mode 100644 arch/x86/cpu/quark/mrc_util.h >> >> diff --git a/arch/x86/cpu/quark/hte.c b/arch/x86/cpu/quark/hte.c >> new file mode 100644 >> index 0000000..d813c9c >> --- /dev/null >> +++ b/arch/x86/cpu/quark/hte.c >> @@ -0,0 +1,398 @@ >> +/* >> + * Copyright (C) 2013, Intel Corporation >> + * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com> >> + * >> + * Ported from Intel released Quark UEFI BIOS >> + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ > > Remove training slash? Fixed globally. >> + * >> + * SPDX-License-Identifier: Intel >> + */ >> + >> +#include <common.h> >> +#include <asm/arch/mrc.h> >> +#include <asm/arch/msg_port.h> >> +#include "mrc_util.h" >> +#include "hte.h" >> + >> +/** >> + * This function enables HTE to detect all possible errors for > > s/This function// globally > > I'd suggest present tense, like "enable HTE to detect all possible errors for... Fixed globally. >> + * the given training parameters (per-bit or full byte lane). >> + */ >> +static void hte_enable_all_errors(void) >> +{ >> + msg_port_write(HTE, 0x000200A2, 0xFFFFFFFF); >> + msg_port_write(HTE, 0x000200A3, 0x000000FF); >> + msg_port_write(HTE, 0x000200A4, 0x00000000); > > Lower case hex again. All of Intel's MRC codes are using upper case, to keep it consistent (or maybe I don't want to replace every place to lower case ..) I chose not to fix these lower case, and just leave them as is now. >> +} >> + >> +/** >> + * This function goes and reads the HTE register in order to find any error >> + * >> + * @return: The errors detected in the HTE status register >> + */ >> +static u32 hte_check_errors(void) >> +{ >> + return msg_port_read(HTE, 0x000200A7); >> +} >> + >> +/** >> + * This function waits until HTE finishes >> + */ >> +static void hte_wait_for_complete(void) >> +{ >> + u32 tmp; >> + >> + ENTERFN(); >> + >> + do {} while ((msg_port_read(HTE, 0x00020012) & BIT30) != 0); >> + >> + tmp = msg_port_read(HTE, 0x00020011); >> + tmp |= BIT9; >> + tmp &= ~(BIT12 | BIT13); >> + msg_port_write(HTE, 0x00020011, tmp); >> + >> + LEAVEFN(); >> +} >> + >> +/** >> + * This function clears registers related with errors in the HTE >> + */ >> +static void hte_clear_error_regs(void) >> +{ >> + u32 tmp; >> + >> + /* >> + * Clear all HTE errors and enable error checking >> + * for burst and chunk. >> + */ >> + tmp = msg_port_read(HTE, 0x000200A1); >> + tmp |= BIT8; >> + msg_port_write(HTE, 0x000200A1, tmp); >> +} >> + >> +/** >> + * This function executes basic single cache line memory write/read/verify >> + * test using simple constant pattern, different for READ_RAIN and > > REAS_TRAIN? Fixed globally. >> + * WRITE_TRAIN modes. >> + * >> + * See hte_basic_write_read() which is external visible wrapper. > > the external (fix below also) > >> + * >> + * @mrc_params: host struture for all MRC global data >> + * @addr: memory adress being tested (must hit specific channel/rank) >> + * @first_run: if set then hte registers are configured, otherwise it is > > the hte? Changed to 'the HTE' >> + * assumed configuration is done and just re-run the test > > assumed configuration is done the we just re-run the test > , > (fix below also) Fixed. >> + * @mode: READ_TRAIN or WRITE_TRAIN (the difference is in the pattern) >> + * >> + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) >> + */ >> +static u16 hte_basic_data_cmp(struct mrc_params *mrc_params, u32 addr, >> + u8 first_run, u8 mode) >> +{ >> + u32 pattern; >> + u32 offset; >> + >> + if (first_run) { >> + msg_port_write(HTE, 0x00020020, 0x01B10021); >> + msg_port_write(HTE, 0x00020021, 0x06000000); >> + msg_port_write(HTE, 0x00020022, addr >> 6); >> + msg_port_write(HTE, 0x00020062, 0x00800015); >> + msg_port_write(HTE, 0x00020063, 0xAAAAAAAA); >> + msg_port_write(HTE, 0x00020064, 0xCCCCCCCC); >> + msg_port_write(HTE, 0x00020065, 0xF0F0F0F0); >> + msg_port_write(HTE, 0x00020061, 0x00030008); >> + >> + if (mode == WRITE_TRAIN) >> + pattern = 0xC33C0000; >> + else /* READ_TRAIN */ >> + pattern = 0xAA5555AA; >> + >> + for (offset = 0x80; offset <= 0x8F; offset++) >> + msg_port_write(HTE, offset, pattern); >> + } >> + >> + msg_port_write(HTE, 0x000200A1, 0xFFFF1000); >> + msg_port_write(HTE, 0x00020011, 0x00011000); >> + msg_port_write(HTE, 0x00020011, 0x00011100); >> + >> + hte_wait_for_complete(); >> + >> + /* >> + * Return bits 15:8 of HTE_CH0_ERR_XSTAT to check for >> + * any bytelane errors. >> + */ >> + return (hte_check_errors() >> 8) & 0xFF; >> +} >> + >> +/** >> + * This function examines single cache line memory with write/read/verify >> + * test using multiple data patterns (victim-aggressor algorithm). >> + * >> + * See hte_write_stress_bit_lanes() which is external visible wrapper. >> + * >> + * @mrc_params: host struture for all MRC global data > > structure Fixed globally. >> + * @addr: memory adress being tested (must hit specific channel/rank) >> + * @loop_cnt: number of test iterations >> + * @seed_victim: victim data pattern seed >> + * @seed_aggressor: aggressor data pattern seed >> + * @victim_bit: should be 0 as auto rotate feature is in use > > auto-rotate Fixed. >> + * @first_run: if set then hte registers are configured, otherwise it is > > Actually I wonder if HTE would be better than hte, which looks like a > 'the' typo, particularly if you leave out 'the'. Also can you please > comment at the top of the file (first function) what HTE stands for)? Changed to 'the HTE'. Intel doc only mentions MTE (Memory Training Engine), and its registers are undocumented. I am not sure if this HTE means MTE in Intel's doc. So I don't add any comment for HTE. >> + * assumed configuration is done and just re-run the test >> + * >> + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) >> + */ >> +static u16 hte_rw_data_cmp(struct mrc_params *mrc_params, u32 addr, >> + u8 loop_cnt, u32 seed_victim, u32 seed_aggressor, >> + u8 victim_bit, u8 first_run) >> +{ >> + u32 offset; >> + u32 tmp; >> + >> + if (first_run) { >> + msg_port_write(HTE, 0x00020020, 0x00910024); >> + msg_port_write(HTE, 0x00020023, 0x00810024); >> + msg_port_write(HTE, 0x00020021, 0x06070000); >> + msg_port_write(HTE, 0x00020024, 0x06070000); >> + msg_port_write(HTE, 0x00020022, addr >> 6); >> + msg_port_write(HTE, 0x00020025, addr >> 6); >> + msg_port_write(HTE, 0x00020062, 0x0000002A); >> + msg_port_write(HTE, 0x00020063, seed_victim); >> + msg_port_write(HTE, 0x00020064, seed_aggressor); >> + msg_port_write(HTE, 0x00020065, seed_victim); >> + >> + /* >> + * Write the pattern buffers to select the victim bit >> + * >> + * Start with bit0 >> + */ >> + for (offset = 0x80; offset <= 0x8F; offset++) { >> + if ((offset % 8) == victim_bit) >> + msg_port_write(HTE, offset, 0x55555555); >> + else >> + msg_port_write(HTE, offset, 0xCCCCCCCC); >> + } >> + >> + msg_port_write(HTE, 0x00020061, 0x00000000); >> + msg_port_write(HTE, 0x00020066, 0x03440000); >> + msg_port_write(HTE, 0x000200A1, 0xFFFF1000); >> + } >> + >> + tmp = 0x10001000 | (loop_cnt << 16); >> + msg_port_write(HTE, 0x00020011, tmp); >> + msg_port_write(HTE, 0x00020011, tmp | BIT8); >> + >> + hte_wait_for_complete(); >> + >> + /* >> + * Return bits 15:8 of HTE_CH0_ERR_XSTAT to check for >> + * any bytelane errors. >> + */ >> + return (hte_check_errors() >> 8) & 0xFF; >> +} >> + >> +/** >> + * This function uses HW HTE engine to initialize or test all memory attached >> + * to a given DUNIT. If flag is MRC_MEM_INIT, this routine writes 0s to all >> + * memory locations to initialize ECC. If flag is MRC_MEM_TEST, this routine >> + * will send an 5AA55AA5 pattern to all memory locations on the RankMask and >> + * then read it back. Then it sends an A55AA55A pattern to all memory locations >> + * on the RankMask and reads it back. >> + * >> + * @mrc_params: host struture for all MRC global data >> + * @flag: MRC_MEM_INIT or MRC_MEM_TEST >> + * >> + * @return: errors register showing HTE failures. Also prints out which rank >> + * failed the HTE test if failure occurs. For rank detection to work, >> + * the address map must be left in its default state. If MRC changes >> + * the address map, this function must be modified to change it back >> + * to default at the beginning, then restore it at the end. >> + */ >> +u32 hte_mem_init(struct mrc_params *mrc_params, u8 flag) >> +{ >> + u32 offset; >> + int test_num; >> + int i; >> + >> + /* >> + * Clear out the error registers at the start of each memory >> + * init or memory test run. >> + */ >> + hte_clear_error_regs(); >> + >> + msg_port_write(HTE, 0x00020062, 0x00000015); >> + >> + for (offset = 0x80; offset <= 0x8F; offset++) >> + msg_port_write(HTE, offset, ((offset & 1) ? 0xA55A : 0x5AA5)); >> + >> + msg_port_write(HTE, 0x00020021, 0x00000000); >> + msg_port_write(HTE, 0x00020022, (mrc_params->mem_size >> 6) - 1); >> + msg_port_write(HTE, 0x00020063, 0xAAAAAAAA); >> + msg_port_write(HTE, 0x00020064, 0xCCCCCCCC); >> + msg_port_write(HTE, 0x00020065, 0xF0F0F0F0); >> + msg_port_write(HTE, 0x00020066, 0x03000000); >> + >> + switch (flag) { >> + case MRC_MEM_INIT: >> + /* >> + * Only 1 write pass through memory is needed >> + * to initialize ECC >> + */ >> + test_num = 1; >> + break; >> + case MRC_MEM_TEST: >> + /* Write/read then write/read with inverted pattern */ >> + test_num = 4; >> + break; >> + default: >> + DPF(D_INFO, "Unknown parameter for flag: %d\n", flag); >> + return 0xFFFFFFFF; >> + } >> + >> + DPF(D_INFO, "hte_mem_init"); > > debug() Keep to use DPF(). >> + >> + for (i = 0; i < test_num; i++) { >> + DPF(D_INFO, "."); >> + >> + if (i == 0) { >> + msg_port_write(HTE, 0x00020061, 0x00000000); >> + msg_port_write(HTE, 0x00020020, 0x00110010); >> + } else if (i == 1) { >> + msg_port_write(HTE, 0x00020061, 0x00000000); >> + msg_port_write(HTE, 0x00020020, 0x00010010); >> + } else if (i == 2) { >> + msg_port_write(HTE, 0x00020061, 0x00010100); >> + msg_port_write(HTE, 0x00020020, 0x00110010); >> + } else { >> + msg_port_write(HTE, 0x00020061, 0x00010100); >> + msg_port_write(HTE, 0x00020020, 0x00010010); >> + } >> + >> + msg_port_write(HTE, 0x00020011, 0x00111000); >> + msg_port_write(HTE, 0x00020011, 0x00111100); >> + >> + hte_wait_for_complete(); >> + >> + /* If this is a READ pass, check for errors at the end */ >> + if ((i % 2) == 1) { >> + /* Return immediately if error */ >> + if (hte_check_errors()) >> + break; >> + } >> + } >> + >> + DPF(D_INFO, "done\n"); >> + >> + return hte_check_errors(); >> +} >> + >> +/** >> + * This function executes basic single cache line memory write/read/verify > > 'executes a basic' Fixed. >> + * test using simple constant pattern, different for READ_RAIN and >> + * WRITE_TRAIN modes. >> + * >> + * @mrc_params: host struture for all MRC global data > > structure, please fix globally Fixed globally. >> + * @addr: memory adress being tested (must hit specific channel/rank) >> + * @first_run: if set then hte registers are configured, otherwise it is >> + * assumed configuration is done and just re-run the test >> + * @mode: READ_TRAIN or WRITE_TRAIN (the difference is in the pattern) >> + * >> + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) >> + */ >> +u16 hte_basic_write_read(struct mrc_params *mrc_params, u32 addr, >> + u8 first_run, u8 mode) > > Why do we use u8 for these? Would uint be good enough? Just a suggestion. > >> +{ >> + u16 errors; >> + >> + ENTERFN(); >> + >> + /* Enable all error reporting in preparation for HTE test */ >> + hte_enable_all_errors(); >> + hte_clear_error_regs(); >> + >> + errors = hte_basic_data_cmp(mrc_params, addr, first_run, mode); >> + >> + LEAVEFN(); >> + >> + return errors; >> +} >> + >> +/** >> + * This function examines single cache line memory with write/read/verify > > examines a single-cache-line memory > > (at least I think this is what it is saying) Fixed globally. >> + * test using multiple data patterns (victim-aggressor algorithm). >> + * >> + * @mrc_params: host struture for all MRC global data >> + * @addr: memory adress being tested (must hit specific channel/rank) >> + * @first_run: if set then hte registers are configured, otherwise it is >> + * assumed configuration is done and just re-run the test >> + * >> + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) >> + */ >> +u16 hte_write_stress_bit_lanes(struct mrc_params *mrc_params, >> + u32 addr, u8 first_run) >> +{ >> + u16 errors; >> + u8 victim_bit = 0; >> + >> + ENTERFN(); >> + >> + /* Enable all error reporting in preparation for HTE test */ >> + hte_enable_all_errors(); >> + hte_clear_error_regs(); >> + >> + /* >> + * Loop through each bit in the bytelane. >> + * >> + * Each pass creates a victim bit while keeping all other bits the same >> + * as aggressors. AVN HTE adds an auto-rotate feature which allows us >> + * to program the entire victim/aggressor sequence in 1 step. > > What is AVN? I don't know. I guess it might be Intel's Avoton core in some Atom processors. So does not change the comment .. >> + * >> + * The victim bit rotates on each pass so no need to have software >> + * implement a victim bit loop like on VLV. > > VLV? I think it is sometimes better to write these out and put the > abbreviation in brackets after it, at least once in the file. Like above, I guess it is Valleyview (VLV). Don't know if it is correct. So keep it unchanged. > >> + */ >> + errors = hte_rw_data_cmp(mrc_params, addr, HTE_LOOP_CNT, >> + HTE_LFSR_VICTIM_SEED, HTE_LFSR_AGRESSOR_SEED, >> + victim_bit, first_run); >> + >> + LEAVEFN(); >> + >> + return errors; >> +} >> + >> +/** >> + * This function execute basic single cache line memory write or read. > > as above > >> + * This is just for receive enable / fine write levelling purpose. > > write-levelling (I think that's what you mean) Fixed >> + * >> + * @addr: memory adress being tested (must hit specific channel/rank) >> + * @first_run: if set then hte registers are configured, otherwise it is >> + * assumed configuration is done and just re-run the test >> + * @is_write: when non-zero memory write operation executed, otherwise read >> + */ >> +void hte_mem_op(u32 addr, u8 first_run, u8 is_write) >> +{ >> + u32 offset; >> + u32 tmp; >> + >> + hte_enable_all_errors(); >> + hte_clear_error_regs(); >> + >> + if (first_run) { >> + tmp = is_write ? 0x01110021 : 0x01010021; >> + msg_port_write(HTE, 0x00020020, tmp); >> + >> + msg_port_write(HTE, 0x00020021, 0x06000000); >> + msg_port_write(HTE, 0x00020022, addr >> 6); >> + msg_port_write(HTE, 0x00020062, 0x00800015); >> + msg_port_write(HTE, 0x00020063, 0xAAAAAAAA); >> + msg_port_write(HTE, 0x00020064, 0xCCCCCCCC); >> + msg_port_write(HTE, 0x00020065, 0xF0F0F0F0); >> + msg_port_write(HTE, 0x00020061, 0x00030008); >> + >> + for (offset = 0x80; offset <= 0x8F; offset++) >> + msg_port_write(HTE, offset, 0xC33C0000); >> + } >> + >> + msg_port_write(HTE, 0x000200A1, 0xFFFF1000); >> + msg_port_write(HTE, 0x00020011, 0x00011000); >> + msg_port_write(HTE, 0x00020011, 0x00011100); >> + >> + hte_wait_for_complete(); >> +} >> diff --git a/arch/x86/cpu/quark/hte.h b/arch/x86/cpu/quark/hte.h >> new file mode 100644 >> index 0000000..3a173ea >> --- /dev/null >> +++ b/arch/x86/cpu/quark/hte.h >> @@ -0,0 +1,44 @@ >> +/* >> + * Copyright (C) 2013, Intel Corporation >> + * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com> >> + * >> + * Ported from Intel released Quark UEFI BIOS >> + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ >> + * >> + * SPDX-License-Identifier: Intel >> + */ >> + >> +#ifndef _HTE_H_ >> +#define _HTE_H_ >> + >> +enum { >> + MRC_MEM_INIT, >> + MRC_MEM_TEST >> +}; >> + >> +enum { >> + READ_TRAIN, >> + WRITE_TRAIN >> +}; >> + >> +/* >> + * EXP_LOOP_CNT field of HTE_CMD_CTL >> + * >> + * This CANNOT be less than 4! >> + */ >> +#define HTE_LOOP_CNT 5 >> + >> +/* random seed for victim */ >> +#define HTE_LFSR_VICTIM_SEED 0xF294BA21 >> + >> +/* random seed for aggressor */ >> +#define HTE_LFSR_AGRESSOR_SEED 0xEBA7492D >> + >> +u32 hte_mem_init(struct mrc_params *mrc_params, u8 flag); >> +u16 hte_basic_write_read(struct mrc_params *mrc_params, u32 addr, >> + u8 first_run, u8 mode); >> +u16 hte_write_stress_bit_lanes(struct mrc_params *mrc_params, >> + u32 addr, u8 first_run); >> +void hte_mem_op(u32 addr, u8 first_run, u8 is_write); > > Can you move the comments from the .c to the .h for these exported functions? > These routines are only used by MRC internally, and not public APIs. Thus I don't move the comments to header files. >> + >> +#endif /* _HTE_H_ */ >> diff --git a/arch/x86/cpu/quark/mrc_util.c b/arch/x86/cpu/quark/mrc_util.c >> new file mode 100644 >> index 0000000..1ae42d6 >> --- /dev/null >> +++ b/arch/x86/cpu/quark/mrc_util.c >> @@ -0,0 +1,1499 @@ >> +/* >> + * Copyright (C) 2013, Intel Corporation >> + * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com> >> + * >> + * Ported from Intel released Quark UEFI BIOS >> + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ >> + * >> + * SPDX-License-Identifier: Intel >> + */ >> + >> +#include <common.h> >> +#include <asm/arch/device.h> >> +#include <asm/arch/mrc.h> >> +#include <asm/arch/msg_port.h> >> +#include "mrc_util.h" >> +#include "hte.h" >> +#include "smc.h" >> + >> +static const uint8_t vref_codes[64] = { >> + /* lowest to highest */ >> + 0x3F, 0x3E, 0x3D, 0x3C, 0x3B, 0x3A, 0x39, 0x38, >> + 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, >> + 0x2F, 0x2E, 0x2D, 0x2C, 0x2B, 0x2A, 0x29, 0x28, >> + 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, >> + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, >> + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, >> + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, >> + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F >> +}; >> + >> +void mrc_write_mask(u32 unit, u32 addr, u32 data, u32 mask) >> +{ >> + msg_port_write(unit, addr, >> + (msg_port_read(unit, addr) & ~(mask)) | >> + ((data) & (mask))); >> +} >> + >> +void mrc_alt_write_mask(u32 unit, u32 addr, u32 data, u32 mask) >> +{ >> + msg_port_alt_write(unit, addr, >> + (msg_port_alt_read(unit, addr) & ~(mask)) | >> + ((data) & (mask))); >> +} >> + >> +void mrc_post_code(uint8_t major, uint8_t minor) >> +{ >> + /* send message to UART */ >> + DPF(D_INFO, "POST: 0x%01x%02x\n", major, minor); >> + >> + /* error check */ >> + if (major == 0xEE) >> + hang(); >> +} >> + >> +/* Delay number of nanoseconds */ >> +void delay_n(uint32_t ns) >> +{ >> + /* 1000 MHz clock has 1ns period --> no conversion required */ >> + uint64_t final_tsc = rdtsc(); > > blank line here after declarations end > Fixed. >> + final_tsc += ((get_tbclk_mhz() * ns) / 1000); >> + >> + while (rdtsc() < final_tsc) >> + ; >> +} >> + >> +/* Delay number of microseconds */ >> +void delay_u(uint32_t ms) >> +{ >> + /* 64-bit math is not an option, just use loops */ >> + while (ms--) >> + delay_n(1000); >> +} > > Some day I suspect these could be pulled out into general x86 > functions. Let's see if anything else needs them first. Agreed. >> + >> +/* Select Memory Manager as the source for PRI interface */ >> +void select_mem_mgr(void) >> +{ >> + u32 dco; >> + >> + ENTERFN(); >> + >> + dco = msg_port_read(MEM_CTLR, DCO); >> + dco &= ~BIT28; > > ~(1 << 28) > > Ah but I see you are using this everywhere. > > U-Boot tries to avoid defining this sort of thing. See some comments > below about this. Did not fix the BIT stuff. >> + msg_port_write(MEM_CTLR, DCO, dco); >> + >> + LEAVEFN(); >> +} >> + >> +/* Select HTE as the source for PRI interface */ >> +void select_hte(void) >> +{ >> + u32 dco; >> + >> + ENTERFN(); >> + >> + dco = msg_port_read(MEM_CTLR, DCO); >> + dco |= BIT28; >> + msg_port_write(MEM_CTLR, DCO, dco); >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * Send DRAM command >> + * data should be formated using DCMD_Xxxx macro or emrsXCommand structure >> + */ >> +void dram_init_command(uint32_t data) >> +{ >> + pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_DATA_REG, data); >> + pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_CTRL_EXT_REG, 0); >> + msg_port_setup(MSG_OP_DRAM_INIT, MEM_CTLR, 0); >> + >> + DPF(D_REGWR, "WR32 %03X %08X %08X\n", MEM_CTLR, 0, data); >> +} >> + >> +/* Send DRAM wake command using special MCU side-band WAKE opcode */ >> +void dram_wake_command(void) >> +{ >> + ENTERFN(); >> + >> + msg_port_setup(MSG_OP_DRAM_WAKE, MEM_CTLR, 0); >> + >> + LEAVEFN(); >> +} >> + >> +void training_message(uint8_t channel, uint8_t rank, uint8_t byte_lane) >> +{ >> + /* send message to UART */ >> + DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane); >> +} >> + >> +/* >> + * This function will program the RCVEN delays >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +void set_rcvn(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count) > > reformat to 80cols. Should this or any other function in this file be static? These functions are used by MRC internally. Also I did not fix this 'reformat to 80cols) as I think it is fine. >> +{ >> + uint32_t reg; >> + uint32_t msk; >> + uint32_t temp; >> + >> + ENTERFN(); >> + >> + DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n", >> + channel, rank, byte_lane, pi_count); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * BL0 -> B01PTRCTL0[11:08] (0x0-0xF) >> + * BL1 -> B01PTRCTL0[23:20] (0x0-0xF) >> + */ >> + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + msk = (byte_lane & BIT0) ? (BIT23 | BIT22 | BIT21 | BIT20) : >> + (BIT11 | BIT10 | BIT9 | BIT8); > > Would this be better as: > > (0xf << 20) | (0xf << 8) > > It might be more meaningful also. > > I really don't think these long strings of | are nice. I agree, but I did not fix this. Changing those globally is really error prone and may break the whole MRC (my brain could be bad converting these bits to hex numbers). >> + temp = (byte_lane & BIT0) ? ((pi_count / HALF_CLK) << 20) : >> + ((pi_count / HALF_CLK) << 8); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* Adjust PI_COUNT */ >> + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F) >> + * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F) > > lower case hex again Did not fix these for consistency. >> + */ >> + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24); >> + temp = pi_count << 24; >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* >> + * DEADBAND >> + * BL0/1 -> B01DBCTL1[08/11] (+1 select) >> + * BL0/1 -> B01DBCTL1[02/05] (enable) >> + */ >> + reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + msk = 0x00; >> + temp = 0x00; >> + >> + /* enable */ >> + msk |= (byte_lane & BIT0) ? (BIT5) : (BIT2); > > Remove () around BIT5 Fixed globally. >> + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) >> + temp |= msk; >> + >> + /* select */ >> + msk |= (byte_lane & BIT0) ? (BIT11) : (BIT8); >> + if (pi_count < EARLY_DB) >> + temp |= msk; > > These uses of BIT seem more useful to me. > > Still it would be better to have #defines for the bits which actually > describe their meaning. > > Maybe you don't know the meaning though... Yes!!! >> + >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* error check */ >> + if (pi_count > 0x3F) { >> + training_message(channel, rank, byte_lane); >> + mrc_post_code(0xEE, 0xE0); >> + } >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the current RCVEN delay on the given >> + * channel, rank, byte_lane as an absolute PI count. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +uint32_t get_rcvn(uint8_t channel, uint8_t rank, uint8_t byte_lane) >> +{ >> + uint32_t reg; >> + uint32_t temp; >> + uint32_t pi_count; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * BL0 -> B01PTRCTL0[11:08] (0x0-0xF) >> + * BL1 -> B01PTRCTL0[23:20] (0x0-0xF) >> + */ >> + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= (byte_lane & BIT0) ? (20) : (8); >> + temp &= 0xF; >> + >> + /* Adjust PI_COUNT */ >> + pi_count = temp * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F) >> + * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F) >> + */ >> + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); > > Please avoid () around simple constants. Put them in the #define/enum if needed. Fixed globally by removing (). >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= 24; >> + temp &= 0x3F; >> + >> + /* Adjust PI_COUNT */ >> + pi_count += temp; >> + >> + LEAVEFN(); >> + >> + return pi_count; >> +} >> + >> +/* >> + * This function will program the RDQS delays based on an absolute >> + * amount of PIs. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +void set_rdqs(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count) >> +{ >> + uint32_t reg; >> + uint32_t msk; >> + uint32_t temp; >> + >> + ENTERFN(); >> + DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n", >> + channel, rank, byte_lane, pi_count); >> + >> + /* >> + * PI (1/128 MCLK) >> + * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47) >> + * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47) >> + */ >> + reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE); >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + msk = (BIT6 | BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0); >> + temp = pi_count << 0; >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* error check (shouldn't go above 0x3F) */ >> + if (pi_count > 0x47) { >> + training_message(channel, rank, byte_lane); >> + mrc_post_code(0xEE, 0xE1); >> + } >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the current RDQS delay on the given >> + * channel, rank, byte_lane as an absolute PI count. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +uint32_t get_rdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane) >> +{ >> + uint32_t reg; >> + uint32_t temp; >> + uint32_t pi_count; >> + >> + ENTERFN(); >> + >> + /* >> + * PI (1/128 MCLK) >> + * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47) >> + * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47) >> + */ >> + reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE); >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + >> + /* Adjust PI_COUNT */ >> + pi_count = temp & 0x7F; >> + >> + LEAVEFN(); >> + >> + return pi_count; >> +} >> + >> +/* >> + * This function will program the WDQS delays based on an absolute >> + * amount of PIs. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +void set_wdqs(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count) >> +{ >> + uint32_t reg; >> + uint32_t msk; >> + uint32_t temp; >> + >> + ENTERFN(); >> + >> + DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n", >> + channel, rank, byte_lane, pi_count); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * BL0 -> B01PTRCTL0[07:04] (0x0-0xF) >> + * BL1 -> B01PTRCTL0[19:16] (0x0-0xF) >> + */ >> + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + msk = (byte_lane & BIT0) ? (BIT19 | BIT18 | BIT17 | BIT16) : >> + (BIT7 | BIT6 | BIT5 | BIT4); >> + temp = pi_count / HALF_CLK; >> + temp <<= (byte_lane & BIT0) ? (16) : (4); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* Adjust PI_COUNT */ >> + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F) >> + * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F) >> + */ >> + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16); >> + temp = pi_count << 16; >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* >> + * DEADBAND >> + * BL0/1 -> B01DBCTL1[07/10] (+1 select) >> + * BL0/1 -> B01DBCTL1[01/04] (enable) >> + */ >> + reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + msk = 0x00; >> + temp = 0x00; >> + >> + /* enable */ >> + msk |= (byte_lane & BIT0) ? (BIT4) : (BIT1); >> + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) >> + temp |= msk; >> + >> + /* select */ >> + msk |= (byte_lane & BIT0) ? (BIT10) : (BIT7); >> + if (pi_count < EARLY_DB) >> + temp |= msk; >> + >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* error check */ >> + if (pi_count > 0x3F) { >> + training_message(channel, rank, byte_lane); >> + mrc_post_code(0xEE, 0xE2); >> + } >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the amount of WDQS delay on the given >> + * channel, rank, byte_lane as an absolute PI count. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +uint32_t get_wdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane) >> +{ >> + uint32_t reg; >> + uint32_t temp; >> + uint32_t pi_count; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * BL0 -> B01PTRCTL0[07:04] (0x0-0xF) >> + * BL1 -> B01PTRCTL0[19:16] (0x0-0xF) >> + */ >> + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= (byte_lane & BIT0) ? (16) : (4); >> + temp &= 0xF; >> + >> + /* Adjust PI_COUNT */ >> + pi_count = (temp * HALF_CLK); >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F) >> + * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F) >> + */ >> + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= 16; >> + temp &= 0x3F; >> + >> + /* Adjust PI_COUNT */ >> + pi_count += temp; >> + >> + LEAVEFN(); >> + >> + return pi_count; >> +} >> + >> +/* >> + * This function will program the WDQ delays based on an absolute >> + * number of PIs. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +void set_wdq(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count) >> +{ >> + uint32_t reg; >> + uint32_t msk; >> + uint32_t temp; >> + >> + ENTERFN(); >> + >> + DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n", >> + channel, rank, byte_lane, pi_count); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * BL0 -> B01PTRCTL0[03:00] (0x0-0xF) >> + * BL1 -> B01PTRCTL0[15:12] (0x0-0xF) >> + */ >> + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + msk = (byte_lane & BIT0) ? (BIT15 | BIT14 | BIT13 | BIT12) : >> + (BIT3 | BIT2 | BIT1 | BIT0); >> + temp = pi_count / HALF_CLK; >> + temp <<= (byte_lane & BIT0) ? (12) : (0); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* Adjust PI_COUNT */ >> + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F) >> + * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F) >> + */ >> + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + msk = (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8); >> + temp = pi_count << 8; >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* >> + * DEADBAND >> + * BL0/1 -> B01DBCTL1[06/09] (+1 select) >> + * BL0/1 -> B01DBCTL1[00/03] (enable) >> + */ >> + reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + msk = 0x00; >> + temp = 0x00; >> + >> + /* enable */ >> + msk |= (byte_lane & BIT0) ? (BIT3) : (BIT0); >> + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) >> + temp |= msk; >> + >> + /* select */ >> + msk |= (byte_lane & BIT0) ? (BIT9) : (BIT6); >> + if (pi_count < EARLY_DB) >> + temp |= msk; >> + >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* error check */ >> + if (pi_count > 0x3F) { >> + training_message(channel, rank, byte_lane); >> + mrc_post_code(0xEE, 0xE3); >> + } >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the amount of WDQ delay on the given >> + * channel, rank, byte_lane as an absolute PI count. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +uint32_t get_wdq(uint8_t channel, uint8_t rank, uint8_t byte_lane) >> +{ >> + uint32_t reg; >> + uint32_t temp; >> + uint32_t pi_count; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * BL0 -> B01PTRCTL0[03:00] (0x0-0xF) >> + * BL1 -> B01PTRCTL0[15:12] (0x0-0xF) >> + */ >> + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= (byte_lane & BIT0) ? (12) : (0); >> + temp &= 0xF; >> + >> + /* Adjust PI_COUNT */ >> + pi_count = (temp * HALF_CLK); >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F) >> + * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F) >> + */ >> + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= 8; >> + temp &= 0x3F; >> + >> + /* Adjust PI_COUNT */ >> + pi_count += temp; >> + >> + LEAVEFN(); >> + >> + return pi_count; >> +} >> + >> +/* >> + * This function will program the WCMD delays based on an absolute >> + * number of PIs. >> + */ >> +void set_wcmd(uint8_t channel, uint32_t pi_count) >> +{ >> + uint32_t reg; >> + uint32_t msk; >> + uint32_t temp; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * CMDPTRREG[11:08] (0x0-0xF) >> + */ >> + reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET); >> + msk = (BIT11 | BIT10 | BIT9 | BIT8); >> + temp = pi_count / HALF_CLK; >> + temp <<= 8; >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* Adjust PI_COUNT */ >> + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused) >> + * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused) >> + * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused) >> + * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused) >> + * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused) >> + * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F) >> + * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused) >> + * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused) >> + */ >> + reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET); >> + >> + msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24 | >> + BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16 | >> + BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8 | >> + BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0); >> + >> + temp = (pi_count << 24) | (pi_count << 16) | >> + (pi_count << 8) | (pi_count << 0); >> + >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + reg = CMDDLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); /* PO */ >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* >> + * DEADBAND >> + * CMDCFGREG0[17] (+1 select) >> + * CMDCFGREG0[16] (enable) >> + */ >> + reg = CMDCFGREG0 + (channel * DDRIOCCC_CH_OFFSET); >> + msk = 0x00; >> + temp = 0x00; >> + >> + /* enable */ >> + msk |= BIT16; >> + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) >> + temp |= msk; >> + >> + /* select */ >> + msk |= BIT17; >> + if (pi_count < EARLY_DB) >> + temp |= msk; >> + >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* error check */ >> + if (pi_count > 0x3F) >> + mrc_post_code(0xEE, 0xE4); >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the amount of WCMD delay on the given >> + * channel as an absolute PI count. >> + */ >> +uint32_t get_wcmd(uint8_t channel) >> +{ >> + uint32_t reg; >> + uint32_t temp; >> + uint32_t pi_count; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * CMDPTRREG[11:08] (0x0-0xF) >> + */ >> + reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= 8; >> + temp &= 0xF; >> + >> + /* Adjust PI_COUNT */ >> + pi_count = temp * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused) >> + * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused) >> + * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused) >> + * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused) >> + * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused) >> + * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F) >> + * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused) >> + * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused) >> + */ >> + reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= 16; >> + temp &= 0x3F; >> + >> + /* Adjust PI_COUNT */ >> + pi_count += temp; >> + >> + LEAVEFN(); >> + >> + return pi_count; >> +} >> + >> +/* >> + * This function will program the WCLK delays based on an absolute >> + * number of PIs. >> + */ >> +void set_wclk(uint8_t channel, uint8_t rank, uint32_t pi_count) >> +{ >> + uint32_t reg; >> + uint32_t msk; >> + uint32_t temp; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * CCPTRREG[15:12] -> CLK1 (0x0-0xF) >> + * CCPTRREG[11:08] -> CLK0 (0x0-0xF) >> + */ >> + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); >> + msk = (BIT15 | BIT14 | BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8); > > mask = 0xff00 is much better, isn't it? Agreed, but did not fix it. >> + temp = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* Adjust PI_COUNT */ >> + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F) >> + * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F) >> + */ >> + reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0); >> + reg += (channel * DDRIOCCC_CH_OFFSET); >> + msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16 | >> + BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8); > > Ick! > Echo!! >> + temp = (pi_count << 16) | (pi_count << 8); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + reg = (rank) ? (ECCB1DLLPICODER1) : (ECCB1DLLPICODER1); > > Remove all (), and below. Please fix globally. Fixed globally. >> + reg += (channel * DDRIOCCC_CH_OFFSET); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + reg = (rank) ? (ECCB1DLLPICODER2) : (ECCB1DLLPICODER2); >> + reg += (channel * DDRIOCCC_CH_OFFSET); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + reg = (rank) ? (ECCB1DLLPICODER3) : (ECCB1DLLPICODER3); >> + reg += (channel * DDRIOCCC_CH_OFFSET); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* >> + * DEADBAND >> + * CCCFGREG1[11:08] (+1 select) >> + * CCCFGREG1[03:00] (enable) >> + */ >> + reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET); >> + msk = 0x00; >> + temp = 0x00; >> + >> + /* enable */ >> + msk |= (BIT3 | BIT2 | BIT1 | BIT0); >> + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) >> + temp |= msk; >> + >> + /* select */ >> + msk |= (BIT11 | BIT10 | BIT9 | BIT8); >> + if (pi_count < EARLY_DB) >> + temp |= msk; >> + >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* error check */ >> + if (pi_count > 0x3F) >> + mrc_post_code(0xEE, 0xE5); >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the amout of WCLK delay on the given >> + * channel, rank as an absolute PI count. >> + */ >> +uint32_t get_wclk(uint8_t channel, uint8_t rank) >> +{ >> + uint32_t reg; >> + uint32_t temp; >> + uint32_t pi_count; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * CCPTRREG[15:12] -> CLK1 (0x0-0xF) >> + * CCPTRREG[11:08] -> CLK0 (0x0-0xF) >> + */ >> + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= (rank) ? (12) : (8); >> + temp &= 0xF; >> + >> + /* Adjust PI_COUNT */ >> + pi_count = temp * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F) >> + * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F) >> + */ >> + reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0); >> + reg += (channel * DDRIOCCC_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= (rank) ? (16) : (8); >> + temp &= 0x3F; >> + >> + pi_count += temp; >> + >> + LEAVEFN(); >> + >> + return pi_count; >> +} >> + >> +/* >> + * This function will program the WCTL delays based on an absolute >> + * number of PIs. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +void set_wctl(uint8_t channel, uint8_t rank, uint32_t pi_count) >> +{ >> + uint32_t reg; >> + uint32_t msk; >> + uint32_t temp; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * CCPTRREG[31:28] (0x0-0xF) >> + * CCPTRREG[27:24] (0x0-0xF) >> + */ >> + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); >> + msk = (BIT31 | BIT30 | BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24); >> + temp = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* Adjust PI_COUNT */ >> + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) >> + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) >> + */ >> + reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); >> + msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24); >> + temp = (pi_count << 24); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + reg = ECCB1DLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + reg = ECCB1DLLPICODER2 + (channel * DDRIOCCC_CH_OFFSET); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + reg = ECCB1DLLPICODER3 + (channel * DDRIOCCC_CH_OFFSET); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* >> + * DEADBAND >> + * CCCFGREG1[13:12] (+1 select) >> + * CCCFGREG1[05:04] (enable) >> + */ >> + reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET); >> + msk = 0x00; >> + temp = 0x00; >> + >> + /* enable */ >> + msk |= (BIT5 | BIT4); >> + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) >> + temp |= msk; >> + >> + /* select */ >> + msk |= (BIT13 | BIT12); >> + if (pi_count < EARLY_DB) >> + temp |= msk; >> + >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* error check */ >> + if (pi_count > 0x3F) >> + mrc_post_code(0xEE, 0xE6); >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the amount of WCTL delay on the given >> + * channel, rank as an absolute PI count. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +uint32_t get_wctl(uint8_t channel, uint8_t rank) >> +{ >> + uint32_t reg; >> + uint32_t temp; >> + uint32_t pi_count; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * CCPTRREG[31:28] (0x0-0xF) >> + * CCPTRREG[27:24] (0x0-0xF) >> + */ >> + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= 24; >> + temp &= 0xF; >> + >> + /* Adjust PI_COUNT */ >> + pi_count = temp * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) >> + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) >> + */ >> + reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= 24; >> + temp &= 0x3F; >> + >> + /* Adjust PI_COUNT */ >> + pi_count += temp; >> + >> + LEAVEFN(); >> + >> + return pi_count; >> +} >> + >> +/* >> + * This function will program the internal Vref setting in a given >> + * byte lane in a given channel. >> + */ >> +void set_vref(uint8_t channel, uint8_t byte_lane, uint32_t setting) >> +{ >> + uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL); >> + >> + ENTERFN(); >> + >> + DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n", >> + channel, byte_lane, setting); >> + >> + mrc_alt_write_mask(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + >> + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)), >> + (vref_codes[setting] << 2), >> + (BIT7 | BIT6 | BIT5 | BIT4 | BIT3 | BIT2)); >> + >> + /* >> + * need to wait ~300ns for Vref to settle >> + * (check that this is necessary) >> + */ >> + delay_n(300); >> + >> + /* ??? may need to clear pointers ??? */ >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the internal Vref setting for the given >> + * channel, byte_lane. >> + */ >> +uint32_t get_vref(uint8_t channel, uint8_t byte_lane) >> +{ >> + uint8_t j; >> + uint32_t ret_val = sizeof(vref_codes) / 2; >> + uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL); >> + uint32_t temp; >> + >> + ENTERFN(); >> + >> + temp = msg_port_alt_read(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + >> + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET))); >> + temp >>= 2; >> + temp &= 0x3F; >> + >> + for (j = 0; j < sizeof(vref_codes); j++) { >> + if (vref_codes[j] == temp) { >> + ret_val = j; >> + break; >> + } >> + } >> + >> + LEAVEFN(); >> + >> + return ret_val; >> +} >> + >> +/* >> + * This function will return a 32 bit address in the desired > > 32-bit Fixed >> + * channel and rank. >> + */ >> +uint32_t get_addr(uint8_t channel, uint8_t rank) >> +{ >> + uint32_t offset = 0x02000000; /* 32MB */ >> + >> + /* Begin product specific code */ >> + if (channel > 0) { >> + DPF(D_ERROR, "ILLEGAL CHANNEL\n"); >> + DEAD_LOOP(); >> + } >> + >> + if (rank > 1) { >> + DPF(D_ERROR, "ILLEGAL RANK\n"); >> + DEAD_LOOP(); >> + } >> + >> + /* use 256MB lowest density as per DRP == 0x0003 */ >> + offset += rank * (256 * 1024 * 1024); >> + >> + return offset; >> +} >> + >> +/* >> + * This function will sample the DQTRAINSTS registers in the given >> + * channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'. >> + * >> + * It will return an encoded DWORD in which each bit corresponds to > > DWORD? Changed to 32-bit data >> + * the sampled value on the byte lane. >> + */ >> +uint32_t sample_dqs(struct mrc_params *mrc_params, uint8_t channel, >> + uint8_t rank, bool rcvn) >> +{ >> + uint8_t j; /* just a counter */ >> + uint8_t bl; /* which BL in the module (always 2 per module) */ >> + uint8_t bl_grp; /* which BL module */ >> + /* byte lane divisor */ > > Maybe rename the variable so you can drop the comment? That does not help much. Unchanged. >> + uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1; >> + uint32_t msk[2]; /* BLx in module */ >> + /* DQTRAINSTS register contents for each sample */ >> + uint32_t sampled_val[SAMPLE_SIZE]; >> + uint32_t num_0s; /* tracks the number of '0' samples */ >> + uint32_t num_1s; /* tracks the number of '1' samples */ >> + uint32_t ret_val = 0x00; /* assume all '0' samples */ >> + uint32_t address = get_addr(channel, rank); >> + >> + /* initialise msk[] */ >> + msk[0] = (rcvn) ? (BIT1) : (BIT9); /* BL0 */ >> + msk[1] = (rcvn) ? (BIT0) : (BIT8); /* BL1 */ >> + >> + /* cycle through each byte lane group */ >> + for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; bl_grp++) { >> + /* take SAMPLE_SIZE samples */ >> + for (j = 0; j < SAMPLE_SIZE; j++) { >> + hte_mem_op(address, mrc_params->first_run, >> + rcvn ? 0 : 1); >> + mrc_params->first_run = 0; >> + >> + /* >> + * record the contents of the proper >> + * DQTRAINSTS register >> + */ >> + sampled_val[j] = msg_port_alt_read(DDRPHY, >> + (DQTRAINSTS + >> + (bl_grp * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET))); >> + } >> + >> + /* >> + * look for a majority value (SAMPLE_SIZE / 2) + 1 >> + * on the byte lane and set that value in the corresponding >> + * ret_val bit >> + */ >> + for (bl = 0; bl < 2; bl++) { >> + num_0s = 0x00; /* reset '0' tracker for byte lane */ >> + num_1s = 0x00; /* reset '1' tracker for byte lane */ >> + for (j = 0; j < SAMPLE_SIZE; j++) { >> + if (sampled_val[j] & msk[bl]) >> + num_1s++; >> + else >> + num_0s++; >> + } >> + if (num_1s > num_0s) >> + ret_val |= (1 << (bl + (bl_grp * 2))); >> + } >> + } >> + >> + /* >> + * "ret_val.0" contains the status of BL0 >> + * "ret_val.1" contains the status of BL1 >> + * "ret_val.2" contains the status of BL2 >> + * etc. > > This comment should go in @return in the function comment. Actually I failed to understand what it really means, so leave it unchanged :( >> + */ >> + return ret_val; >> +} >> + >> +/* This function will find the rising edge transition on RCVN or WDQS */ >> +void find_rising_edge(struct mrc_params *mrc_params, uint32_t delay[], >> + uint8_t channel, uint8_t rank, bool rcvn) >> +{ >> + bool all_edges_found; /* determines stop condition */ >> + bool direction[NUM_BYTE_LANES]; /* direction indicator */ >> + uint8_t sample; /* sample counter */ >> + uint8_t bl; /* byte lane counter */ >> + /* byte lane divisor */ >> + uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1; >> + uint32_t sample_result[SAMPLE_CNT]; /* results of sample_dqs() */ >> + uint32_t temp; >> + uint32_t transition_pattern; >> + >> + ENTERFN(); >> + >> + /* select hte and request initial configuration */ >> + select_hte(); >> + mrc_params->first_run = 1; >> + >> + /* Take 3 sample points (T1,T2,T3) to obtain a transition pattern */ >> + for (sample = 0; sample < SAMPLE_CNT; sample++) { >> + /* program the desired delays for sample */ >> + for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) { >> + /* increase sample delay by 26 PI (0.2 CLK) */ >> + if (rcvn) { >> + set_rcvn(channel, rank, bl, >> + delay[bl] + (sample * SAMPLE_DLY)); >> + } else { >> + set_wdqs(channel, rank, bl, >> + delay[bl] + (sample * SAMPLE_DLY)); >> + } >> + } >> + >> + /* take samples (Tsample_i) */ >> + sample_result[sample] = sample_dqs(mrc_params, >> + channel, rank, rcvn); >> + >> + DPF(D_TRN, >> + "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n", >> + (rcvn ? "RCVN" : "WDQS"), channel, rank, sample, >> + sample * SAMPLE_DLY, sample_result[sample]); >> + } >> + >> + /* >> + * This pattern will help determine where we landed and ultimately >> + * how to place RCVEN/WDQS. >> + */ >> + for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) { >> + /* build transition_pattern (MSB is 1st sample) */ >> + transition_pattern = 0; >> + for (sample = 0; sample < SAMPLE_CNT; sample++) { >> + transition_pattern |= >> + ((sample_result[sample] & (1 << bl)) >> bl) << >> + (SAMPLE_CNT - 1 - sample); >> + } >> + >> + DPF(D_TRN, "=== transition pattern %d\n", transition_pattern); >> + >> + /* >> + * set up to look for rising edge based on >> + * transition_pattern >> + */ >> + switch (transition_pattern) { >> + case 0: /* sampled 0->0->0 */ >> + /* move forward from T3 looking for 0->1 */ >> + delay[bl] += 2 * SAMPLE_DLY; >> + direction[bl] = FORWARD; >> + break; >> + case 1: /* sampled 0->0->1 */ >> + case 5: /* sampled 1->0->1 (bad duty cycle) *HSD#237503* */ >> + /* move forward from T2 looking for 0->1 */ >> + delay[bl] += 1 * SAMPLE_DLY; >> + direction[bl] = FORWARD; >> + break; >> + case 2: /* sampled 0->1->0 (bad duty cycle) *HSD#237503* */ >> + case 3: /* sampled 0->1->1 */ >> + /* move forward from T1 looking for 0->1 */ >> + delay[bl] += 0 * SAMPLE_DLY; >> + direction[bl] = FORWARD; >> + break; >> + case 4: /* sampled 1->0->0 (assumes BL8, HSD#234975) */ >> + /* move forward from T3 looking for 0->1 */ >> + delay[bl] += 2 * SAMPLE_DLY; >> + direction[bl] = FORWARD; >> + break; >> + case 6: /* sampled 1->1->0 */ >> + case 7: /* sampled 1->1->1 */ >> + /* move backward from T1 looking for 1->0 */ >> + delay[bl] += 0 * SAMPLE_DLY; >> + direction[bl] = BACKWARD; >> + break; >> + default: >> + mrc_post_code(0xEE, 0xEE); >> + break; >> + } >> + >> + /* program delays */ >> + if (rcvn) >> + set_rcvn(channel, rank, bl, delay[bl]); >> + else >> + set_wdqs(channel, rank, bl, delay[bl]); >> + } >> + >> + /* >> + * Based on the observed transition pattern on the byte lane, >> + * begin looking for a rising edge with single PI granularity. >> + */ >> + do { >> + all_edges_found = true; /* assume all byte lanes passed */ >> + /* take a sample */ >> + temp = sample_dqs(mrc_params, channel, rank, rcvn); >> + /* check all each byte lane for proper edge */ >> + for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) { >> + if (temp & (1 << bl)) { >> + /* sampled "1" */ >> + if (direction[bl] == BACKWARD) { >> + /* >> + * keep looking for edge >> + * on this byte lane >> + */ >> + all_edges_found = false; >> + delay[bl] -= 1; >> + if (rcvn) { >> + set_rcvn(channel, rank, >> + bl, delay[bl]); >> + } else { >> + set_wdqs(channel, rank, >> + bl, delay[bl]); >> + } >> + } >> + } else { >> + /* sampled "0" */ >> + if (direction[bl] == FORWARD) { >> + /* >> + * keep looking for edge >> + * on this byte lane >> + */ >> + all_edges_found = false; >> + delay[bl] += 1; >> + if (rcvn) { >> + set_rcvn(channel, rank, >> + bl, delay[bl]); >> + } else { >> + set_wdqs(channel, rank, >> + bl, delay[bl]); >> + } >> + } >> + } >> + } >> + } while (!all_edges_found); >> + >> + /* restore DDR idle state */ >> + dram_init_command(DCMD_PREA(rank)); >> + >> + DPF(D_TRN, "Delay %03X %03X %03X %03X\n", >> + delay[0], delay[1], delay[2], delay[3]); >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return a 32 bit mask that will be used to >> + * check for byte lane failures. >> + */ >> +uint32_t byte_lane_mask(struct mrc_params *mrc_params) >> +{ >> + uint32_t j; >> + uint32_t ret_val = 0x00; >> + >> + /* >> + * set ret_val based on NUM_BYTE_LANES such that you will check >> + * only BL0 in result >> + * >> + * (each bit in result represents a byte lane) >> + */ >> + for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES) >> + ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES)); >> + >> + /* >> + * HSD#235037 >> + * need to adjust the mask for 16-bit mode >> + */ >> + if (mrc_params->channel_width == X16) >> + ret_val |= (ret_val << 2); >> + >> + return ret_val; >> +} >> + >> +/* >> + * Check memory executing simple write/read/verify at the specified address. >> + * >> + * Bits in the result indicate failure on specific byte lane. >> + */ >> +uint32_t check_rw_coarse(struct mrc_params *mrc_params, uint32_t address) >> +{ >> + uint32_t result = 0; >> + uint8_t first_run = 0; >> + >> + if (mrc_params->hte_setup) { >> + mrc_params->hte_setup = 0; >> + first_run = 1; >> + select_hte(); >> + } >> + >> + result = hte_basic_write_read(mrc_params, address, >> + first_run, WRITE_TRAIN); > > reformat to 80cols Fixed. >> + >> + DPF(D_TRN, "check_rw_coarse result is %x\n", result); >> + >> + return result; >> +} >> + >> +/* >> + * Check memory executing write/read/verify of many data patterns >> + * at the specified address. Bits in the result indicate failure >> + * on specific byte lane. >> + */ >> +uint32_t check_bls_ex(struct mrc_params *mrc_params, uint32_t address) >> +{ >> + uint32_t result; >> + uint8_t first_run = 0; >> + >> + if (mrc_params->hte_setup) { >> + mrc_params->hte_setup = 0; >> + first_run = 1; >> + select_hte(); >> + } >> + >> + result = hte_write_stress_bit_lanes(mrc_params, address, first_run); >> + >> + DPF(D_TRN, "check_bls_ex result is %x\n", result); >> + >> + return result; >> +} >> + >> +/* >> + * 32-bit LFSR with characteristic polynomial: X^32 + X^22 +X^2 + X^1 >> + * >> + * The function takes pointer to previous 32 bit value and >> + * modifies it to next value. >> + */ >> +void lfsr32(uint32_t *lfsr_ptr) >> +{ >> + uint32_t bit; >> + uint32_t lfsr; >> + int i; >> + >> + lfsr = *lfsr_ptr; >> + >> + for (i = 0; i < 32; i++) { >> + bit = 1 ^ (lfsr & BIT0); >> + bit = bit ^ ((lfsr & BIT1) >> 1); >> + bit = bit ^ ((lfsr & BIT2) >> 2); >> + bit = bit ^ ((lfsr & BIT22) >> 22); >> + >> + lfsr = ((lfsr >> 1) | (bit << 31)); >> + } >> + >> + *lfsr_ptr = lfsr; >> +} >> + >> +/* Clear the pointers in a given byte lane in a given channel */ >> +void clear_pointers(void) >> +{ >> + uint8_t channel; >> + uint8_t bl; >> + >> + ENTERFN(); >> + >> + for (channel = 0; channel < NUM_CHANNELS; channel++) { >> + for (bl = 0; bl < NUM_BYTE_LANES; bl++) { >> + mrc_alt_write_mask(DDRPHY, >> + (B01PTRCTL1 + >> + (channel * DDRIODQ_CH_OFFSET) + >> + ((bl >> 1) * DDRIODQ_BL_OFFSET)), >> + ~BIT8, BIT8); >> + >> + mrc_alt_write_mask(DDRPHY, >> + (B01PTRCTL1 + >> + (channel * DDRIODQ_CH_OFFSET) + >> + ((bl >> 1) * DDRIODQ_BL_OFFSET)), >> + BIT8, BIT8); >> + } >> + } >> + >> + LEAVEFN(); >> +} >> + >> +void print_timings(struct mrc_params *mrc_params) >> +{ >> + uint8_t algo; >> + uint8_t channel; >> + uint8_t rank; >> + uint8_t bl; >> + uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1; >> + >> + DPF(D_INFO, "\n---------------------------"); >> + DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3"); >> + DPF(D_INFO, "\n==========================="); >> + >> + for (algo = 0; algo < MAX_ALGOS; algo++) { >> + for (channel = 0; channel < NUM_CHANNELS; channel++) { >> + if (mrc_params->channel_enables & (1 << channel)) { >> + for (rank = 0; rank < NUM_RANKS; rank++) { > > Can we put this block in its own function to fix the over-indenting? Fixed. >> + if (mrc_params->rank_enables & >> + (1 << rank)) { >> + switch (algo) { >> + case RCVN: >> + DPF(D_INFO, >> + "\nRCVN[%02d:%02d]", >> + channel, rank); >> + break; >> + case WDQS: >> + DPF(D_INFO, >> + "\nWDQS[%02d:%02d]", >> + channel, rank); >> + break; >> + case WDQX: >> + DPF(D_INFO, >> + "\nWDQx[%02d:%02d]", >> + channel, rank); >> + break; >> + case RDQS: >> + DPF(D_INFO, >> + "\nRDQS[%02d:%02d]", >> + channel, rank); >> + break; >> + case VREF: >> + DPF(D_INFO, >> + "\nVREF[%02d:%02d]", >> + channel, rank); >> + break; >> + case WCMD: >> + DPF(D_INFO, >> + "\nWCMD[%02d:%02d]", >> + channel, rank); >> + break; >> + case WCTL: >> + DPF(D_INFO, >> + "\nWCTL[%02d:%02d]", >> + channel, rank); >> + break; >> + case WCLK: >> + DPF(D_INFO, >> + "\nWCLK[%02d:%02d]", >> + channel, rank); >> + break; >> + default: >> + break; >> + } >> + >> + for (bl = 0; >> + bl < (NUM_BYTE_LANES / bl_divisor); >> + bl++) { >> + switch (algo) { >> + case RCVN: >> + DPF(D_INFO, >> + " %03d", >> + get_rcvn(channel, rank, bl)); >> + break; >> + case WDQS: >> + DPF(D_INFO, >> + " %03d", >> + get_wdqs(channel, rank, bl)); >> + break; >> + case WDQX: >> + DPF(D_INFO, >> + " %03d", >> + get_wdq(channel, rank, bl)); >> + break; >> + case RDQS: >> + DPF(D_INFO, >> + " %03d", >> + get_rdqs(channel, rank, bl)); >> + break; >> + case VREF: >> + DPF(D_INFO, >> + " %03d", >> + get_vref(channel, bl)); >> + break; >> + case WCMD: >> + DPF(D_INFO, >> + " %03d", >> + get_wcmd(channel)); >> + break; >> + case WCTL: >> + DPF(D_INFO, >> + " %03d", >> + get_wctl(channel, rank)); >> + break; >> + case WCLK: >> + DPF(D_INFO, >> + " %03d", >> + get_wclk(channel, rank)); >> + break; >> + default: >> + break; >> + } >> + } >> + } >> + } >> + } >> + } >> + } >> + >> + DPF(D_INFO, "\n---------------------------"); >> + DPF(D_INFO, "\n"); >> +} >> diff --git a/arch/x86/cpu/quark/mrc_util.h b/arch/x86/cpu/quark/mrc_util.h >> new file mode 100644 >> index 0000000..edbe219 >> --- /dev/null >> +++ b/arch/x86/cpu/quark/mrc_util.h >> @@ -0,0 +1,153 @@ >> +/* >> + * Copyright (C) 2013, Intel Corporation >> + * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com> >> + * >> + * Ported from Intel released Quark UEFI BIOS >> + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ >> + * >> + * SPDX-License-Identifier: Intel >> + */ >> + >> +#ifndef _MRC_UTIL_H_ >> +#define _MRC_UTIL_H_ >> + >> +/* Turn on this macro to enable MRC debugging output */ >> +#undef MRC_DEBUG >> + >> +/* MRC Debug Support */ >> +#define DPF debug_cond >> + >> +/* debug print type */ >> + >> +#ifdef MRC_DEBUG >> +#define D_ERROR 0x0001 >> +#define D_INFO 0x0002 >> +#define D_REGRD 0x0004 >> +#define D_REGWR 0x0008 >> +#define D_FCALL 0x0010 >> +#define D_TRN 0x0020 >> +#define D_TIME 0x0040 >> +#else >> +#define D_ERROR 0 >> +#define D_INFO 0 >> +#define D_REGRD 0 >> +#define D_REGWR 0 >> +#define D_FCALL 0 >> +#define D_TRN 0 >> +#define D_TIME 0 >> +#endif >> + >> +#define ENTERFN(...) debug_cond(D_FCALL, "<%s>\n", __func__) >> +#define LEAVEFN(...) debug_cond(D_FCALL, "</%s>\n", __func__) >> +#define REPORTFN(...) debug_cond(D_FCALL, "<%s/>\n", __func__) >> + >> +/* Generic Register Bits */ >> +#define BIT0 0x00000001 >> +#define BIT1 0x00000002 >> +#define BIT2 0x00000004 >> +#define BIT3 0x00000008 >> +#define BIT4 0x00000010 >> +#define BIT5 0x00000020 >> +#define BIT6 0x00000040 >> +#define BIT7 0x00000080 >> +#define BIT8 0x00000100 >> +#define BIT9 0x00000200 >> +#define BIT10 0x00000400 >> +#define BIT11 0x00000800 >> +#define BIT12 0x00001000 >> +#define BIT13 0x00002000 >> +#define BIT14 0x00004000 >> +#define BIT15 0x00008000 >> +#define BIT16 0x00010000 >> +#define BIT17 0x00020000 >> +#define BIT18 0x00040000 >> +#define BIT19 0x00080000 >> +#define BIT20 0x00100000 >> +#define BIT21 0x00200000 >> +#define BIT22 0x00400000 >> +#define BIT23 0x00800000 >> +#define BIT24 0x01000000 >> +#define BIT25 0x02000000 >> +#define BIT26 0x04000000 >> +#define BIT27 0x08000000 >> +#define BIT28 0x10000000 >> +#define BIT29 0x20000000 >> +#define BIT30 0x40000000 >> +#define BIT31 0x80000000 >> + >> +/* Message Bus Port */ >> +#define MEM_CTLR 0x01 >> +#define HOST_BRIDGE 0x03 >> +#define MEM_MGR 0x05 >> +#define HTE 0x11 >> +#define DDRPHY 0x12 >> + >> +/* number of sample points */ >> +#define SAMPLE_CNT 3 >> +/* number of PIs to increment per sample */ >> +#define SAMPLE_DLY 26 >> + >> +enum { >> + /* indicates to decrease delays when looking for edge */ >> + BACKWARD, >> + /* indicates to increase delays when looking for edge */ >> + FORWARD >> +}; >> + >> +enum { >> + RCVN, >> + WDQS, >> + WDQX, >> + RDQS, >> + VREF, >> + WCMD, >> + WCTL, >> + WCLK, >> + MAX_ALGOS, >> +}; >> + >> +void mrc_write_mask(u32 unit, u32 addr, u32 data, u32 mask); >> +void mrc_alt_write_mask(u32 unit, u32 addr, u32 data, u32 mask); >> +void mrc_post_code(uint8_t major, uint8_t minor); >> +void delay_n(uint32_t ns); >> +void delay_u(uint32_t ms); >> +void select_mem_mgr(void); >> +void select_hte(void); >> +void dram_init_command(uint32_t data); >> +void dram_wake_command(void); >> +void training_message(uint8_t channel, uint8_t rank, uint8_t byte_lane); >> + >> +void set_rcvn(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count); >> +uint32_t get_rcvn(uint8_t channel, uint8_t rank, uint8_t byte_lane); >> +void set_rdqs(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count); >> +uint32_t get_rdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane); >> +void set_wdqs(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count); >> +uint32_t get_wdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane); >> +void set_wdq(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count); >> +uint32_t get_wdq(uint8_t channel, uint8_t rank, uint8_t byte_lane); >> +void set_wcmd(uint8_t channel, uint32_t pi_count); >> +uint32_t get_wcmd(uint8_t channel); >> +void set_wclk(uint8_t channel, uint8_t rank, uint32_t pi_count); >> +uint32_t get_wclk(uint8_t channel, uint8_t rank); >> +void set_wctl(uint8_t channel, uint8_t rank, uint32_t pi_count); >> +uint32_t get_wctl(uint8_t channel, uint8_t rank); >> +void set_vref(uint8_t channel, uint8_t byte_lane, uint32_t setting); >> +uint32_t get_vref(uint8_t channel, uint8_t byte_lane); >> + >> +uint32_t get_addr(uint8_t channel, uint8_t rank); >> +uint32_t sample_dqs(struct mrc_params *mrc_params, uint8_t channel, >> + uint8_t rank, bool rcvn); >> +void find_rising_edge(struct mrc_params *mrc_params, uint32_t delay[], >> + uint8_t channel, uint8_t rank, bool rcvn); >> +uint32_t byte_lane_mask(struct mrc_params *mrc_params); >> +uint32_t check_rw_coarse(struct mrc_params *mrc_params, uint32_t address); >> +uint32_t check_bls_ex(struct mrc_params *mrc_params, uint32_t address); >> +void lfsr32(uint32_t *lfsr_ptr); >> +void clear_pointers(void); >> +void print_timings(struct mrc_params *mrc_params); > > If these are all truly exported, can we please put the function > comments here in the header file? No, they are only used internally by MRC. >> + >> +#endif /* _MRC_UTIL_H_ */ >> -- >> 1.8.2.1 >> > > Regards, > Simon Regards, Bin
diff --git a/arch/x86/cpu/quark/hte.c b/arch/x86/cpu/quark/hte.c new file mode 100644 index 0000000..d813c9c --- /dev/null +++ b/arch/x86/cpu/quark/hte.c @@ -0,0 +1,398 @@ +/* + * Copyright (C) 2013, Intel Corporation + * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com> + * + * Ported from Intel released Quark UEFI BIOS + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ + * + * SPDX-License-Identifier: Intel + */ + +#include <common.h> +#include <asm/arch/mrc.h> +#include <asm/arch/msg_port.h> +#include "mrc_util.h" +#include "hte.h" + +/** + * This function enables HTE to detect all possible errors for + * the given training parameters (per-bit or full byte lane). + */ +static void hte_enable_all_errors(void) +{ + msg_port_write(HTE, 0x000200A2, 0xFFFFFFFF); + msg_port_write(HTE, 0x000200A3, 0x000000FF); + msg_port_write(HTE, 0x000200A4, 0x00000000); +} + +/** + * This function goes and reads the HTE register in order to find any error + * + * @return: The errors detected in the HTE status register + */ +static u32 hte_check_errors(void) +{ + return msg_port_read(HTE, 0x000200A7); +} + +/** + * This function waits until HTE finishes + */ +static void hte_wait_for_complete(void) +{ + u32 tmp; + + ENTERFN(); + + do {} while ((msg_port_read(HTE, 0x00020012) & BIT30) != 0); + + tmp = msg_port_read(HTE, 0x00020011); + tmp |= BIT9; + tmp &= ~(BIT12 | BIT13); + msg_port_write(HTE, 0x00020011, tmp); + + LEAVEFN(); +} + +/** + * This function clears registers related with errors in the HTE + */ +static void hte_clear_error_regs(void) +{ + u32 tmp; + + /* + * Clear all HTE errors and enable error checking + * for burst and chunk. + */ + tmp = msg_port_read(HTE, 0x000200A1); + tmp |= BIT8; + msg_port_write(HTE, 0x000200A1, tmp); +} + +/** + * This function executes basic single cache line memory write/read/verify + * test using simple constant pattern, different for READ_RAIN and + * WRITE_TRAIN modes. + * + * See hte_basic_write_read() which is external visible wrapper. + * + * @mrc_params: host struture for all MRC global data + * @addr: memory adress being tested (must hit specific channel/rank) + * @first_run: if set then hte registers are configured, otherwise it is + * assumed configuration is done and just re-run the test + * @mode: READ_TRAIN or WRITE_TRAIN (the difference is in the pattern) + * + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) + */ +static u16 hte_basic_data_cmp(struct mrc_params *mrc_params, u32 addr, + u8 first_run, u8 mode) +{ + u32 pattern; + u32 offset; + + if (first_run) { + msg_port_write(HTE, 0x00020020, 0x01B10021); + msg_port_write(HTE, 0x00020021, 0x06000000); + msg_port_write(HTE, 0x00020022, addr >> 6); + msg_port_write(HTE, 0x00020062, 0x00800015); + msg_port_write(HTE, 0x00020063, 0xAAAAAAAA); + msg_port_write(HTE, 0x00020064, 0xCCCCCCCC); + msg_port_write(HTE, 0x00020065, 0xF0F0F0F0); + msg_port_write(HTE, 0x00020061, 0x00030008); + + if (mode == WRITE_TRAIN) + pattern = 0xC33C0000; + else /* READ_TRAIN */ + pattern = 0xAA5555AA; + + for (offset = 0x80; offset <= 0x8F; offset++) + msg_port_write(HTE, offset, pattern); + } + + msg_port_write(HTE, 0x000200A1, 0xFFFF1000); + msg_port_write(HTE, 0x00020011, 0x00011000); + msg_port_write(HTE, 0x00020011, 0x00011100); + + hte_wait_for_complete(); + + /* + * Return bits 15:8 of HTE_CH0_ERR_XSTAT to check for + * any bytelane errors. + */ + return (hte_check_errors() >> 8) & 0xFF; +} + +/** + * This function examines single cache line memory with write/read/verify + * test using multiple data patterns (victim-aggressor algorithm). + * + * See hte_write_stress_bit_lanes() which is external visible wrapper. + * + * @mrc_params: host struture for all MRC global data + * @addr: memory adress being tested (must hit specific channel/rank) + * @loop_cnt: number of test iterations + * @seed_victim: victim data pattern seed + * @seed_aggressor: aggressor data pattern seed + * @victim_bit: should be 0 as auto rotate feature is in use + * @first_run: if set then hte registers are configured, otherwise it is + * assumed configuration is done and just re-run the test + * + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) + */ +static u16 hte_rw_data_cmp(struct mrc_params *mrc_params, u32 addr, + u8 loop_cnt, u32 seed_victim, u32 seed_aggressor, + u8 victim_bit, u8 first_run) +{ + u32 offset; + u32 tmp; + + if (first_run) { + msg_port_write(HTE, 0x00020020, 0x00910024); + msg_port_write(HTE, 0x00020023, 0x00810024); + msg_port_write(HTE, 0x00020021, 0x06070000); + msg_port_write(HTE, 0x00020024, 0x06070000); + msg_port_write(HTE, 0x00020022, addr >> 6); + msg_port_write(HTE, 0x00020025, addr >> 6); + msg_port_write(HTE, 0x00020062, 0x0000002A); + msg_port_write(HTE, 0x00020063, seed_victim); + msg_port_write(HTE, 0x00020064, seed_aggressor); + msg_port_write(HTE, 0x00020065, seed_victim); + + /* + * Write the pattern buffers to select the victim bit + * + * Start with bit0 + */ + for (offset = 0x80; offset <= 0x8F; offset++) { + if ((offset % 8) == victim_bit) + msg_port_write(HTE, offset, 0x55555555); + else + msg_port_write(HTE, offset, 0xCCCCCCCC); + } + + msg_port_write(HTE, 0x00020061, 0x00000000); + msg_port_write(HTE, 0x00020066, 0x03440000); + msg_port_write(HTE, 0x000200A1, 0xFFFF1000); + } + + tmp = 0x10001000 | (loop_cnt << 16); + msg_port_write(HTE, 0x00020011, tmp); + msg_port_write(HTE, 0x00020011, tmp | BIT8); + + hte_wait_for_complete(); + + /* + * Return bits 15:8 of HTE_CH0_ERR_XSTAT to check for + * any bytelane errors. + */ + return (hte_check_errors() >> 8) & 0xFF; +} + +/** + * This function uses HW HTE engine to initialize or test all memory attached + * to a given DUNIT. If flag is MRC_MEM_INIT, this routine writes 0s to all + * memory locations to initialize ECC. If flag is MRC_MEM_TEST, this routine + * will send an 5AA55AA5 pattern to all memory locations on the RankMask and + * then read it back. Then it sends an A55AA55A pattern to all memory locations + * on the RankMask and reads it back. + * + * @mrc_params: host struture for all MRC global data + * @flag: MRC_MEM_INIT or MRC_MEM_TEST + * + * @return: errors register showing HTE failures. Also prints out which rank + * failed the HTE test if failure occurs. For rank detection to work, + * the address map must be left in its default state. If MRC changes + * the address map, this function must be modified to change it back + * to default at the beginning, then restore it at the end. + */ +u32 hte_mem_init(struct mrc_params *mrc_params, u8 flag) +{ + u32 offset; + int test_num; + int i; + + /* + * Clear out the error registers at the start of each memory + * init or memory test run. + */ + hte_clear_error_regs(); + + msg_port_write(HTE, 0x00020062, 0x00000015); + + for (offset = 0x80; offset <= 0x8F; offset++) + msg_port_write(HTE, offset, ((offset & 1) ? 0xA55A : 0x5AA5)); + + msg_port_write(HTE, 0x00020021, 0x00000000); + msg_port_write(HTE, 0x00020022, (mrc_params->mem_size >> 6) - 1); + msg_port_write(HTE, 0x00020063, 0xAAAAAAAA); + msg_port_write(HTE, 0x00020064, 0xCCCCCCCC); + msg_port_write(HTE, 0x00020065, 0xF0F0F0F0); + msg_port_write(HTE, 0x00020066, 0x03000000); + + switch (flag) { + case MRC_MEM_INIT: + /* + * Only 1 write pass through memory is needed + * to initialize ECC + */ + test_num = 1; + break; + case MRC_MEM_TEST: + /* Write/read then write/read with inverted pattern */ + test_num = 4; + break; + default: + DPF(D_INFO, "Unknown parameter for flag: %d\n", flag); + return 0xFFFFFFFF; + } + + DPF(D_INFO, "hte_mem_init"); + + for (i = 0; i < test_num; i++) { + DPF(D_INFO, "."); + + if (i == 0) { + msg_port_write(HTE, 0x00020061, 0x00000000); + msg_port_write(HTE, 0x00020020, 0x00110010); + } else if (i == 1) { + msg_port_write(HTE, 0x00020061, 0x00000000); + msg_port_write(HTE, 0x00020020, 0x00010010); + } else if (i == 2) { + msg_port_write(HTE, 0x00020061, 0x00010100); + msg_port_write(HTE, 0x00020020, 0x00110010); + } else { + msg_port_write(HTE, 0x00020061, 0x00010100); + msg_port_write(HTE, 0x00020020, 0x00010010); + } + + msg_port_write(HTE, 0x00020011, 0x00111000); + msg_port_write(HTE, 0x00020011, 0x00111100); + + hte_wait_for_complete(); + + /* If this is a READ pass, check for errors at the end */ + if ((i % 2) == 1) { + /* Return immediately if error */ + if (hte_check_errors()) + break; + } + } + + DPF(D_INFO, "done\n"); + + return hte_check_errors(); +} + +/** + * This function executes basic single cache line memory write/read/verify + * test using simple constant pattern, different for READ_RAIN and + * WRITE_TRAIN modes. + * + * @mrc_params: host struture for all MRC global data + * @addr: memory adress being tested (must hit specific channel/rank) + * @first_run: if set then hte registers are configured, otherwise it is + * assumed configuration is done and just re-run the test + * @mode: READ_TRAIN or WRITE_TRAIN (the difference is in the pattern) + * + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) + */ +u16 hte_basic_write_read(struct mrc_params *mrc_params, u32 addr, + u8 first_run, u8 mode) +{ + u16 errors; + + ENTERFN(); + + /* Enable all error reporting in preparation for HTE test */ + hte_enable_all_errors(); + hte_clear_error_regs(); + + errors = hte_basic_data_cmp(mrc_params, addr, first_run, mode); + + LEAVEFN(); + + return errors; +} + +/** + * This function examines single cache line memory with write/read/verify + * test using multiple data patterns (victim-aggressor algorithm). + * + * @mrc_params: host struture for all MRC global data + * @addr: memory adress being tested (must hit specific channel/rank) + * @first_run: if set then hte registers are configured, otherwise it is + * assumed configuration is done and just re-run the test + * + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) + */ +u16 hte_write_stress_bit_lanes(struct mrc_params *mrc_params, + u32 addr, u8 first_run) +{ + u16 errors; + u8 victim_bit = 0; + + ENTERFN(); + + /* Enable all error reporting in preparation for HTE test */ + hte_enable_all_errors(); + hte_clear_error_regs(); + + /* + * Loop through each bit in the bytelane. + * + * Each pass creates a victim bit while keeping all other bits the same + * as aggressors. AVN HTE adds an auto-rotate feature which allows us + * to program the entire victim/aggressor sequence in 1 step. + * + * The victim bit rotates on each pass so no need to have software + * implement a victim bit loop like on VLV. + */ + errors = hte_rw_data_cmp(mrc_params, addr, HTE_LOOP_CNT, + HTE_LFSR_VICTIM_SEED, HTE_LFSR_AGRESSOR_SEED, + victim_bit, first_run); + + LEAVEFN(); + + return errors; +} + +/** + * This function execute basic single cache line memory write or read. + * This is just for receive enable / fine write levelling purpose. + * + * @addr: memory adress being tested (must hit specific channel/rank) + * @first_run: if set then hte registers are configured, otherwise it is + * assumed configuration is done and just re-run the test + * @is_write: when non-zero memory write operation executed, otherwise read + */ +void hte_mem_op(u32 addr, u8 first_run, u8 is_write) +{ + u32 offset; + u32 tmp; + + hte_enable_all_errors(); + hte_clear_error_regs(); + + if (first_run) { + tmp = is_write ? 0x01110021 : 0x01010021; + msg_port_write(HTE, 0x00020020, tmp); + + msg_port_write(HTE, 0x00020021, 0x06000000); + msg_port_write(HTE, 0x00020022, addr >> 6); + msg_port_write(HTE, 0x00020062, 0x00800015); + msg_port_write(HTE, 0x00020063, 0xAAAAAAAA); + msg_port_write(HTE, 0x00020064, 0xCCCCCCCC); + msg_port_write(HTE, 0x00020065, 0xF0F0F0F0); + msg_port_write(HTE, 0x00020061, 0x00030008); + + for (offset = 0x80; offset <= 0x8F; offset++) + msg_port_write(HTE, offset, 0xC33C0000); + } + + msg_port_write(HTE, 0x000200A1, 0xFFFF1000); + msg_port_write(HTE, 0x00020011, 0x00011000); + msg_port_write(HTE, 0x00020011, 0x00011100); + + hte_wait_for_complete(); +} diff --git a/arch/x86/cpu/quark/hte.h b/arch/x86/cpu/quark/hte.h new file mode 100644 index 0000000..3a173ea --- /dev/null +++ b/arch/x86/cpu/quark/hte.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2013, Intel Corporation + * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com> + * + * Ported from Intel released Quark UEFI BIOS + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ + * + * SPDX-License-Identifier: Intel + */ + +#ifndef _HTE_H_ +#define _HTE_H_ + +enum { + MRC_MEM_INIT, + MRC_MEM_TEST +}; + +enum { + READ_TRAIN, + WRITE_TRAIN +}; + +/* + * EXP_LOOP_CNT field of HTE_CMD_CTL + * + * This CANNOT be less than 4! + */ +#define HTE_LOOP_CNT 5 + +/* random seed for victim */ +#define HTE_LFSR_VICTIM_SEED 0xF294BA21 + +/* random seed for aggressor */ +#define HTE_LFSR_AGRESSOR_SEED 0xEBA7492D + +u32 hte_mem_init(struct mrc_params *mrc_params, u8 flag); +u16 hte_basic_write_read(struct mrc_params *mrc_params, u32 addr, + u8 first_run, u8 mode); +u16 hte_write_stress_bit_lanes(struct mrc_params *mrc_params, + u32 addr, u8 first_run); +void hte_mem_op(u32 addr, u8 first_run, u8 is_write); + +#endif /* _HTE_H_ */ diff --git a/arch/x86/cpu/quark/mrc_util.c b/arch/x86/cpu/quark/mrc_util.c new file mode 100644 index 0000000..1ae42d6 --- /dev/null +++ b/arch/x86/cpu/quark/mrc_util.c @@ -0,0 +1,1499 @@ +/* + * Copyright (C) 2013, Intel Corporation + * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com> + * + * Ported from Intel released Quark UEFI BIOS + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ + * + * SPDX-License-Identifier: Intel + */ + +#include <common.h> +#include <asm/arch/device.h> +#include <asm/arch/mrc.h> +#include <asm/arch/msg_port.h> +#include "mrc_util.h" +#include "hte.h" +#include "smc.h" + +static const uint8_t vref_codes[64] = { + /* lowest to highest */ + 0x3F, 0x3E, 0x3D, 0x3C, 0x3B, 0x3A, 0x39, 0x38, + 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, + 0x2F, 0x2E, 0x2D, 0x2C, 0x2B, 0x2A, 0x29, 0x28, + 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F +}; + +void mrc_write_mask(u32 unit, u32 addr, u32 data, u32 mask) +{ + msg_port_write(unit, addr, + (msg_port_read(unit, addr) & ~(mask)) | + ((data) & (mask))); +} + +void mrc_alt_write_mask(u32 unit, u32 addr, u32 data, u32 mask) +{ + msg_port_alt_write(unit, addr, + (msg_port_alt_read(unit, addr) & ~(mask)) | + ((data) & (mask))); +} + +void mrc_post_code(uint8_t major, uint8_t minor) +{ + /* send message to UART */ + DPF(D_INFO, "POST: 0x%01x%02x\n", major, minor); + + /* error check */ + if (major == 0xEE) + hang(); +} + +/* Delay number of nanoseconds */ +void delay_n(uint32_t ns) +{ + /* 1000 MHz clock has 1ns period --> no conversion required */ + uint64_t final_tsc = rdtsc(); + final_tsc += ((get_tbclk_mhz() * ns) / 1000); + + while (rdtsc() < final_tsc) + ; +} + +/* Delay number of microseconds */ +void delay_u(uint32_t ms) +{ + /* 64-bit math is not an option, just use loops */ + while (ms--) + delay_n(1000); +} + +/* Select Memory Manager as the source for PRI interface */ +void select_mem_mgr(void) +{ + u32 dco; + + ENTERFN(); + + dco = msg_port_read(MEM_CTLR, DCO); + dco &= ~BIT28; + msg_port_write(MEM_CTLR, DCO, dco); + + LEAVEFN(); +} + +/* Select HTE as the source for PRI interface */ +void select_hte(void) +{ + u32 dco; + + ENTERFN(); + + dco = msg_port_read(MEM_CTLR, DCO); + dco |= BIT28; + msg_port_write(MEM_CTLR, DCO, dco); + + LEAVEFN(); +} + +/* + * Send DRAM command + * data should be formated using DCMD_Xxxx macro or emrsXCommand structure + */ +void dram_init_command(uint32_t data) +{ + pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_DATA_REG, data); + pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_CTRL_EXT_REG, 0); + msg_port_setup(MSG_OP_DRAM_INIT, MEM_CTLR, 0); + + DPF(D_REGWR, "WR32 %03X %08X %08X\n", MEM_CTLR, 0, data); +} + +/* Send DRAM wake command using special MCU side-band WAKE opcode */ +void dram_wake_command(void) +{ + ENTERFN(); + + msg_port_setup(MSG_OP_DRAM_WAKE, MEM_CTLR, 0); + + LEAVEFN(); +} + +void training_message(uint8_t channel, uint8_t rank, uint8_t byte_lane) +{ + /* send message to UART */ + DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane); +} + +/* + * This function will program the RCVEN delays + * + * (currently doesn't comprehend rank) + */ +void set_rcvn(uint8_t channel, uint8_t rank, + uint8_t byte_lane, uint32_t pi_count) +{ + uint32_t reg; + uint32_t msk; + uint32_t temp; + + ENTERFN(); + + DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n", + channel, rank, byte_lane, pi_count); + + /* + * RDPTR (1/2 MCLK, 64 PIs) + * BL0 -> B01PTRCTL0[11:08] (0x0-0xF) + * BL1 -> B01PTRCTL0[23:20] (0x0-0xF) + */ + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET); + msk = (byte_lane & BIT0) ? (BIT23 | BIT22 | BIT21 | BIT20) : + (BIT11 | BIT10 | BIT9 | BIT8); + temp = (byte_lane & BIT0) ? ((pi_count / HALF_CLK) << 20) : + ((pi_count / HALF_CLK) << 8); + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* Adjust PI_COUNT */ + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; + + /* + * PI (1/64 MCLK, 1 PIs) + * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F) + * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F) + */ + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET)); + msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24); + temp = pi_count << 24; + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* + * DEADBAND + * BL0/1 -> B01DBCTL1[08/11] (+1 select) + * BL0/1 -> B01DBCTL1[02/05] (enable) + */ + reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET); + msk = 0x00; + temp = 0x00; + + /* enable */ + msk |= (byte_lane & BIT0) ? (BIT5) : (BIT2); + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) + temp |= msk; + + /* select */ + msk |= (byte_lane & BIT0) ? (BIT11) : (BIT8); + if (pi_count < EARLY_DB) + temp |= msk; + + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* error check */ + if (pi_count > 0x3F) { + training_message(channel, rank, byte_lane); + mrc_post_code(0xEE, 0xE0); + } + + LEAVEFN(); +} + +/* + * This function will return the current RCVEN delay on the given + * channel, rank, byte_lane as an absolute PI count. + * + * (currently doesn't comprehend rank) + */ +uint32_t get_rcvn(uint8_t channel, uint8_t rank, uint8_t byte_lane) +{ + uint32_t reg; + uint32_t temp; + uint32_t pi_count; + + ENTERFN(); + + /* + * RDPTR (1/2 MCLK, 64 PIs) + * BL0 -> B01PTRCTL0[11:08] (0x0-0xF) + * BL1 -> B01PTRCTL0[23:20] (0x0-0xF) + */ + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET); + temp = msg_port_alt_read(DDRPHY, reg); + temp >>= (byte_lane & BIT0) ? (20) : (8); + temp &= 0xF; + + /* Adjust PI_COUNT */ + pi_count = temp * HALF_CLK; + + /* + * PI (1/64 MCLK, 1 PIs) + * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F) + * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F) + */ + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET)); + temp = msg_port_alt_read(DDRPHY, reg); + temp >>= 24; + temp &= 0x3F; + + /* Adjust PI_COUNT */ + pi_count += temp; + + LEAVEFN(); + + return pi_count; +} + +/* + * This function will program the RDQS delays based on an absolute + * amount of PIs. + * + * (currently doesn't comprehend rank) + */ +void set_rdqs(uint8_t channel, uint8_t rank, + uint8_t byte_lane, uint32_t pi_count) +{ + uint32_t reg; + uint32_t msk; + uint32_t temp; + + ENTERFN(); + DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n", + channel, rank, byte_lane, pi_count); + + /* + * PI (1/128 MCLK) + * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47) + * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47) + */ + reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE); + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET)); + msk = (BIT6 | BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0); + temp = pi_count << 0; + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* error check (shouldn't go above 0x3F) */ + if (pi_count > 0x47) { + training_message(channel, rank, byte_lane); + mrc_post_code(0xEE, 0xE1); + } + + LEAVEFN(); +} + +/* + * This function will return the current RDQS delay on the given + * channel, rank, byte_lane as an absolute PI count. + * + * (currently doesn't comprehend rank) + */ +uint32_t get_rdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane) +{ + uint32_t reg; + uint32_t temp; + uint32_t pi_count; + + ENTERFN(); + + /* + * PI (1/128 MCLK) + * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47) + * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47) + */ + reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE); + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET)); + temp = msg_port_alt_read(DDRPHY, reg); + + /* Adjust PI_COUNT */ + pi_count = temp & 0x7F; + + LEAVEFN(); + + return pi_count; +} + +/* + * This function will program the WDQS delays based on an absolute + * amount of PIs. + * + * (currently doesn't comprehend rank) + */ +void set_wdqs(uint8_t channel, uint8_t rank, + uint8_t byte_lane, uint32_t pi_count) +{ + uint32_t reg; + uint32_t msk; + uint32_t temp; + + ENTERFN(); + + DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n", + channel, rank, byte_lane, pi_count); + + /* + * RDPTR (1/2 MCLK, 64 PIs) + * BL0 -> B01PTRCTL0[07:04] (0x0-0xF) + * BL1 -> B01PTRCTL0[19:16] (0x0-0xF) + */ + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET); + msk = (byte_lane & BIT0) ? (BIT19 | BIT18 | BIT17 | BIT16) : + (BIT7 | BIT6 | BIT5 | BIT4); + temp = pi_count / HALF_CLK; + temp <<= (byte_lane & BIT0) ? (16) : (4); + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* Adjust PI_COUNT */ + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; + + /* + * PI (1/64 MCLK, 1 PIs) + * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F) + * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F) + */ + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET)); + msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16); + temp = pi_count << 16; + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* + * DEADBAND + * BL0/1 -> B01DBCTL1[07/10] (+1 select) + * BL0/1 -> B01DBCTL1[01/04] (enable) + */ + reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET); + msk = 0x00; + temp = 0x00; + + /* enable */ + msk |= (byte_lane & BIT0) ? (BIT4) : (BIT1); + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) + temp |= msk; + + /* select */ + msk |= (byte_lane & BIT0) ? (BIT10) : (BIT7); + if (pi_count < EARLY_DB) + temp |= msk; + + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* error check */ + if (pi_count > 0x3F) { + training_message(channel, rank, byte_lane); + mrc_post_code(0xEE, 0xE2); + } + + LEAVEFN(); +} + +/* + * This function will return the amount of WDQS delay on the given + * channel, rank, byte_lane as an absolute PI count. + * + * (currently doesn't comprehend rank) + */ +uint32_t get_wdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane) +{ + uint32_t reg; + uint32_t temp; + uint32_t pi_count; + + ENTERFN(); + + /* + * RDPTR (1/2 MCLK, 64 PIs) + * BL0 -> B01PTRCTL0[07:04] (0x0-0xF) + * BL1 -> B01PTRCTL0[19:16] (0x0-0xF) + */ + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET); + temp = msg_port_alt_read(DDRPHY, reg); + temp >>= (byte_lane & BIT0) ? (16) : (4); + temp &= 0xF; + + /* Adjust PI_COUNT */ + pi_count = (temp * HALF_CLK); + + /* + * PI (1/64 MCLK, 1 PIs) + * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F) + * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F) + */ + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET)); + temp = msg_port_alt_read(DDRPHY, reg); + temp >>= 16; + temp &= 0x3F; + + /* Adjust PI_COUNT */ + pi_count += temp; + + LEAVEFN(); + + return pi_count; +} + +/* + * This function will program the WDQ delays based on an absolute + * number of PIs. + * + * (currently doesn't comprehend rank) + */ +void set_wdq(uint8_t channel, uint8_t rank, + uint8_t byte_lane, uint32_t pi_count) +{ + uint32_t reg; + uint32_t msk; + uint32_t temp; + + ENTERFN(); + + DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n", + channel, rank, byte_lane, pi_count); + + /* + * RDPTR (1/2 MCLK, 64 PIs) + * BL0 -> B01PTRCTL0[03:00] (0x0-0xF) + * BL1 -> B01PTRCTL0[15:12] (0x0-0xF) + */ + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET); + msk = (byte_lane & BIT0) ? (BIT15 | BIT14 | BIT13 | BIT12) : + (BIT3 | BIT2 | BIT1 | BIT0); + temp = pi_count / HALF_CLK; + temp <<= (byte_lane & BIT0) ? (12) : (0); + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* Adjust PI_COUNT */ + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; + + /* + * PI (1/64 MCLK, 1 PIs) + * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F) + * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F) + */ + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET)); + msk = (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8); + temp = pi_count << 8; + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* + * DEADBAND + * BL0/1 -> B01DBCTL1[06/09] (+1 select) + * BL0/1 -> B01DBCTL1[00/03] (enable) + */ + reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET); + msk = 0x00; + temp = 0x00; + + /* enable */ + msk |= (byte_lane & BIT0) ? (BIT3) : (BIT0); + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) + temp |= msk; + + /* select */ + msk |= (byte_lane & BIT0) ? (BIT9) : (BIT6); + if (pi_count < EARLY_DB) + temp |= msk; + + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* error check */ + if (pi_count > 0x3F) { + training_message(channel, rank, byte_lane); + mrc_post_code(0xEE, 0xE3); + } + + LEAVEFN(); +} + +/* + * This function will return the amount of WDQ delay on the given + * channel, rank, byte_lane as an absolute PI count. + * + * (currently doesn't comprehend rank) + */ +uint32_t get_wdq(uint8_t channel, uint8_t rank, uint8_t byte_lane) +{ + uint32_t reg; + uint32_t temp; + uint32_t pi_count; + + ENTERFN(); + + /* + * RDPTR (1/2 MCLK, 64 PIs) + * BL0 -> B01PTRCTL0[03:00] (0x0-0xF) + * BL1 -> B01PTRCTL0[15:12] (0x0-0xF) + */ + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET); + temp = msg_port_alt_read(DDRPHY, reg); + temp >>= (byte_lane & BIT0) ? (12) : (0); + temp &= 0xF; + + /* Adjust PI_COUNT */ + pi_count = (temp * HALF_CLK); + + /* + * PI (1/64 MCLK, 1 PIs) + * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F) + * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F) + */ + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET)); + temp = msg_port_alt_read(DDRPHY, reg); + temp >>= 8; + temp &= 0x3F; + + /* Adjust PI_COUNT */ + pi_count += temp; + + LEAVEFN(); + + return pi_count; +} + +/* + * This function will program the WCMD delays based on an absolute + * number of PIs. + */ +void set_wcmd(uint8_t channel, uint32_t pi_count) +{ + uint32_t reg; + uint32_t msk; + uint32_t temp; + + ENTERFN(); + + /* + * RDPTR (1/2 MCLK, 64 PIs) + * CMDPTRREG[11:08] (0x0-0xF) + */ + reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET); + msk = (BIT11 | BIT10 | BIT9 | BIT8); + temp = pi_count / HALF_CLK; + temp <<= 8; + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* Adjust PI_COUNT */ + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; + + /* + * PI (1/64 MCLK, 1 PIs) + * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused) + * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused) + * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused) + * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused) + * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused) + * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F) + * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused) + * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused) + */ + reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET); + + msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24 | + BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16 | + BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8 | + BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0); + + temp = (pi_count << 24) | (pi_count << 16) | + (pi_count << 8) | (pi_count << 0); + + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + reg = CMDDLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); /* PO */ + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* + * DEADBAND + * CMDCFGREG0[17] (+1 select) + * CMDCFGREG0[16] (enable) + */ + reg = CMDCFGREG0 + (channel * DDRIOCCC_CH_OFFSET); + msk = 0x00; + temp = 0x00; + + /* enable */ + msk |= BIT16; + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) + temp |= msk; + + /* select */ + msk |= BIT17; + if (pi_count < EARLY_DB) + temp |= msk; + + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* error check */ + if (pi_count > 0x3F) + mrc_post_code(0xEE, 0xE4); + + LEAVEFN(); +} + +/* + * This function will return the amount of WCMD delay on the given + * channel as an absolute PI count. + */ +uint32_t get_wcmd(uint8_t channel) +{ + uint32_t reg; + uint32_t temp; + uint32_t pi_count; + + ENTERFN(); + + /* + * RDPTR (1/2 MCLK, 64 PIs) + * CMDPTRREG[11:08] (0x0-0xF) + */ + reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET); + temp = msg_port_alt_read(DDRPHY, reg); + temp >>= 8; + temp &= 0xF; + + /* Adjust PI_COUNT */ + pi_count = temp * HALF_CLK; + + /* + * PI (1/64 MCLK, 1 PIs) + * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused) + * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused) + * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused) + * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused) + * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused) + * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F) + * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused) + * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused) + */ + reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET); + temp = msg_port_alt_read(DDRPHY, reg); + temp >>= 16; + temp &= 0x3F; + + /* Adjust PI_COUNT */ + pi_count += temp; + + LEAVEFN(); + + return pi_count; +} + +/* + * This function will program the WCLK delays based on an absolute + * number of PIs. + */ +void set_wclk(uint8_t channel, uint8_t rank, uint32_t pi_count) +{ + uint32_t reg; + uint32_t msk; + uint32_t temp; + + ENTERFN(); + + /* + * RDPTR (1/2 MCLK, 64 PIs) + * CCPTRREG[15:12] -> CLK1 (0x0-0xF) + * CCPTRREG[11:08] -> CLK0 (0x0-0xF) + */ + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); + msk = (BIT15 | BIT14 | BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8); + temp = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8); + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* Adjust PI_COUNT */ + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; + + /* + * PI (1/64 MCLK, 1 PIs) + * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F) + * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F) + */ + reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0); + reg += (channel * DDRIOCCC_CH_OFFSET); + msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16 | + BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8); + temp = (pi_count << 16) | (pi_count << 8); + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + reg = (rank) ? (ECCB1DLLPICODER1) : (ECCB1DLLPICODER1); + reg += (channel * DDRIOCCC_CH_OFFSET); + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + reg = (rank) ? (ECCB1DLLPICODER2) : (ECCB1DLLPICODER2); + reg += (channel * DDRIOCCC_CH_OFFSET); + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + reg = (rank) ? (ECCB1DLLPICODER3) : (ECCB1DLLPICODER3); + reg += (channel * DDRIOCCC_CH_OFFSET); + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* + * DEADBAND + * CCCFGREG1[11:08] (+1 select) + * CCCFGREG1[03:00] (enable) + */ + reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET); + msk = 0x00; + temp = 0x00; + + /* enable */ + msk |= (BIT3 | BIT2 | BIT1 | BIT0); + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) + temp |= msk; + + /* select */ + msk |= (BIT11 | BIT10 | BIT9 | BIT8); + if (pi_count < EARLY_DB) + temp |= msk; + + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* error check */ + if (pi_count > 0x3F) + mrc_post_code(0xEE, 0xE5); + + LEAVEFN(); +} + +/* + * This function will return the amout of WCLK delay on the given + * channel, rank as an absolute PI count. + */ +uint32_t get_wclk(uint8_t channel, uint8_t rank) +{ + uint32_t reg; + uint32_t temp; + uint32_t pi_count; + + ENTERFN(); + + /* + * RDPTR (1/2 MCLK, 64 PIs) + * CCPTRREG[15:12] -> CLK1 (0x0-0xF) + * CCPTRREG[11:08] -> CLK0 (0x0-0xF) + */ + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); + temp = msg_port_alt_read(DDRPHY, reg); + temp >>= (rank) ? (12) : (8); + temp &= 0xF; + + /* Adjust PI_COUNT */ + pi_count = temp * HALF_CLK; + + /* + * PI (1/64 MCLK, 1 PIs) + * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F) + * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F) + */ + reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0); + reg += (channel * DDRIOCCC_CH_OFFSET); + temp = msg_port_alt_read(DDRPHY, reg); + temp >>= (rank) ? (16) : (8); + temp &= 0x3F; + + pi_count += temp; + + LEAVEFN(); + + return pi_count; +} + +/* + * This function will program the WCTL delays based on an absolute + * number of PIs. + * + * (currently doesn't comprehend rank) + */ +void set_wctl(uint8_t channel, uint8_t rank, uint32_t pi_count) +{ + uint32_t reg; + uint32_t msk; + uint32_t temp; + + ENTERFN(); + + /* + * RDPTR (1/2 MCLK, 64 PIs) + * CCPTRREG[31:28] (0x0-0xF) + * CCPTRREG[27:24] (0x0-0xF) + */ + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); + msk = (BIT31 | BIT30 | BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24); + temp = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24); + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* Adjust PI_COUNT */ + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; + + /* + * PI (1/64 MCLK, 1 PIs) + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) + */ + reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); + msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24); + temp = (pi_count << 24); + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + reg = ECCB1DLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET); + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + reg = ECCB1DLLPICODER2 + (channel * DDRIOCCC_CH_OFFSET); + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + reg = ECCB1DLLPICODER3 + (channel * DDRIOCCC_CH_OFFSET); + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* + * DEADBAND + * CCCFGREG1[13:12] (+1 select) + * CCCFGREG1[05:04] (enable) + */ + reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET); + msk = 0x00; + temp = 0x00; + + /* enable */ + msk |= (BIT5 | BIT4); + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) + temp |= msk; + + /* select */ + msk |= (BIT13 | BIT12); + if (pi_count < EARLY_DB) + temp |= msk; + + mrc_alt_write_mask(DDRPHY, reg, temp, msk); + + /* error check */ + if (pi_count > 0x3F) + mrc_post_code(0xEE, 0xE6); + + LEAVEFN(); +} + +/* + * This function will return the amount of WCTL delay on the given + * channel, rank as an absolute PI count. + * + * (currently doesn't comprehend rank) + */ +uint32_t get_wctl(uint8_t channel, uint8_t rank) +{ + uint32_t reg; + uint32_t temp; + uint32_t pi_count; + + ENTERFN(); + + /* + * RDPTR (1/2 MCLK, 64 PIs) + * CCPTRREG[31:28] (0x0-0xF) + * CCPTRREG[27:24] (0x0-0xF) + */ + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); + temp = msg_port_alt_read(DDRPHY, reg); + temp >>= 24; + temp &= 0xF; + + /* Adjust PI_COUNT */ + pi_count = temp * HALF_CLK; + + /* + * PI (1/64 MCLK, 1 PIs) + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) + */ + reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); + temp = msg_port_alt_read(DDRPHY, reg); + temp >>= 24; + temp &= 0x3F; + + /* Adjust PI_COUNT */ + pi_count += temp; + + LEAVEFN(); + + return pi_count; +} + +/* + * This function will program the internal Vref setting in a given + * byte lane in a given channel. + */ +void set_vref(uint8_t channel, uint8_t byte_lane, uint32_t setting) +{ + uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL); + + ENTERFN(); + + DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n", + channel, byte_lane, setting); + + mrc_alt_write_mask(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)), + (vref_codes[setting] << 2), + (BIT7 | BIT6 | BIT5 | BIT4 | BIT3 | BIT2)); + + /* + * need to wait ~300ns for Vref to settle + * (check that this is necessary) + */ + delay_n(300); + + /* ??? may need to clear pointers ??? */ + + LEAVEFN(); +} + +/* + * This function will return the internal Vref setting for the given + * channel, byte_lane. + */ +uint32_t get_vref(uint8_t channel, uint8_t byte_lane) +{ + uint8_t j; + uint32_t ret_val = sizeof(vref_codes) / 2; + uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL); + uint32_t temp; + + ENTERFN(); + + temp = msg_port_alt_read(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET))); + temp >>= 2; + temp &= 0x3F; + + for (j = 0; j < sizeof(vref_codes); j++) { + if (vref_codes[j] == temp) { + ret_val = j; + break; + } + } + + LEAVEFN(); + + return ret_val; +} + +/* + * This function will return a 32 bit address in the desired + * channel and rank. + */ +uint32_t get_addr(uint8_t channel, uint8_t rank) +{ + uint32_t offset = 0x02000000; /* 32MB */ + + /* Begin product specific code */ + if (channel > 0) { + DPF(D_ERROR, "ILLEGAL CHANNEL\n"); + DEAD_LOOP(); + } + + if (rank > 1) { + DPF(D_ERROR, "ILLEGAL RANK\n"); + DEAD_LOOP(); + } + + /* use 256MB lowest density as per DRP == 0x0003 */ + offset += rank * (256 * 1024 * 1024); + + return offset; +} + +/* + * This function will sample the DQTRAINSTS registers in the given + * channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'. + * + * It will return an encoded DWORD in which each bit corresponds to + * the sampled value on the byte lane. + */ +uint32_t sample_dqs(struct mrc_params *mrc_params, uint8_t channel, + uint8_t rank, bool rcvn) +{ + uint8_t j; /* just a counter */ + uint8_t bl; /* which BL in the module (always 2 per module) */ + uint8_t bl_grp; /* which BL module */ + /* byte lane divisor */ + uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1; + uint32_t msk[2]; /* BLx in module */ + /* DQTRAINSTS register contents for each sample */ + uint32_t sampled_val[SAMPLE_SIZE]; + uint32_t num_0s; /* tracks the number of '0' samples */ + uint32_t num_1s; /* tracks the number of '1' samples */ + uint32_t ret_val = 0x00; /* assume all '0' samples */ + uint32_t address = get_addr(channel, rank); + + /* initialise msk[] */ + msk[0] = (rcvn) ? (BIT1) : (BIT9); /* BL0 */ + msk[1] = (rcvn) ? (BIT0) : (BIT8); /* BL1 */ + + /* cycle through each byte lane group */ + for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; bl_grp++) { + /* take SAMPLE_SIZE samples */ + for (j = 0; j < SAMPLE_SIZE; j++) { + hte_mem_op(address, mrc_params->first_run, + rcvn ? 0 : 1); + mrc_params->first_run = 0; + + /* + * record the contents of the proper + * DQTRAINSTS register + */ + sampled_val[j] = msg_port_alt_read(DDRPHY, + (DQTRAINSTS + + (bl_grp * DDRIODQ_BL_OFFSET) + + (channel * DDRIODQ_CH_OFFSET))); + } + + /* + * look for a majority value (SAMPLE_SIZE / 2) + 1 + * on the byte lane and set that value in the corresponding + * ret_val bit + */ + for (bl = 0; bl < 2; bl++) { + num_0s = 0x00; /* reset '0' tracker for byte lane */ + num_1s = 0x00; /* reset '1' tracker for byte lane */ + for (j = 0; j < SAMPLE_SIZE; j++) { + if (sampled_val[j] & msk[bl]) + num_1s++; + else + num_0s++; + } + if (num_1s > num_0s) + ret_val |= (1 << (bl + (bl_grp * 2))); + } + } + + /* + * "ret_val.0" contains the status of BL0 + * "ret_val.1" contains the status of BL1 + * "ret_val.2" contains the status of BL2 + * etc. + */ + return ret_val; +} + +/* This function will find the rising edge transition on RCVN or WDQS */ +void find_rising_edge(struct mrc_params *mrc_params, uint32_t delay[], + uint8_t channel, uint8_t rank, bool rcvn) +{ + bool all_edges_found; /* determines stop condition */ + bool direction[NUM_BYTE_LANES]; /* direction indicator */ + uint8_t sample; /* sample counter */ + uint8_t bl; /* byte lane counter */ + /* byte lane divisor */ + uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1; + uint32_t sample_result[SAMPLE_CNT]; /* results of sample_dqs() */ + uint32_t temp; + uint32_t transition_pattern; + + ENTERFN(); + + /* select hte and request initial configuration */ + select_hte(); + mrc_params->first_run = 1; + + /* Take 3 sample points (T1,T2,T3) to obtain a transition pattern */ + for (sample = 0; sample < SAMPLE_CNT; sample++) { + /* program the desired delays for sample */ + for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) { + /* increase sample delay by 26 PI (0.2 CLK) */ + if (rcvn) { + set_rcvn(channel, rank, bl, + delay[bl] + (sample * SAMPLE_DLY)); + } else { + set_wdqs(channel, rank, bl, + delay[bl] + (sample * SAMPLE_DLY)); + } + } + + /* take samples (Tsample_i) */ + sample_result[sample] = sample_dqs(mrc_params, + channel, rank, rcvn); + + DPF(D_TRN, + "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n", + (rcvn ? "RCVN" : "WDQS"), channel, rank, sample, + sample * SAMPLE_DLY, sample_result[sample]); + } + + /* + * This pattern will help determine where we landed and ultimately + * how to place RCVEN/WDQS. + */ + for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) { + /* build transition_pattern (MSB is 1st sample) */ + transition_pattern = 0; + for (sample = 0; sample < SAMPLE_CNT; sample++) { + transition_pattern |= + ((sample_result[sample] & (1 << bl)) >> bl) << + (SAMPLE_CNT - 1 - sample); + } + + DPF(D_TRN, "=== transition pattern %d\n", transition_pattern); + + /* + * set up to look for rising edge based on + * transition_pattern + */ + switch (transition_pattern) { + case 0: /* sampled 0->0->0 */ + /* move forward from T3 looking for 0->1 */ + delay[bl] += 2 * SAMPLE_DLY; + direction[bl] = FORWARD; + break; + case 1: /* sampled 0->0->1 */ + case 5: /* sampled 1->0->1 (bad duty cycle) *HSD#237503* */ + /* move forward from T2 looking for 0->1 */ + delay[bl] += 1 * SAMPLE_DLY; + direction[bl] = FORWARD; + break; + case 2: /* sampled 0->1->0 (bad duty cycle) *HSD#237503* */ + case 3: /* sampled 0->1->1 */ + /* move forward from T1 looking for 0->1 */ + delay[bl] += 0 * SAMPLE_DLY; + direction[bl] = FORWARD; + break; + case 4: /* sampled 1->0->0 (assumes BL8, HSD#234975) */ + /* move forward from T3 looking for 0->1 */ + delay[bl] += 2 * SAMPLE_DLY; + direction[bl] = FORWARD; + break; + case 6: /* sampled 1->1->0 */ + case 7: /* sampled 1->1->1 */ + /* move backward from T1 looking for 1->0 */ + delay[bl] += 0 * SAMPLE_DLY; + direction[bl] = BACKWARD; + break; + default: + mrc_post_code(0xEE, 0xEE); + break; + } + + /* program delays */ + if (rcvn) + set_rcvn(channel, rank, bl, delay[bl]); + else + set_wdqs(channel, rank, bl, delay[bl]); + } + + /* + * Based on the observed transition pattern on the byte lane, + * begin looking for a rising edge with single PI granularity. + */ + do { + all_edges_found = true; /* assume all byte lanes passed */ + /* take a sample */ + temp = sample_dqs(mrc_params, channel, rank, rcvn); + /* check all each byte lane for proper edge */ + for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) { + if (temp & (1 << bl)) { + /* sampled "1" */ + if (direction[bl] == BACKWARD) { + /* + * keep looking for edge + * on this byte lane + */ + all_edges_found = false; + delay[bl] -= 1; + if (rcvn) { + set_rcvn(channel, rank, + bl, delay[bl]); + } else { + set_wdqs(channel, rank, + bl, delay[bl]); + } + } + } else { + /* sampled "0" */ + if (direction[bl] == FORWARD) { + /* + * keep looking for edge + * on this byte lane + */ + all_edges_found = false; + delay[bl] += 1; + if (rcvn) { + set_rcvn(channel, rank, + bl, delay[bl]); + } else { + set_wdqs(channel, rank, + bl, delay[bl]); + } + } + } + } + } while (!all_edges_found); + + /* restore DDR idle state */ + dram_init_command(DCMD_PREA(rank)); + + DPF(D_TRN, "Delay %03X %03X %03X %03X\n", + delay[0], delay[1], delay[2], delay[3]); + + LEAVEFN(); +} + +/* + * This function will return a 32 bit mask that will be used to + * check for byte lane failures. + */ +uint32_t byte_lane_mask(struct mrc_params *mrc_params) +{ + uint32_t j; + uint32_t ret_val = 0x00; + + /* + * set ret_val based on NUM_BYTE_LANES such that you will check + * only BL0 in result + * + * (each bit in result represents a byte lane) + */ + for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES) + ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES)); + + /* + * HSD#235037 + * need to adjust the mask for 16-bit mode + */ + if (mrc_params->channel_width == X16) + ret_val |= (ret_val << 2); + + return ret_val; +} + +/* + * Check memory executing simple write/read/verify at the specified address. + * + * Bits in the result indicate failure on specific byte lane. + */ +uint32_t check_rw_coarse(struct mrc_params *mrc_params, uint32_t address) +{ + uint32_t result = 0; + uint8_t first_run = 0; + + if (mrc_params->hte_setup) { + mrc_params->hte_setup = 0; + first_run = 1; + select_hte(); + } + + result = hte_basic_write_read(mrc_params, address, + first_run, WRITE_TRAIN); + + DPF(D_TRN, "check_rw_coarse result is %x\n", result); + + return result; +} + +/* + * Check memory executing write/read/verify of many data patterns + * at the specified address. Bits in the result indicate failure + * on specific byte lane. + */ +uint32_t check_bls_ex(struct mrc_params *mrc_params, uint32_t address) +{ + uint32_t result; + uint8_t first_run = 0; + + if (mrc_params->hte_setup) { + mrc_params->hte_setup = 0; + first_run = 1; + select_hte(); + } + + result = hte_write_stress_bit_lanes(mrc_params, address, first_run); + + DPF(D_TRN, "check_bls_ex result is %x\n", result); + + return result; +} + +/* + * 32-bit LFSR with characteristic polynomial: X^32 + X^22 +X^2 + X^1 + * + * The function takes pointer to previous 32 bit value and + * modifies it to next value. + */ +void lfsr32(uint32_t *lfsr_ptr) +{ + uint32_t bit; + uint32_t lfsr; + int i; + + lfsr = *lfsr_ptr; + + for (i = 0; i < 32; i++) { + bit = 1 ^ (lfsr & BIT0); + bit = bit ^ ((lfsr & BIT1) >> 1); + bit = bit ^ ((lfsr & BIT2) >> 2); + bit = bit ^ ((lfsr & BIT22) >> 22); + + lfsr = ((lfsr >> 1) | (bit << 31)); + } + + *lfsr_ptr = lfsr; +} + +/* Clear the pointers in a given byte lane in a given channel */ +void clear_pointers(void) +{ + uint8_t channel; + uint8_t bl; + + ENTERFN(); + + for (channel = 0; channel < NUM_CHANNELS; channel++) { + for (bl = 0; bl < NUM_BYTE_LANES; bl++) { + mrc_alt_write_mask(DDRPHY, + (B01PTRCTL1 + + (channel * DDRIODQ_CH_OFFSET) + + ((bl >> 1) * DDRIODQ_BL_OFFSET)), + ~BIT8, BIT8); + + mrc_alt_write_mask(DDRPHY, + (B01PTRCTL1 + + (channel * DDRIODQ_CH_OFFSET) + + ((bl >> 1) * DDRIODQ_BL_OFFSET)), + BIT8, BIT8); + } + } + + LEAVEFN(); +} + +void print_timings(struct mrc_params *mrc_params) +{ + uint8_t algo; + uint8_t channel; + uint8_t rank; + uint8_t bl; + uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1; + + DPF(D_INFO, "\n---------------------------"); + DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3"); + DPF(D_INFO, "\n==========================="); + + for (algo = 0; algo < MAX_ALGOS; algo++) { + for (channel = 0; channel < NUM_CHANNELS; channel++) { + if (mrc_params->channel_enables & (1 << channel)) { + for (rank = 0; rank < NUM_RANKS; rank++) { + if (mrc_params->rank_enables & + (1 << rank)) { + switch (algo) { + case RCVN: + DPF(D_INFO, + "\nRCVN[%02d:%02d]", + channel, rank); + break; + case WDQS: + DPF(D_INFO, + "\nWDQS[%02d:%02d]", + channel, rank); + break; + case WDQX: + DPF(D_INFO, + "\nWDQx[%02d:%02d]", + channel, rank); + break; + case RDQS: + DPF(D_INFO, + "\nRDQS[%02d:%02d]", + channel, rank); + break; + case VREF: + DPF(D_INFO, + "\nVREF[%02d:%02d]", + channel, rank); + break; + case WCMD: + DPF(D_INFO, + "\nWCMD[%02d:%02d]", + channel, rank); + break; + case WCTL: + DPF(D_INFO, + "\nWCTL[%02d:%02d]", + channel, rank); + break; + case WCLK: + DPF(D_INFO, + "\nWCLK[%02d:%02d]", + channel, rank); + break; + default: + break; + } + + for (bl = 0; + bl < (NUM_BYTE_LANES / bl_divisor); + bl++) { + switch (algo) { + case RCVN: + DPF(D_INFO, + " %03d", + get_rcvn(channel, rank, bl)); + break; + case WDQS: + DPF(D_INFO, + " %03d", + get_wdqs(channel, rank, bl)); + break; + case WDQX: + DPF(D_INFO, + " %03d", + get_wdq(channel, rank, bl)); + break; + case RDQS: + DPF(D_INFO, + " %03d", + get_rdqs(channel, rank, bl)); + break; + case VREF: + DPF(D_INFO, + " %03d", + get_vref(channel, bl)); + break; + case WCMD: + DPF(D_INFO, + " %03d", + get_wcmd(channel)); + break; + case WCTL: + DPF(D_INFO, + " %03d", + get_wctl(channel, rank)); + break; + case WCLK: + DPF(D_INFO, + " %03d", + get_wclk(channel, rank)); + break; + default: + break; + } + } + } + } + } + } + } + + DPF(D_INFO, "\n---------------------------"); + DPF(D_INFO, "\n"); +} diff --git a/arch/x86/cpu/quark/mrc_util.h b/arch/x86/cpu/quark/mrc_util.h new file mode 100644 index 0000000..edbe219 --- /dev/null +++ b/arch/x86/cpu/quark/mrc_util.h @@ -0,0 +1,153 @@ +/* + * Copyright (C) 2013, Intel Corporation + * Copyright (C) 2015, Bin Meng <bmeng.cn@gmail.com> + * + * Ported from Intel released Quark UEFI BIOS + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ + * + * SPDX-License-Identifier: Intel + */ + +#ifndef _MRC_UTIL_H_ +#define _MRC_UTIL_H_ + +/* Turn on this macro to enable MRC debugging output */ +#undef MRC_DEBUG + +/* MRC Debug Support */ +#define DPF debug_cond + +/* debug print type */ + +#ifdef MRC_DEBUG +#define D_ERROR 0x0001 +#define D_INFO 0x0002 +#define D_REGRD 0x0004 +#define D_REGWR 0x0008 +#define D_FCALL 0x0010 +#define D_TRN 0x0020 +#define D_TIME 0x0040 +#else +#define D_ERROR 0 +#define D_INFO 0 +#define D_REGRD 0 +#define D_REGWR 0 +#define D_FCALL 0 +#define D_TRN 0 +#define D_TIME 0 +#endif + +#define ENTERFN(...) debug_cond(D_FCALL, "<%s>\n", __func__) +#define LEAVEFN(...) debug_cond(D_FCALL, "</%s>\n", __func__) +#define REPORTFN(...) debug_cond(D_FCALL, "<%s/>\n", __func__) + +/* Generic Register Bits */ +#define BIT0 0x00000001 +#define BIT1 0x00000002 +#define BIT2 0x00000004 +#define BIT3 0x00000008 +#define BIT4 0x00000010 +#define BIT5 0x00000020 +#define BIT6 0x00000040 +#define BIT7 0x00000080 +#define BIT8 0x00000100 +#define BIT9 0x00000200 +#define BIT10 0x00000400 +#define BIT11 0x00000800 +#define BIT12 0x00001000 +#define BIT13 0x00002000 +#define BIT14 0x00004000 +#define BIT15 0x00008000 +#define BIT16 0x00010000 +#define BIT17 0x00020000 +#define BIT18 0x00040000 +#define BIT19 0x00080000 +#define BIT20 0x00100000 +#define BIT21 0x00200000 +#define BIT22 0x00400000 +#define BIT23 0x00800000 +#define BIT24 0x01000000 +#define BIT25 0x02000000 +#define BIT26 0x04000000 +#define BIT27 0x08000000 +#define BIT28 0x10000000 +#define BIT29 0x20000000 +#define BIT30 0x40000000 +#define BIT31 0x80000000 + +/* Message Bus Port */ +#define MEM_CTLR 0x01 +#define HOST_BRIDGE 0x03 +#define MEM_MGR 0x05 +#define HTE 0x11 +#define DDRPHY 0x12 + +/* number of sample points */ +#define SAMPLE_CNT 3 +/* number of PIs to increment per sample */ +#define SAMPLE_DLY 26 + +enum { + /* indicates to decrease delays when looking for edge */ + BACKWARD, + /* indicates to increase delays when looking for edge */ + FORWARD +}; + +enum { + RCVN, + WDQS, + WDQX, + RDQS, + VREF, + WCMD, + WCTL, + WCLK, + MAX_ALGOS, +}; + +void mrc_write_mask(u32 unit, u32 addr, u32 data, u32 mask); +void mrc_alt_write_mask(u32 unit, u32 addr, u32 data, u32 mask); +void mrc_post_code(uint8_t major, uint8_t minor); +void delay_n(uint32_t ns); +void delay_u(uint32_t ms); +void select_mem_mgr(void); +void select_hte(void); +void dram_init_command(uint32_t data); +void dram_wake_command(void); +void training_message(uint8_t channel, uint8_t rank, uint8_t byte_lane); + +void set_rcvn(uint8_t channel, uint8_t rank, + uint8_t byte_lane, uint32_t pi_count); +uint32_t get_rcvn(uint8_t channel, uint8_t rank, uint8_t byte_lane); +void set_rdqs(uint8_t channel, uint8_t rank, + uint8_t byte_lane, uint32_t pi_count); +uint32_t get_rdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane); +void set_wdqs(uint8_t channel, uint8_t rank, + uint8_t byte_lane, uint32_t pi_count); +uint32_t get_wdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane); +void set_wdq(uint8_t channel, uint8_t rank, + uint8_t byte_lane, uint32_t pi_count); +uint32_t get_wdq(uint8_t channel, uint8_t rank, uint8_t byte_lane); +void set_wcmd(uint8_t channel, uint32_t pi_count); +uint32_t get_wcmd(uint8_t channel); +void set_wclk(uint8_t channel, uint8_t rank, uint32_t pi_count); +uint32_t get_wclk(uint8_t channel, uint8_t rank); +void set_wctl(uint8_t channel, uint8_t rank, uint32_t pi_count); +uint32_t get_wctl(uint8_t channel, uint8_t rank); +void set_vref(uint8_t channel, uint8_t byte_lane, uint32_t setting); +uint32_t get_vref(uint8_t channel, uint8_t byte_lane); + +uint32_t get_addr(uint8_t channel, uint8_t rank); +uint32_t sample_dqs(struct mrc_params *mrc_params, uint8_t channel, + uint8_t rank, bool rcvn); +void find_rising_edge(struct mrc_params *mrc_params, uint32_t delay[], + uint8_t channel, uint8_t rank, bool rcvn); +uint32_t byte_lane_mask(struct mrc_params *mrc_params); +uint32_t check_rw_coarse(struct mrc_params *mrc_params, uint32_t address); +uint32_t check_bls_ex(struct mrc_params *mrc_params, uint32_t address); +void lfsr32(uint32_t *lfsr_ptr); +void clear_pointers(void); +void print_timings(struct mrc_params *mrc_params); + +#endif /* _MRC_UTIL_H_ */
Add various utility codes needed for Quark MRC. Signed-off-by: Bin Meng <bmeng.cn@gmail.com> --- There are 12 checkpatch warnings in this patch, which are: warning: arch/x86/cpu/quark/mrc_util.c,1446: Too many leading tabs - consider code refactoring warning: arch/x86/cpu/quark/mrc_util.c,1450: line over 80 characters ... Fixing 'Too many leading tabs ...' will be very dangerous, as I don't have all the details on how Intel's MRC codes are actually written to play with the hardware. Trying to refactor them may lead to a non-working MRC codes. For the 'line over 80 characters' issue, we have to leave them as is now due to the 'Too many leading tabs ...', sigh. arch/x86/cpu/quark/hte.c | 398 +++++++++++ arch/x86/cpu/quark/hte.h | 44 ++ arch/x86/cpu/quark/mrc_util.c | 1499 +++++++++++++++++++++++++++++++++++++++++ arch/x86/cpu/quark/mrc_util.h | 153 +++++ 4 files changed, 2094 insertions(+) create mode 100644 arch/x86/cpu/quark/hte.c create mode 100644 arch/x86/cpu/quark/hte.h create mode 100644 arch/x86/cpu/quark/mrc_util.c create mode 100644 arch/x86/cpu/quark/mrc_util.h