mbox series

[00/11] Add support for the Mobileye EyeQ5 SoC

Message ID 20231004161038.2818327-1-gregory.clement@bootlin.com
Headers show
Series Add support for the Mobileye EyeQ5 SoC | expand

Message

Gregory CLEMENT Oct. 4, 2023, 4:10 p.m. UTC
Hello,

The EyeQ5 SoC from Mobileye is based on the MIPS I6500 architecture
and features multiple controllers such as the classic UART, I2C, SPI,
as well as CAN-FD, PCIe, Octal/Quad SPI Flash interface, Gigabit
Ethernet, MIPI CSI-2, and eMMC 5.1. It also includes a Hardware
Security Module, Functional Safety Hardware, and MJPEG encoder.

One peculiarity of this SoC is that the physical address of the DDR
exceeds 32 bits. Given that the architecture is 64 bits, this is not
an issue, but it requires some changes in how the mips64 is currently
managed during boot.

With the second patch, we enable the use of xkphys instead of the
legacy kseg0 and kseg1. However, the vector reset remains 32 bits. So
the third patch allows the use of aliasing to map the vector address
using a 32-bit pointer.

While working on it, we found that there was an issue in the way the
assembly code managed kernel uncompression. While most of the head.S
file uses macros to employ the correct instruction depending on
whether it's in 32 or 64 bits, one was missing. Fixing it is the
purpose of the first patch.

Then the following 4 patches document the bindings that will be used
for the device tree of the SoC submitted by patch 7.

In addition to the SoC support, patch 8 adds support for the
evaluation board.

Finally, patch 10 adds support to build the kernel image for the EyeQ5
SoC and board, not only the Kconfig and Makefile infrastructure but
also an ITS file and a default configuration. To build and test the
kernel, we need to run the following commands:

make 64r6el_defconfig BOARDS=eyeq5
make vmlinuz.itb

And then from U-Boot
bootm ${vmlinuz.itb_addr}#conf-1

Gregory

Gregory CLEMENT (9):
  MIPS: compressed: Use correct instruction for 64 bit code
  dt-bindings: Add vendor prefix for Mobileye Vision Technologies Ltd.
  dt-bindings: mips: cpu: Add I-Class I6500 Multiprocessor Core
  dt-bindings: mips: Add bindings for Mobileye SoCs
  dt-bindings: mfd: syscon: Document EyeQ5 OLB
  MIPS: mobileye: Add EyeQ5 dtsi
  MIPS: mobileye: Add EPM5 device tree
  MIPS: generic: Add support for Mobileye EyeQ5
  MAINTAINERS: Add entry for Mobileye MIPS SoCs

Vladimir Kondratiev (2):
  MIPS: use virtual addresses from xkphys for MIPS64
  MIPS: support RAM beyond 32-bit

 .../devicetree/bindings/mfd/syscon.yaml       |   1 +
 .../devicetree/bindings/mips/cpus.yaml        |   1 +
 .../devicetree/bindings/mips/mobileye.yaml    |  36 ++
 .../devicetree/bindings/vendor-prefixes.yaml  |   2 +
 MAINTAINERS                                   |  12 +
 arch/mips/Kconfig                             |  15 +
 arch/mips/Makefile                            |   4 +
 arch/mips/boot/compressed/head.S              |   4 +-
 arch/mips/boot/dts/Makefile                   |   1 +
 arch/mips/boot/dts/mobileye/Makefile          |   6 +
 arch/mips/boot/dts/mobileye/eyeq5-epm5.dts    |  24 ++
 .../boot/dts/mobileye/eyeq5-fixed-clocks.dtsi | 315 ++++++++++++++++++
 arch/mips/boot/dts/mobileye/eyeq5.dtsi        | 138 ++++++++
 arch/mips/configs/generic/board-eyeq5.config  |  42 +++
 arch/mips/generic/Kconfig                     |  14 +
 arch/mips/generic/Platform                    |   7 +
 arch/mips/generic/board-epm5.its.S            |  24 ++
 arch/mips/include/asm/addrspace.h             |  12 +-
 arch/mips/include/asm/mips-cm.h               |   1 +
 arch/mips/include/asm/page.h                  |  10 +
 arch/mips/include/asm/vga.h                   |   4 +
 arch/mips/kernel/cps-vec.S                    |   8 +
 arch/mips/kernel/genex.S                      |  14 +
 arch/mips/kernel/smp-cps.c                    |  47 ++-
 arch/mips/kernel/traps.c                      |  32 +-
 arch/mips/lib/uncached.c                      |  10 +
 arch/mips/mm/init.c                           |   4 +-
 include/dt-bindings/soc/mobileye,eyeq5.h      |  77 +++++
 28 files changed, 847 insertions(+), 18 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/mips/mobileye.yaml
 create mode 100644 arch/mips/boot/dts/mobileye/Makefile
 create mode 100644 arch/mips/boot/dts/mobileye/eyeq5-epm5.dts
 create mode 100644 arch/mips/boot/dts/mobileye/eyeq5-fixed-clocks.dtsi
 create mode 100644 arch/mips/boot/dts/mobileye/eyeq5.dtsi
 create mode 100644 arch/mips/configs/generic/board-eyeq5.config
 create mode 100644 arch/mips/generic/board-epm5.its.S
 create mode 100644 include/dt-bindings/soc/mobileye,eyeq5.h

Comments

Rob Herring Oct. 4, 2023, 4:41 p.m. UTC | #1
On Wed, Oct 4, 2023 at 11:11 AM Gregory CLEMENT
<gregory.clement@bootlin.com> wrote:
>
> Add a device tree include file for the Mobileye EyeQ5 SoC.
>
> Based on the work of Slava Samsonov <stanislav.samsonov@intel.com>
>
> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
> ---
>  arch/mips/boot/dts/Makefile                   |   1 +
>  arch/mips/boot/dts/mobileye/Makefile          |   4 +
>  .../boot/dts/mobileye/eyeq5-fixed-clocks.dtsi | 315 ++++++++++++++++++
>  arch/mips/boot/dts/mobileye/eyeq5.dtsi        | 138 ++++++++
>  4 files changed, 458 insertions(+)
>  create mode 100644 arch/mips/boot/dts/mobileye/Makefile
>  create mode 100644 arch/mips/boot/dts/mobileye/eyeq5-fixed-clocks.dtsi
>  create mode 100644 arch/mips/boot/dts/mobileye/eyeq5.dtsi
>
> diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile
> index 928f38a79dff..edb8e8dee758 100644
> --- a/arch/mips/boot/dts/Makefile
> +++ b/arch/mips/boot/dts/Makefile
> @@ -8,6 +8,7 @@ subdir-$(CONFIG_LANTIQ)                 += lantiq
>  subdir-$(CONFIG_MACH_LOONGSON64)       += loongson
>  subdir-$(CONFIG_SOC_VCOREIII)          += mscc
>  subdir-$(CONFIG_MIPS_MALTA)            += mti
> +subdir-$(CONFIG_SOC_EYEQ5)             += mobileye
>  subdir-$(CONFIG_LEGACY_BOARD_SEAD3)    += mti
>  subdir-$(CONFIG_FIT_IMAGE_FDT_NI169445)        += ni
>  subdir-$(CONFIG_MACH_PIC32)            += pic32
> diff --git a/arch/mips/boot/dts/mobileye/Makefile b/arch/mips/boot/dts/mobileye/Makefile
> new file mode 100644
> index 000000000000..99c4124fd4c0
> --- /dev/null
> +++ b/arch/mips/boot/dts/mobileye/Makefile
> @@ -0,0 +1,4 @@
> +# SPDX-License-Identifier: GPL-2.0-only
> +# Copyright 2023 Mobileye Vision Technologies Ltd.
> +
> +obj-$(CONFIG_BUILTIN_DTB)      += $(addsuffix .o, $(dtb-y))

You didn't add anything to 'dtb-y'. Did you test this?

Also, CONFIG_BUILTIN_DTB is supposed to be for legacy bootloaders
which don't understand DT. For a new SoC, fix the bootloader.

> diff --git a/arch/mips/boot/dts/mobileye/eyeq5-fixed-clocks.dtsi b/arch/mips/boot/dts/mobileye/eyeq5-fixed-clocks.dtsi
> new file mode 100644
> index 000000000000..a0066465ac8b
> --- /dev/null
> +++ b/arch/mips/boot/dts/mobileye/eyeq5-fixed-clocks.dtsi
> @@ -0,0 +1,315 @@
> +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
> +/*
> + * Copyright 2023 Mobileye Vision Technologies Ltd.
> + */

I assume these aren't all really fixed, but just 'I don't have a clock
driver yet'. That creates an ABI issue when you add the clock
driver(s). Just FYI.

> +
> +/ {
> +       /* Fixed clock */
> +       pll_cpu: pll_cpu {

Don't use _ in node names.

> +               compatible = "fixed-clock";
> +               #clock-cells = <0>;
> +               clock-frequency = <1500000000>;
> +       };
> +
> +       pll_vdi: pll_vdi {
> +               compatible = "fixed-clock";
> +               #clock-cells = <0>;
> +               clock-frequency = <1280000000>;
> +       };
> +
> +       pll_per: pll_per {
> +               compatible = "fixed-clock";
> +               #clock-cells = <0>;
> +               clock-frequency = <2000000000>;
> +       };
> +
> +       pll_ddr0: pll_ddr0 {
> +               compatible = "fixed-clock";
> +               #clock-cells = <0>;
> +               clock-frequency = <1857210000>;
> +       };
> +
> +       pll_ddr1: pll_ddr1 {
> +               compatible = "fixed-clock";
> +               #clock-cells = <0>;
> +               clock-frequency = <1857210000>;
> +       };
> +
> +/* PLL_CPU derivatives */
> +       occ_cpu: occ_cpu {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&pll_cpu>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "occ_cpu";

Isn't the default name the node name? Drop these unless you really
have a need and they aren't redundant.

> +       };
> +       si_css0_ref_clk: si_css0_ref_clk { /* gate ClkRstGen_si_css0_ref */
> +               compatible = "fixed-factor-clock";
> +               clocks = <&occ_cpu>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "si_css0_ref_clk";
> +       };
> +       cpc_clk: cpc_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&si_css0_ref_clk>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "cpc_clk";
> +       };
> +       core0_clk: core0_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&si_css0_ref_clk>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "core0_clk";
> +       };
> +       core1_clk: core1_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&si_css0_ref_clk>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "core1_clk";
> +       };
> +       core2_clk: core2_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&si_css0_ref_clk>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "core2_clk";
> +       };
> +       core3_clk: core3_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&si_css0_ref_clk>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "core3_clk";
> +       };
> +       cm_clk: cm_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&si_css0_ref_clk>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "cm_clk";
> +       };
> +       mem_clk: mem_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&si_css0_ref_clk>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "mem_clk";
> +       };
> +       occ_isram: occ_isram {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&pll_cpu>;
> +               #clock-cells = <0>;
> +               clock-div = <2>;
> +               clock-mult = <1>;
> +               clock-output-names = "occ_isram";
> +       };
> +       isram_clk: isram_clk { /* gate ClkRstGen_isram */
> +               compatible = "fixed-factor-clock";
> +               clocks = <&occ_isram>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "isram_clk";
> +       };
> +       occ_dbu: occ_dbu {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&pll_cpu>;
> +               #clock-cells = <0>;
> +               clock-div = <10>;
> +               clock-mult = <1>;
> +               clock-output-names = "occ_dbu";
> +       };
> +       si_dbu_tp_pclk: si_dbu_tp_pclk { /* gate ClkRstGen_dbu */
> +               compatible = "fixed-factor-clock";
> +               clocks = <&occ_dbu>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "si_dbu_tp_pclk";
> +       };
> +/* PLL_VDI derivatives */
> +       occ_vdi: occ_vdi {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&pll_vdi>;
> +               #clock-cells = <0>;
> +               clock-div = <2>;
> +               clock-mult = <1>;
> +               clock-output-names = "occ_vdi";
> +       };
> +       vdi_clk: vdi_clk { /* gate ClkRstGen_vdi */
> +               compatible = "fixed-factor-clock";
> +               clocks = <&occ_vdi>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "vdi_clk";
> +       };
> +       occ_can_ser: occ_can_ser {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&pll_vdi>;
> +               #clock-cells = <0>;
> +               clock-div = <16>;
> +               clock-mult = <1>;
> +               clock-output-names = "occ_can_ser";
> +       };
> +       can_ser_clk: can_ser_clk { /* gate ClkRstGen_can_ser */
> +               compatible = "fixed-factor-clock";
> +               clocks = <&occ_can_ser>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "can_ser_clk";
> +       };
> +       i2c_ser_clk: i2c_ser_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&pll_vdi>;
> +               #clock-cells = <0>;
> +               clock-div = <20>;
> +               clock-mult = <1>;
> +               clock-output-names = "i2c_ser_clk";
> +       };
> +/* PLL_PER derivatives */
> +       occ_periph: occ_periph {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&pll_per>;
> +               #clock-cells = <0>;
> +               clock-div = <16>;
> +               clock-mult = <1>;
> +               clock-output-names = "occ_periph";
> +       };
> +       periph_clk: periph_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&occ_periph>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "periph_clk";
> +       };
> +       can_clk: can_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&occ_periph>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "can_clk";
> +       };
> +       spi_clk: spi_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&occ_periph>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "spi_clk";
> +       };
> +       uart_clk: uart_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&occ_periph>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "uart_clk";
> +       };
> +       i2c_clk: i2c_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&occ_periph>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "i2c_clk";
> +       };
> +       timer_clk: timer_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&occ_periph>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "timer_clk";
> +       };
> +       gpio_clk: gpio_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&occ_periph>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "gpio_clk";
> +       };
> +       emmc_sys_clk: emmc_sys_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&pll_per>;
> +               #clock-cells = <0>;
> +               clock-div = <10>;
> +               clock-mult = <1>;
> +               clock-output-names = "emmc_sys_clk";
> +       };
> +       ccf_ctrl_clk: ccf_ctrl_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&pll_per>;
> +               #clock-cells = <0>;
> +               clock-div = <4>;
> +               clock-mult = <1>;
> +               clock-output-names = "ccf_ctrl_clk";
> +       };
> +       occ_mjpeg_core: occ_mjpeg_core {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&pll_per>;
> +               #clock-cells = <0>;
> +               clock-div = <2>;
> +               clock-mult = <1>;
> +               clock-output-names = "occ_mjpeg_core";
> +       };
> +       hsm_clk: hsm_clk { /* gate ClkRstGen_hsm */
> +               compatible = "fixed-factor-clock";
> +               clocks = <&occ_mjpeg_core>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "hsm_clk";
> +       };
> +       mjpeg_core_clk: mjpeg_core_clk { /* gate ClkRstGen_mjpeg_gen */
> +               compatible = "fixed-factor-clock";
> +               clocks = <&occ_mjpeg_core>;
> +               #clock-cells = <0>;
> +               clock-div = <1>;
> +               clock-mult = <1>;
> +               clock-output-names = "mjpeg_core_clk";
> +       };
> +       fcmu_a_clk: fcmu_a_clk {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&pll_per>;
> +               #clock-cells = <0>;
> +               clock-div = <20>;
> +               clock-mult = <1>;
> +               clock-output-names = "fcmu_a_clk";
> +       };
> +       occ_pci_sys: occ_pci_sys {
> +               compatible = "fixed-factor-clock";
> +               clocks = <&pll_per>;
> +               #clock-cells = <0>;
> +               clock-div = <8>;
> +               clock-mult = <1>;
> +               clock-output-names = "occ_pci_sys";
> +       };
> +       pclk: pclk {
> +               compatible = "fixed-clock";
> +               #clock-cells = <0>;
> +               clock-frequency = <250000000>;  /* 250MHz */
> +       };
> +       tsu_clk: tsu_clk {
> +               compatible = "fixed-clock";
> +               #clock-cells = <0>;
> +               clock-frequency = <125000000>;  /* 125MHz */
> +       };
> +};
> diff --git a/arch/mips/boot/dts/mobileye/eyeq5.dtsi b/arch/mips/boot/dts/mobileye/eyeq5.dtsi
> new file mode 100644
> index 000000000000..0504c2fb3ad5
> --- /dev/null
> +++ b/arch/mips/boot/dts/mobileye/eyeq5.dtsi
> @@ -0,0 +1,138 @@
> +// SPDX-License-Identifier: GPL-2.0

Doesn't match eyeq5-fixed-clocks.dtsi

> +/*
> + * Copyright 2023 Mobileye Vision Technologies Ltd.
> + */
> +
> +#include <dt-bindings/interrupt-controller/mips-gic.h>
> +#include <dt-bindings/soc/mobileye,eyeq5.h>
> +
> +/memreserve/ 0x40000000 0xc0000000; /* DDR32 */
> +/memreserve/ 0x08000000 0x08000000; /* DDR_LOW */
> +
> +#include "eyeq5-fixed-clocks.dtsi"
> +
> +/* almost all GIC IRQs has the same characteristics. provide short form */

Maybe so, but I prefer not having 2 levels of lookup to figure out values.

> +#define GIC_IRQ(x) GIC_SHARED (x) IRQ_TYPE_LEVEL_HIGH
> +
> +/ {
> +       #address-cells = <2>;
> +       #size-cells = <2>;
> +       cpus {
> +               #address-cells = <1>;
> +               #size-cells = <0>;
> +               cpu@0 {
> +                       device_type = "cpu";
> +                       compatible = "mti,i6500";
> +                       reg = <0>;
> +                       clocks = <&core0_clk>;
> +               };
> +       };
> +
> +       reserved-memory {
> +               #address-cells = <2>;
> +               #size-cells = <2>;
> +               ranges;
> +
> +/* These reserved memory regions are also defined in bootmanager
> + * for configuring inbound translation for BARS, don't change
> + * these without syncing with bootmanager
> + */

Indent with the rest of the node.

> +               shmem0_reserved: shmem@804000000 {
> +                       reg = <0x8 0x04000000 0x0 0x1000000>;
> +               };
> +               shmem1_reserved: shmem@805000000 {
> +                       reg = <0x8 0x05000000 0x0 0x1000000>;
> +               };
> +               pci0_msi_reserved: pci0_msi@806000000 {
> +                       reg = <0x8 0x06000000 0x0 0x100000>;
> +               };
> +               pci1_msi_reserved: pci1_msi@806100000 {
> +                       reg = <0x8 0x06100000 0x0 0x100000>;
> +               };
> +
> +               mini_coredump0_reserved: mini_coredump0@806200000 {
> +                       reg = <0x8 0x06200000 0x0 0x100000>;
> +               };
> +               mhm_reserved_0: the_mhm_reserved_0@0 {
> +                       reg = <0x8 0x00000000 0x0 0x0000800>;
> +               };
> +       };
> +
> +       aliases {
> +               serial0 = &uart0;
> +               serial1 = &uart1;
> +               serial2 = &uart2;
> +       };
> +
> +       cpu_intc: interrupt-controller {
> +               compatible = "mti,cpu-interrupt-controller";
> +               interrupt-controller;
> +               #address-cells = <0>;
> +               #interrupt-cells = <1>;
> +       };
> +
> +       gic: interrupt-controller@140000 {
> +               compatible = "mti,gic";
> +               reg = <0x0 0x140000 0x0 0x20000>;
> +               interrupt-controller;
> +               #interrupt-cells = <3>;
> +
> +               /*
> +                * Declare the interrupt-parent even though the mti,gic
> +                * binding doesn't require it, such that the kernel can
> +                * figure out that cpu_intc is the root interrupt
> +                * controller & should be probed first.
> +                */
> +               interrupt-parent = <&cpu_intc>;
> +
> +               timer {
> +                       compatible = "mti,gic-timer";
> +                       interrupts = <GIC_LOCAL 1 IRQ_TYPE_NONE>;
> +                       clocks = <&core0_clk>;
> +               };
> +       };
> +
> +       soc: soc {
> +               #address-cells = <2>;
> +               #size-cells = <2>;
> +               ranges;
> +               compatible = "simple-bus";
> +
> +               uart0: serial@800000 {
> +                       compatible = "arm,pl011", "arm,primecell";
> +                       reg = <0 0x800000 0x0 0x1000>;
> +                       reg-io-width = <4>;
> +                       interrupt-parent = <&gic>;
> +                       interrupts = <GIC_IRQ(NUM_INT_UART)>;
> +                       clocks  = <&uart_clk>, <&occ_periph>;
> +                       clock-names = "uartclk", "apb_pclk";
> +               };
> +
> +               uart1: serial@900000 {
> +                       compatible = "arm,pl011", "arm,primecell";
> +                       reg = <0 0x900000 0x0 0x1000>;
> +                       reg-io-width = <4>;
> +                       interrupt-parent = <&gic>;
> +                       interrupts = <GIC_IRQ(NUM_INT_UART)>;
> +                       clocks  = <&uart_clk>, <&occ_periph>;
> +                       clock-names = "uartclk", "apb_pclk";
> +               };
> +
> +               uart2: serial@a00000 {
> +                       compatible = "arm,pl011", "arm,primecell";
> +                       reg = <0 0xa00000 0x0 0x1000>;
> +                       reg-io-width = <4>;
> +                       interrupt-parent = <&gic>;
> +                       interrupts = <GIC_IRQ(NUM_INT_UART)>;
> +                       clocks  = <&uart_clk>, <&occ_periph>;
> +                       clock-names = "uartclk", "apb_pclk";
> +               };
> +
> +               olb: olb@e00000 {
> +                       compatible = "mobileye,eyeq5-olb", "syscon", "simple-mfd";
> +                       reg = <0 0xe00000 0x0 0x400>;
> +                       reg-io-width = <4>;
> +               };
> +
> +       };
> +};
> --
> 2.40.1
>
Philippe Mathieu-Daudé Oct. 5, 2023, 6:40 a.m. UTC | #2
On 4/10/23 18:10, Gregory CLEMENT wrote:
> The code clearing BSS already use macro or use correct instruction
> depending id the CPU is 32 bits or 64 bits. However, a few
> instructions remained 32 bits only.
> 
> By using the accurate MACRO, it is now possible to deal with memory
> address beyond 32 bits. As a side effect, when using 64bits processor,
> it also divides the loop number needed to clear the BSS by 2.
> 
> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
> ---
>   arch/mips/boot/compressed/head.S | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Gregory CLEMENT Oct. 5, 2023, 3:17 p.m. UTC | #3
Hello Rob,

> On Wed, Oct 4, 2023 at 11:11 AM Gregory CLEMENT
> <gregory.clement@bootlin.com> wrote:
>>
>> Add a device tree include file for the Mobileye EyeQ5 SoC.
>>
>> Based on the work of Slava Samsonov <stanislav.samsonov@intel.com>
>>
>> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
>> ---
>>  arch/mips/boot/dts/Makefile                   |   1 +
>>  arch/mips/boot/dts/mobileye/Makefile          |   4 +
>>  .../boot/dts/mobileye/eyeq5-fixed-clocks.dtsi | 315 ++++++++++++++++++
>>  arch/mips/boot/dts/mobileye/eyeq5.dtsi        | 138 ++++++++
>>  4 files changed, 458 insertions(+)
>>  create mode 100644 arch/mips/boot/dts/mobileye/Makefile
>>  create mode 100644 arch/mips/boot/dts/mobileye/eyeq5-fixed-clocks.dtsi
>>  create mode 100644 arch/mips/boot/dts/mobileye/eyeq5.dtsi
>>
>> diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile
>> index 928f38a79dff..edb8e8dee758 100644
>> --- a/arch/mips/boot/dts/Makefile
>> +++ b/arch/mips/boot/dts/Makefile
>> @@ -8,6 +8,7 @@ subdir-$(CONFIG_LANTIQ)                 += lantiq
>>  subdir-$(CONFIG_MACH_LOONGSON64)       += loongson
>>  subdir-$(CONFIG_SOC_VCOREIII)          += mscc
>>  subdir-$(CONFIG_MIPS_MALTA)            += mti
>> +subdir-$(CONFIG_SOC_EYEQ5)             += mobileye
>>  subdir-$(CONFIG_LEGACY_BOARD_SEAD3)    += mti
>>  subdir-$(CONFIG_FIT_IMAGE_FDT_NI169445)        += ni
>>  subdir-$(CONFIG_MACH_PIC32)            += pic32
>> diff --git a/arch/mips/boot/dts/mobileye/Makefile b/arch/mips/boot/dts/mobileye/Makefile
>> new file mode 100644
>> index 000000000000..99c4124fd4c0
>> --- /dev/null
>> +++ b/arch/mips/boot/dts/mobileye/Makefile
>> @@ -0,0 +1,4 @@
>> +# SPDX-License-Identifier: GPL-2.0-only
>> +# Copyright 2023 Mobileye Vision Technologies Ltd.
>> +
>> +obj-$(CONFIG_BUILTIN_DTB)      += $(addsuffix .o, $(dtb-y))
>
> You didn't add anything to 'dtb-y'. Did you test this?

Initially yes, but in the end we switched to FIT image generation, so we
don't use it anymore.

>
> Also, CONFIG_BUILTIN_DTB is supposed to be for legacy bootloaders
> which don't understand DT. For a new SoC, fix the bootloader.

I can remove it

>
>> diff --git a/arch/mips/boot/dts/mobileye/eyeq5-fixed-clocks.dtsi b/arch/mips/boot/dts/mobileye/eyeq5-fixed-clocks.dtsi
>> new file mode 100644
>> index 000000000000..a0066465ac8b
>> --- /dev/null
>> +++ b/arch/mips/boot/dts/mobileye/eyeq5-fixed-clocks.dtsi
>> @@ -0,0 +1,315 @@
>> +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
>> +/*
>> + * Copyright 2023 Mobileye Vision Technologies Ltd.
>> + */
>
> I assume these aren't all really fixed, but just 'I don't have a clock
> driver yet'. That creates an ABI issue when you add the clock
> driver(s). Just FYI.

Indeed they aren't all fixed. The plan is to replace the relevant ones by a
real clock driver when ready.

In this case some part of the dts file will be modified. But is it a
real issue ?

Booting a new kernel with an old dtb will still continue to work in
the same way. It's only a new dtb with an old kernel that won't work, but we
are not supposed to support this case.


>
>> +
>> +/ {
>> +       /* Fixed clock */
>> +       pll_cpu: pll_cpu {
>
> Don't use _ in node names.

OK
[...]

>> +/* PLL_CPU derivatives */
>> +       occ_cpu: occ_cpu {
>> +               compatible = "fixed-factor-clock";
>> +               clocks = <&pll_cpu>;
>> +               #clock-cells = <0>;
>> +               clock-div = <1>;
>> +               clock-mult = <1>;
>> +               clock-output-names = "occ_cpu";
>
> Isn't the default name the node name? Drop these unless you really
> have a need and they aren't redundant.

Indeed, it's not used; I will remove them too.
[...]

>> --- /dev/null
>> +++ b/arch/mips/boot/dts/mobileye/eyeq5.dtsi
>> @@ -0,0 +1,138 @@
>> +// SPDX-License-Identifier: GPL-2.0
>
> Doesn't match eyeq5-fixed-clocks.dtsi

OK

>
>> +/*
>> + * Copyright 2023 Mobileye Vision Technologies Ltd.
>> + */
>> +
>> +#include <dt-bindings/interrupt-controller/mips-gic.h>
>> +#include <dt-bindings/soc/mobileye,eyeq5.h>
>> +
>> +/memreserve/ 0x40000000 0xc0000000; /* DDR32 */
>> +/memreserve/ 0x08000000 0x08000000; /* DDR_LOW */
>> +
>> +#include "eyeq5-fixed-clocks.dtsi"
>> +
>> +/* almost all GIC IRQs has the same characteristics. provide short form */
>
> Maybe so, but I prefer not having 2 levels of lookup to figure out values.
>
>> +#define GIC_IRQ(x) GIC_SHARED (x) IRQ_TYPE_LEVEL_HIGH

OK I remove it.

>> +
>> +/ {
>> +       #address-cells = <2>;
>> +       #size-cells = <2>;
>> +       cpus {
>> +               #address-cells = <1>;
>> +               #size-cells = <0>;
>> +               cpu@0 {
>> +                       device_type = "cpu";
>> +                       compatible = "mti,i6500";
>> +                       reg = <0>;
>> +                       clocks = <&core0_clk>;
>> +               };
>> +       };
>> +
>> +       reserved-memory {
>> +               #address-cells = <2>;
>> +               #size-cells = <2>;
>> +               ranges;
>> +
>> +/* These reserved memory regions are also defined in bootmanager
>> + * for configuring inbound translation for BARS, don't change
>> + * these without syncing with bootmanager
>> + */
>
> Indent with the rest of the node.

OK

Thanks,

Gregory
Arnd Bergmann Oct. 6, 2023, 11:11 a.m. UTC | #4
On Wed, Oct 4, 2023, at 18:10, Gregory CLEMENT wrote:
> Add Vlad, Théo and myself as co-maintainers for the Mobileye MIPS
> SoCs.
>
> Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
> Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
> ---
>  MAINTAINERS | 12 ++++++++++++
>  1 file changed, 12 insertions(+)
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 90f13281d297..6aedeab5f07c 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -14423,6 +14423,18 @@ W:	http://palosaari.fi/linux/
>  Q:	http://patchwork.linuxtv.org/project/linux-media/list/
>  F:	drivers/media/dvb-frontends/mn88473*
> 
> +MOBILEYE MIPS SOCS
> +M:	Vladimir Kondratiev <vladimir.kondratiev@intel.com>
> +M:	Gregory CLEMENT <gregory.clement@bootlin.com>
> +M:	Théo Lebrun <theo.lebrun@bootlin.com>

Is Vladimir's @intel.com address going to stay valid in the
future? I would have assumed that after the spin-out, all
remaining developers working on eyeq would go back to a
mobileye address.

     Arnd
Arnd Bergmann Oct. 6, 2023, 11:18 a.m. UTC | #5
On Wed, Oct 4, 2023, at 18:10, Gregory CLEMENT wrote:
> +
> +	chosen {
> +		bootargs = "cca=5 earlycon console=ttyAMA2 ddr32_alias=0x40000000";
> +		stdout-path = "serial2:115200n8";
> +	};
> +

The bootargs should not be needed here, at least most of them:

- no need to set both console= and the stdout-path if you have
  earlycon support

- ddr32_alias=0x40000000 sounds like something that should be
  part of the dtb elsewhere and not require a command line argument.
  I assume this is needed to even build?

- For cca=, it looks like this is intended to be autodetected from
  the c0_config register. Does that not work for you for some reason?

     Arnd
Arnd Bergmann Oct. 6, 2023, 11:21 a.m. UTC | #6
On Wed, Oct 4, 2023, at 18:10, Gregory CLEMENT wrote:
> From: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
>
> Support platforms where RAM is mapped beyond 32-bit.
>
> The kernel parameter ddr32_alias allows to setup the alias to point
> outside the first 4 GB of memory.
>
> Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>

This needs a better explanation, and probably a rewrite.
Having to pass the memory address on the command line does
not sound like an appropriate way to boot the kernel, so
I think either this needs to be detected from the running kernel
itself, or passed through DT.

      Arnd
Jiaxun Yang Oct. 7, 2023, 8:14 p.m. UTC | #7
在2023年10月4日十月 下午5:10,Gregory CLEMENT写道:
> From: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
>
> Support platforms where RAM is mapped beyond 32-bit.
>
> The kernel parameter ddr32_alias allows to setup the alias to point
> outside the first 4 GB of memory.

Are you trying to fix the problem that if kernel text is loaded in
XKPHYS there is no way to set EBASE to that region?

The common practice for other 64-bit MIPS systems is to load the kernel
in KSEG0 and add the low 4G mirror, with the rest of the high memory, to the
buddy system. By doing this the kernel still has access to all memory beyond
32 bits; the only drawback is that the kernel's text and data can't be
relocated beyond 32 bits.

Loading the kernel into KSEG0 (i.e. with KBUILD_SYM32) has a significant
benefit on performance, so I think you shouldn't try to load the kernel into
XKPHYS without a good reason, but it might be helpful to add a BUG_ON in the
CPS driver to handle such a situation.

Btw: Is your target hardware publicly available? Folks at CIP United
have been looking for EyeQ5 boards for a while; they are working on MIPS R6
support in various projects.

Thanks
Jiaxun

>
> Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
> ---
>  arch/mips/kernel/smp-cps.c | 12 +++++++++++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
> index 47e76722a306..fcfb19487612 100644
> --- a/arch/mips/kernel/smp-cps.c
> +++ b/arch/mips/kernel/smp-cps.c
> @@ -34,6 +34,16 @@ static unsigned __init core_vpe_count(unsigned int 
> cluster, unsigned core)
>  	return min(smp_max_threads, mips_cps_numvps(cluster, core));
>  }
> 
> +static int ddr32_alias;
> +
> +static int __init ddr32_alias_setup(char *str)
> +{
> +	get_option(&str, &ddr32_alias);
> +
> +	return 0;
> +}
> +early_param("ddr32_alias", ddr32_alias_setup);
> +
>  /**
>   * plat_core_entry - query reset vector for NMI/reset
>   *
> @@ -52,7 +62,7 @@ static u32 plat_core_entry(void)
>  {
>  #if defined(CONFIG_USE_XKPHYS)
>  	return (UNCAC_ADDR(mips_cps_core_entry) & 0xffffffff)
> -			| CM_GCR_Cx_RESET_BASE_MODE;
> +			| ddr32_alias | CM_GCR_Cx_RESET_BASE_MODE;
>  #else
>  	return CKSEG1ADDR((unsigned long)mips_cps_core_entry);
>  #endif
> -- 
> 2.40.1
Gregory CLEMENT Oct. 9, 2023, 2:51 p.m. UTC | #8
"Arnd Bergmann" <arnd@arndb.de> writes:

> On Wed, Oct 4, 2023, at 18:10, Gregory CLEMENT wrote:
>> +
>> +	chosen {
>> +		bootargs = "cca=5 earlycon console=ttyAMA2 ddr32_alias=0x40000000";
>> +		stdout-path = "serial2:115200n8";
>> +	};
>> +
>
> The bootargs should not be needed here, at least most of them:
>
> - no need to set both console= and the stdout-path if you have
>   earlycon support

OK, I will keep stdout-path.

>
> - ddr32_alias=0x40000000 sounds like something that should be
>   part of the dtb elsewhere and not require a command line argument.
>   I assume this is needed to even build?

I will answer on the other email but in short I agree.
>
> - For cca=, it looks like this is intended to be autodetected from
>   the c0_config register. Does that not work for you for some reason?

Indeed, I checked and it is not needed anymore: it is already set in
cps_smp_setup. I will remove it.

Thanks,

Gregory

>
>      Arnd
Gregory CLEMENT Oct. 9, 2023, 3:06 p.m. UTC | #9
"Arnd Bergmann" <arnd@arndb.de> writes:

> On Wed, Oct 4, 2023, at 18:10, Gregory CLEMENT wrote:
>> Add Vlad, Théo and myself as co-maintainers for the Mobileye MIPS
>> SoCs.
>>
>> Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
>> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
>> Signed-off-by: Théo Lebrun <theo.lebrun@bootlin.com>
>> ---
>>  MAINTAINERS | 12 ++++++++++++
>>  1 file changed, 12 insertions(+)
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index 90f13281d297..6aedeab5f07c 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -14423,6 +14423,18 @@ W:	http://palosaari.fi/linux/
>>  Q:	http://patchwork.linuxtv.org/project/linux-media/list/
>>  F:	drivers/media/dvb-frontends/mn88473*
>> 
>> +MOBILEYE MIPS SOCS
>> +M:	Vladimir Kondratiev <vladimir.kondratiev@intel.com>
>> +M:	Gregory CLEMENT <gregory.clement@bootlin.com>
>> +M:	Théo Lebrun <theo.lebrun@bootlin.com>
>
> Is Vladimir's @intel.com address going to stay valid in the
> future? I would have assumed that after the spin-out, all
> remaining developers working on eyeq would go back to a
> mobileye address.

Until recently it was an intel address but now the mobileye one is
available, I will update it.

Thanks,

Gregory

>
>      Arnd
Gregory CLEMENT Oct. 9, 2023, 3:59 p.m. UTC | #10
Hello Jiaxun,

> 在2023年10月4日十月 下午5:10,Gregory CLEMENT写道:
>> From: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
>>
>> Support platforms where RAM is mapped beyond 32-bit.
>>
>> The kernel parameter ddr32_alias allows to setup the alias to point
>> outside the first 4 GB of memory.
>
> Are you trying to fix the problem that if kernel text is loaded in
> XKPHYS there is no way to to set EBASE to that region?

Yes, that is exactly what we are trying to fix.

>
> The common practice for other 64bit MIPS system is to load kernel
> in KSEG0 and add low 4G mirror with rest of the high memory to buddy
> system. By doing this Kernel still have access to all memory beyond
> 32 bit, the only draw back is Kernel's text and data can't be relocted
> beyond 32-bit.
>
> Loading kernel into KSEG0 (i.e. with KBUILD_SYM32) have significant benefit
> on performance, so I think you shouldn't try to load kernel into XKPHYS
> without a good reason, but it might be helpful to add a BUG_ON at
> CPS driver to handle such situation.

I guess that being in KSEG0 allows the use of shorter pointers.  But in our
case the RAM is physically connected beyond 32 bits, so it is not
accessible in KSEG0.

>
> Btw: Is your target hardware publicly available? Folks at CIP United
> are looking for EyeQ5 boards for a while, they are supporting MIPS R6
> support at various projects.

We use evaluation boards and I don't know if they are publicly
available.

Gregory

>
> Thanks
> Jiaxun
>
>>
>> Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
>> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
>> ---
>>  arch/mips/kernel/smp-cps.c | 12 +++++++++++-
>>  1 file changed, 11 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
>> index 47e76722a306..fcfb19487612 100644
>> --- a/arch/mips/kernel/smp-cps.c
>> +++ b/arch/mips/kernel/smp-cps.c
>> @@ -34,6 +34,16 @@ static unsigned __init core_vpe_count(unsigned int 
>> cluster, unsigned core)
>>  	return min(smp_max_threads, mips_cps_numvps(cluster, core));
>>  }
>> 
>> +static int ddr32_alias;
>> +
>> +static int __init ddr32_alias_setup(char *str)
>> +{
>> +	get_option(&str, &ddr32_alias);
>> +
>> +	return 0;
>> +}
>> +early_param("ddr32_alias", ddr32_alias_setup);
>> +
>>  /**
>>   * plat_core_entry - query reset vector for NMI/reset
>>   *
>> @@ -52,7 +62,7 @@ static u32 plat_core_entry(void)
>>  {
>>  #if defined(CONFIG_USE_XKPHYS)
>>  	return (UNCAC_ADDR(mips_cps_core_entry) & 0xffffffff)
>> -			| CM_GCR_Cx_RESET_BASE_MODE;
>> +			| ddr32_alias | CM_GCR_Cx_RESET_BASE_MODE;
>>  #else
>>  	return CKSEG1ADDR((unsigned long)mips_cps_core_entry);
>>  #endif
>> -- 
>> 2.40.1
>
> -- 
> - Jiaxun
Jiaxun Yang Oct. 10, 2023, 8:55 a.m. UTC | #11
在2023年10月9日十月 下午4:59,Gregory CLEMENT写道:
> Hello Jiaxun,
>
>> 在2023年10月4日十月 下午5:10,Gregory CLEMENT写道:
>>> From: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
>>>
>>> Support platforms where RAM is mapped beyond 32-bit.
>>>
>>> The kernel parameter ddr32_alias allows to setup the alias to point
>>> outside the first 4 GB of memory.
>>
>> Are you trying to fix the problem that if kernel text is loaded in
>> XKPHYS there is no way to to set EBASE to that region?
>
> Yes that exactly we try to fix.
>
>>
>> The common practice for other 64bit MIPS system is to load kernel
>> in KSEG0 and add low 4G mirror with rest of the high memory to buddy
>> system. By doing this Kernel still have access to all memory beyond
>> 32 bit, the only draw back is Kernel's text and data can't be relocted
>> beyond 32-bit.
>>
>> Loading kernel into KSEG0 (i.e. with KBUILD_SYM32) have significant benefit
>> on performance, so I think you shouldn't try to load kernel into XKPHYS
>> without a good reason, but it might be helpful to add a BUG_ON at
>> CPS driver to handle such situation.
>
> I guess that being in KSEG0 allows to use shorter pointer.  But in our
> case the RAM is physically connected beyond 32bits, so it is not
> accessible in KSEG0.

For most systems there should be a mirror of part of the DDR which is accessible
in KSEG0, and the kernel runs from there. As per my interpretation of your code,
EyeQ5 is also doing this? If not, could you please briefly describe the memory map?

For a kernel in KSEG0 the pointer is still 64-bit, but we can use fewer
instructions to load an absolute pointer into a register, see [1].

>>
>> Btw: Is your target hardware publicly available? Folks at CIP United
>> are looking for EyeQ5 boards for a while, they are supporting MIPS R6
>> support at various projects.
>
> We use evaluation boards and I don't know if they are publicly
> available.
>
> Gregory
>
[1]: https://elinux.org/images/1/1f/New-tricks-mips-linux.pdf

Thanks
- Jiaxun
Gregory CLEMENT Oct. 11, 2023, 2:46 p.m. UTC | #12
Hello Jiaxun,

> 在2023年10月9日十月 下午4:59,Gregory CLEMENT写道:
>> Hello Jiaxun,
>>
>>> 在2023年10月4日十月 下午5:10,Gregory CLEMENT写道:
>>>> From: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
>>>>
>>>> Support platforms where RAM is mapped beyond 32-bit.
>>>>
>>>> The kernel parameter ddr32_alias allows to setup the alias to point
>>>> outside the first 4 GB of memory.
>>>
>>> Are you trying to fix the problem that if kernel text is loaded in
>>> XKPHYS there is no way to to set EBASE to that region?
>>
>> Yes that exactly we try to fix.
>>
>>>
>>> The common practice for other 64bit MIPS system is to load kernel
>>> in KSEG0 and add low 4G mirror with rest of the high memory to buddy
>>> system. By doing this Kernel still have access to all memory beyond
>>> 32 bit, the only draw back is Kernel's text and data can't be relocted
>>> beyond 32-bit.
>>>
>>> Loading kernel into KSEG0 (i.e. with KBUILD_SYM32) have significant benefit
>>> on performance, so I think you shouldn't try to load kernel into XKPHYS
>>> without a good reason, but it might be helpful to add a BUG_ON at
>>> CPS driver to handle such situation.
>>
>> I guess that being in KSEG0 allows to use shorter pointer.  But in our
>> case the RAM is physically connected beyond 32bits, so it is not
>> accessible in KSEG0.
>
> For most system there should be a mirror of part of DDR which is accessible
> at KSEG0 and kernel runs from here. As per my interpretion of your code EyeQ5
> is also doing this? If not could you please briefly describe the memory map?
>
> For Kernel in KSEG0 the pointer is still 64bit but we can use fewer inst
> to load ABS pointer into register, see [1].
>

There is a kind of mirror, but its physical address starts at 0x8000000,
so beyond the first 512 MBytes that are used for KSEG0.

In short the 32bits mapping is the following:

 - the controller registers of the SoC are located below 0x8000000,
 - then from 0x8000000 to 0x10000000 there is the alias to the low addresses
   of the DDR
 - then the SPI flash is mapped from 0x10000000 to 0x20000000
 - after that, the PCIe Memory 32-bit address space is from 0x20000000 to
   0x40000000

Gregory

> [1]: https://elinux.org/images/1/1f/New-tricks-mips-linux.pdf
Thomas Bogendoerfer Oct. 12, 2023, 3:34 p.m. UTC | #13
On Wed, Oct 04, 2023 at 06:10:29PM +0200, Gregory CLEMENT wrote:
> From: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
> 
> Now 64-bit MIPS uses 32-bit compatible segments KSEG0 and KSEG1
> to trivially map first 1/2 GByte of physical memory. This memory
> used to run kernel. This mean, one should have memory installed
> in this area in order for Linux to work.
> 
> Kconfig CONFIG_USE_XKPHYS introduced; it adds support for kernel
> to use virtual addresses from the XKPHYS segment for both cached
> and uncached access. XKPHYS allows to access 2^48 bytes of
> memory, thus allowing kernel to work with any memory
> configuration.

IMHO it doesn't make sense to introduce an option for a generic
kernel, which then renders the generic kernel useless on all
platforms other than yours.

Please don't use generic, but set up a new platform for it. Hopefully
we can get rid of all the weirdness in this patch.

Thomas.
Jiaxun Yang Oct. 12, 2023, 8:40 p.m. UTC | #14
在2023年10月11日十月 下午3:46,Gregory CLEMENT写道:
> Hello Jiaxun,
>
[...]
>
> There is a kind of mirror but its physical address start at 0x8000000
> so beyond the first 512MBytes that are used for KSEG0.

Really, KSEG0 range is 0x00000000 to 0x20000000, and 0x08000000 to 0x10000000
is definitely within that range.

But I'd agree that 0x08000000 to 0x10000000 (32MB) seems too small for kernel
text and data. So yeah, it makes sense to load kernel into XKPHYS.

My suggestion is that the kernel does not have to be aware of the mirror design.
Say that you have DDR fully mapped at 0x100000000; you can split the memory
space into two chunks: 0x08000000 to 0x10000000 and 0x102000000 to the end
of the DRAM. Since memblock always allocates from the first contiguous range
in the system, we can guarantee that ebase is allocated within the first
chunk.

Thanks

>
> In short the 32bits mapping is the following:
>
>  - the controllers registers of the SoC are located  until 0x8000000,
>  - then from 0x8000000 to 0x10000000 there is the alias to low addresses
>    of the DDR
>  - then the SPIflash is mapped to from 0x10000000 to 0x20000000
>  - after the PCIe Memory 32-bit addr space is from 0x20000000 to
>    0x40000000
>
> Gregory
>
>> [1]: https://elinux.org/images/1/1f/New-tricks-mips-linux.pdf
>
> -- 
> Gregory Clement, Bootlin
> Embedded Linux and Kernel engineering
> http://bootlin.com
Jiaxun Yang Oct. 22, 2023, 11:39 a.m. UTC | #15
在2023年10月4日十月 下午5:10,Gregory CLEMENT写道:
> From: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
>
> Now 64-bit MIPS uses 32-bit compatible segments KSEG0 and KSEG1
> to trivially map first 1/2 GByte of physical memory. This memory
> used to run kernel. This mean, one should have memory installed
> in this area in order for Linux to work.
>
> Kconfig CONFIG_USE_XKPHYS introduced; it adds support for kernel
> to use virtual addresses from the XKPHYS segment for both cached
> and uncached access. XKPHYS allows to access 2^48 bytes of
> memory, thus allowing kernel to work with any memory
> configuration.
>
> MIPS CPU sets KX bit in the CP0 status register at reset
> if RESET_BASE_MODE (BIT 1) set in the GCR_CL_RESET_BASE.
>
> Reset vector should fit into 32-bit. If reset vector put outside of
> KSEG1, BIT(1) should be set in this value.
>
> IRQ handler for CPU updated to generate 64-bit address for jump

Please use existing KBUILD_SYM32 symbol.

Thanks
- Jiaxun

>
> Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
> ---
>  arch/mips/Kconfig                 | 15 +++++++++++++
>  arch/mips/Makefile                |  4 ++++
>  arch/mips/generic/Platform        |  5 +++++
>  arch/mips/include/asm/addrspace.h | 12 ++++++++--
>  arch/mips/include/asm/mips-cm.h   |  1 +
>  arch/mips/include/asm/page.h      | 10 +++++++++
>  arch/mips/include/asm/vga.h       |  4 ++++
>  arch/mips/kernel/cps-vec.S        |  8 +++++++
>  arch/mips/kernel/genex.S          | 14 ++++++++++++
>  arch/mips/kernel/smp-cps.c        | 37 +++++++++++++++++++++++--------
>  arch/mips/kernel/traps.c          | 32 +++++++++++++++++++++++---
>  arch/mips/lib/uncached.c          | 10 +++++++++
>  arch/mips/mm/init.c               |  4 ++--
>  13 files changed, 140 insertions(+), 16 deletions(-)
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index bc8421859006..92832bbcca5d 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -2026,6 +2026,21 @@ config 64BIT
> 
>  endchoice
> 
> +config USE_XKPHYS
> +	bool "use virtual address from XKPHYS"
> +	depends on 64BIT
> +	default n
> +	help
> +	 By default, MIPS uses 32-bit compatible segments KSEG0 and KSEG1
> +	 to trivially map first 1/2 GByte of physical memory. This mean,
> +	 one should have memory installed in this area in order for Linux to
> +	 work. With this option selected, kernel uses virtual addresses from
> +	 the XKPHYS segment for both cached and uncached access. XKPHYS allows
> +	 to access 2^48 bytes of memory, thus allowing to work with any memory
> +	 configuration.
> +
> +	 Say N if not sure
> +
>  config MIPS_VA_BITS_48
>  	bool "48 bits virtual memory"
>  	depends on 64BIT
> diff --git a/arch/mips/Makefile b/arch/mips/Makefile
> index f49807e1f19b..544ee8427cab 100644
> --- a/arch/mips/Makefile
> +++ b/arch/mips/Makefile
> @@ -303,6 +303,10 @@ ifdef CONFIG_64BIT
>      endif
>    endif
> 
> +  ifdef CONFIG_USE_XKPHYS
> +      KBUILD_SYM32 = n
> +  endif
> +
>    ifeq ($(KBUILD_SYM32), y)
>      cflags-$(KBUILD_SYM32) += -msym32 -DKBUILD_64BIT_SYM32
>    else
> diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform
> index 0c03623f3897..2be9947814ad 100644
> --- a/arch/mips/generic/Platform
> +++ b/arch/mips/generic/Platform
> @@ -12,7 +12,12 @@
>  cflags-$(CONFIG_MACH_INGENIC_SOC)	+= 
> -I$(srctree)/arch/mips/include/asm/mach-ingenic
>  cflags-$(CONFIG_MIPS_GENERIC)	+= 
> -I$(srctree)/arch/mips/include/asm/mach-generic
> 
> +ifndef (CONFIG_USE_XKPHYS)
>  load-$(CONFIG_MIPS_GENERIC)	+= 0xffffffff80100000
> +else
> +load-$(CONFIG_MIPS_GENERIC)	+= 0xa800000080100000
> +endif
> +
>  all-$(CONFIG_MIPS_GENERIC)	+= vmlinux.gz.itb
> 
>  its-y					:= vmlinux.its.S
> diff --git a/arch/mips/include/asm/addrspace.h 
> b/arch/mips/include/asm/addrspace.h
> index 59a48c60a065..8dc500d8e66d 100644
> --- a/arch/mips/include/asm/addrspace.h
> +++ b/arch/mips/include/asm/addrspace.h
> @@ -65,10 +65,15 @@
>  #define XKSSEG			_CONST64_(0x4000000000000000)
>  #define XKPHYS			_CONST64_(0x8000000000000000)
>  #define XKSEG			_CONST64_(0xc000000000000000)
> +#if !defined(CONFIG_USE_XKPHYS)
>  #define CKSEG0			_CONST64_(0xffffffff80000000)
>  #define CKSEG1			_CONST64_(0xffffffffa0000000)
>  #define CKSSEG			_CONST64_(0xffffffffc0000000)
>  #define CKSEG3			_CONST64_(0xffffffffe0000000)
> +#else
> +#define CKSEG0			XKPHYS_CM_CACHED
> +#define CKSEG1			XKPHYS_CM_UNCACHED
> +#endif /* !defined(CONFIG_USE_XKPHYS) */
> 
>  #define CKSEG0ADDR(a)		(CPHYSADDR(a) | CKSEG0)
>  #define CKSEG1ADDR(a)		(CPHYSADDR(a) | CKSEG1)
> @@ -126,8 +131,11 @@
>  #define PHYS_TO_XKSEG_UNCACHED(p)	PHYS_TO_XKPHYS(K_CALG_UNCACHED, (p))
>  #define PHYS_TO_XKSEG_CACHED(p)		PHYS_TO_XKPHYS(K_CALG_COH_SHAREABLE, (p))
>  #define XKPHYS_TO_PHYS(p)		((p) & TO_PHYS_MASK)
> -#define PHYS_TO_XKPHYS(cm, a)		(XKPHYS | (_ACAST64_(cm) << 59) | (a))
> -
> +#define XKPHYS_CM(cm)			(XKPHYS | (_ACAST64_(cm) << 59))
> +#define PHYS_TO_XKPHYS(cm, a)		(XKPHYS_CM(cm) | (a))
> +#define XKPHYS_CM_CACHED		(XKPHYS_CM(K_CALG_COH_SHAREABLE))
> +#define XKPHYS_CM_UNCACHED		(XKPHYS_CM(K_CALG_UNCACHED))
> +#define IS_XKPHYS(a)			(((a) >> 62) == 2)
>  /*
>   * The ultimate limited of the 64-bit MIPS architecture:  2 bits for selecting
>   * the region, 3 bits for the CCA mode.  This leaves 59 bits of which the
> diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h
> index 23c67c0871b1..15d8d69de455 100644
> --- a/arch/mips/include/asm/mips-cm.h
> +++ b/arch/mips/include/asm/mips-cm.h
> @@ -311,6 +311,7 @@ GCR_CX_ACCESSOR_RW(32, 0x018, other)
>  /* GCR_Cx_RESET_BASE - Configure where powered up cores will fetch from */
>  GCR_CX_ACCESSOR_RW(32, 0x020, reset_base)
>  #define CM_GCR_Cx_RESET_BASE_BEVEXCBASE		GENMASK(31, 12)
> +#define CM_GCR_Cx_RESET_BASE_MODE		BIT(1)
> 
>  /* GCR_Cx_ID - Identify the current core */
>  GCR_CX_ACCESSOR_RO(32, 0x028, id)
> diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
> index 5978a8dfb917..53b8306da571 100644
> --- a/arch/mips/include/asm/page.h
> +++ b/arch/mips/include/asm/page.h
> @@ -176,7 +176,11 @@ static inline unsigned long ___pa(unsigned long x)
>  		 * the compatibility segements ckseg0 or ckseg1, or it may
>  		 * be in xkphys.
>  		 */
> +#if defined(CONFIG_USE_XKPHYS)
> +		return XPHYSADDR(x);
> +#else
>  		return x < CKSEG0 ? XPHYSADDR(x) : CPHYSADDR(x);
> +#endif
>  	}
> 
>  	if (!IS_ENABLED(CONFIG_EVA)) {
> @@ -196,7 +200,11 @@ static inline unsigned long ___pa(unsigned long x)
>  	return x - PAGE_OFFSET + PHYS_OFFSET;
>  }
>  #define __pa(x)		___pa((unsigned long)(x))
> +#if IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_USE_XKPHYS)
> +#define __va(x)		((void *)PHYS_TO_XKSEG_CACHED(x))
> +#else
>  #define __va(x)		((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET))
> +#endif
>  #include <asm/io.h>
> 
>  /*
> @@ -239,6 +247,8 @@ static inline unsigned long kaslr_offset(void)
>  	return __kaslr_offset;
>  }
> 
> +#define UNCAC_ADDR(addr)       (UNCAC_BASE + __pa(addr))
> +
>  #include <asm-generic/memory_model.h>
>  #include <asm-generic/getorder.h>
> 
> diff --git a/arch/mips/include/asm/vga.h b/arch/mips/include/asm/vga.h
> index 0136e0366698..e338e57d0784 100644
> --- a/arch/mips/include/asm/vga.h
> +++ b/arch/mips/include/asm/vga.h
> @@ -16,7 +16,11 @@
>   *	access the videoram directly without any black magic.
>   */
> 
> +#if defined(CONFIG_USE_XKPHYS)
> +#define VGA_MAP_MEM(x, s)	UNCAC_ADDR(0x10000000L + (unsigned long)(x))
> +#else
>  #define VGA_MAP_MEM(x, s)	CKSEG1ADDR(0x10000000L + (unsigned long)(x))
> +#endif
> 
>  #define vga_readb(x)	(*(x))
>  #define vga_writeb(x, y)	(*(y) = (x))
> diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
> index 64ecfdac6580..541f31a43a7f 100644
> --- a/arch/mips/kernel/cps-vec.S
> +++ b/arch/mips/kernel/cps-vec.S
> @@ -554,7 +554,11 @@ LEAF(mips_cps_cache_init)
>  	mul	t1, t1, t0
>  	mul	t1, t1, t2
> 
> +#if defined(CONFIG_USE_XKPHYS)
> +	PTR_LI	a0, XKPHYS_CM_CACHED
> +#else
>  	li	a0, CKSEG0
> +#endif
>  	PTR_ADD	a1, a0, t1
>  1:	cache	Index_Store_Tag_I, 0(a0)
>  	PTR_ADD	a0, a0, t0
> @@ -581,7 +585,11 @@ icache_done:
>  	mul	t1, t1, t0
>  	mul	t1, t1, t2
> 
> +#if defined(CONFIG_USE_XKPHYS)
> +	PTR_LI	a0, XKPHYS_CM_CACHED
> +#else
>  	li	a0, CKSEG0
> +#endif
>  	PTR_ADDU a1, a0, t1
>  	PTR_SUBU a1, a1, t0
>  1:	cache	Index_Store_Tag_D, 0(a0)
> diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> index b6de8e88c1bd..a002058e1838 100644
> --- a/arch/mips/kernel/genex.S
> +++ b/arch/mips/kernel/genex.S
> @@ -272,11 +272,25 @@ NESTED(except_vec_vi, 0, sp)
>  	.set	push
>  	.set	noreorder
>  	PTR_LA	v1, except_vec_vi_handler
> +#if defined(CONFIG_USE_XKPHYS)
> +FEXPORT(except_vec_vi_63_48)
> +	lui	v0, 0		/* Patched - bits 63:48 */
> +FEXPORT(except_vec_vi_47_32)
> +	ori	v0, 0		/* Patched - bits 47:32 */
> +	dsll	v0, v0, 0x10
> +FEXPORT(except_vec_vi_31_16)
> +	ori	v0, 0		/* Patched - bits 31:16 */
> +	dsll	v0, v0, 0x10
> +	jr	v1
> +FEXPORT(except_vec_vi_15_0)
> +	ori	v0, 0		/* Patched - bits 15:0 */
> +#else /* defined(CONFIG_USE_XKPHYS) */
>  FEXPORT(except_vec_vi_lui)
>  	lui	v0, 0		/* Patched */
>  	jr	v1
>  FEXPORT(except_vec_vi_ori)
>  	 ori	v0, 0		/* Patched */
> +#endif /* defined(CONFIG_USE_XKPHYS) */
>  	.set	pop
>  	END(except_vec_vi)
>  EXPORT(except_vec_vi_end)
> diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
> index dd55d59b88db..47e76722a306 100644
> --- a/arch/mips/kernel/smp-cps.c
> +++ b/arch/mips/kernel/smp-cps.c
> @@ -34,10 +34,33 @@ static unsigned __init core_vpe_count(unsigned int 
> cluster, unsigned core)
>  	return min(smp_max_threads, mips_cps_numvps(cluster, core));
>  }
> 
> +/**
> + * plat_core_entry - query reset vector for NMI/reset
> + *
> + * Returns low 32 bits of the reset vector
> + *
> + * This is used to fill 2 registers:
> + * - BEV Base (GCR_BEV_BASE) Offset: 0x0680
> + * - VP Local Reset Exception Base (GCR_CL_RESET_BASE,GCR_CO_RESET_BASE)
> + *   Offset: 0x0020 (0x2020 relative to GCR_BASE_ADDR)
> + *
> + * In both registers, BIT(1) should be set in case it uses address in XKPHYS
> + * (as opposed to KSEG1). This bit defined as CM_GCR_Cx_RESET_BASE_MODE,
> + * using it unconditionally because for GCR_BEV_BASE its value is the same
> + */
> +static u32 plat_core_entry(void)
> +{
> +#if defined(CONFIG_USE_XKPHYS)
> +	return (UNCAC_ADDR(mips_cps_core_entry) & 0xffffffff)
> +			| CM_GCR_Cx_RESET_BASE_MODE;
> +#else
> +	return CKSEG1ADDR((unsigned long)mips_cps_core_entry);
> +#endif
> +}
> +
>  static void __init cps_smp_setup(void)
>  {
>  	unsigned int nclusters, ncores, nvpes, core_vpes;
> -	unsigned long core_entry;
>  	int cl, c, v;
> 
>  	/* Detect & record VPE topology */
> @@ -94,10 +117,8 @@ static void __init cps_smp_setup(void)
>  	/* Make core 0 coherent with everything */
>  	write_gcr_cl_coherence(0xff);
> 
> -	if (mips_cm_revision() >= CM_REV_CM3) {
> -		core_entry = CKSEG1ADDR((unsigned long)mips_cps_core_entry);
> -		write_gcr_bev_base(core_entry);
> -	}
> +	if (mips_cm_revision() >= CM_REV_CM3)
> +		write_gcr_bev_base(plat_core_entry());
> 
>  #ifdef CONFIG_MIPS_MT_FPAFF
>  	/* If we have an FPU, enroll ourselves in the FPU-full mask */
> @@ -213,7 +234,7 @@ static void boot_core(unsigned int core, unsigned 
> int vpe_id)
>  	mips_cm_lock_other(0, core, 0, CM_GCR_Cx_OTHER_BLOCK_LOCAL);
> 
>  	/* Set its reset vector */
> -	write_gcr_co_reset_base(CKSEG1ADDR((unsigned long)mips_cps_core_entry));
> +	write_gcr_co_reset_base(plat_core_entry());
> 
>  	/* Ensure its coherency is disabled */
>  	write_gcr_co_coherence(0);
> @@ -290,7 +311,6 @@ static int cps_boot_secondary(int cpu, struct 
> task_struct *idle)
>  	unsigned vpe_id = cpu_vpe_id(&cpu_data[cpu]);
>  	struct core_boot_config *core_cfg = &mips_cps_core_bootcfg[core];
>  	struct vpe_boot_config *vpe_cfg = &core_cfg->vpe_config[vpe_id];
> -	unsigned long core_entry;
>  	unsigned int remote;
>  	int err;
> 
> @@ -314,8 +334,7 @@ static int cps_boot_secondary(int cpu, struct 
> task_struct *idle)
> 
>  	if (cpu_has_vp) {
>  		mips_cm_lock_other(0, core, vpe_id, CM_GCR_Cx_OTHER_BLOCK_LOCAL);
> -		core_entry = CKSEG1ADDR((unsigned long)mips_cps_core_entry);
> -		write_gcr_co_reset_base(core_entry);
> +		write_gcr_co_reset_base(plat_core_entry());
>  		mips_cm_unlock_other();
>  	}
> 
> diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
> index 246c6a6b0261..875594843626 100644
> --- a/arch/mips/kernel/traps.c
> +++ b/arch/mips/kernel/traps.c
> @@ -2091,11 +2091,20 @@ static void *set_vi_srs_handler(int n, 
> vi_handler_t addr, int srs)
>  		 * If no shadow set is selected then use the default handler
>  		 * that does normal register saving and standard interrupt exit
>  		 */
> -		extern const u8 except_vec_vi[], except_vec_vi_lui[];
> -		extern const u8 except_vec_vi_ori[], except_vec_vi_end[];
> +		extern const u8 except_vec_vi[], except_vec_vi_end[];
>  		extern const u8 rollback_except_vec_vi[];
>  		const u8 *vec_start = using_rollback_handler() ?
>  				      rollback_except_vec_vi : except_vec_vi;
> +		const int handler_len = except_vec_vi_end - vec_start;
> +#if defined(CONFIG_USE_XKPHYS)
> +		extern const u8 except_vec_vi_63_48[], except_vec_vi_47_32[];
> +		extern const u8 except_vec_vi_31_16[], except_vec_vi_15_0[];
> +		const int offset_63_48 = except_vec_vi_63_48 - vec_start;
> +		const int offset_47_32 = except_vec_vi_47_32 - vec_start;
> +		const int offset_31_16 = except_vec_vi_31_16 - vec_start;
> +		const int offset_15_0  = except_vec_vi_15_0  - vec_start;
> +#else /* defined(CONFIG_USE_XKPHYS) */
> +		extern const u8 except_vec_vi_lui[], except_vec_vi_ori[];
>  #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN)
>  		const int lui_offset = except_vec_vi_lui - vec_start + 2;
>  		const int ori_offset = except_vec_vi_ori - vec_start + 2;
> @@ -2103,7 +2112,7 @@ static void *set_vi_srs_handler(int n, 
> vi_handler_t addr, int srs)
>  		const int lui_offset = except_vec_vi_lui - vec_start;
>  		const int ori_offset = except_vec_vi_ori - vec_start;
>  #endif
> -		const int handler_len = except_vec_vi_end - vec_start;
> +#endif /* defined(CONFIG_USE_XKPHYS) */
> 
>  		if (handler_len > VECTORSPACING) {
>  			/*
> @@ -2119,10 +2128,21 @@ static void *set_vi_srs_handler(int n, 
> vi_handler_t addr, int srs)
>  #else
>  				handler_len);
>  #endif
> +#if defined(CONFIG_USE_XKPHYS)
> +		h = (u16 *)(b + offset_63_48);
> +		*h = (handler >> 48) & 0xffff;
> +		h = (u16 *)(b + offset_47_32);
> +		*h = (handler >> 32) & 0xffff;
> +		h = (u16 *)(b + offset_31_16);
> +		*h = (handler >> 16) & 0xffff;
> +		h = (u16 *)(b + offset_15_0);
> +		*h = (handler >> 0) & 0xffff;
> +#else /* defined(CONFIG_USE_XKPHYS) */
>  		h = (u16 *)(b + lui_offset);
>  		*h = (handler >> 16) & 0xffff;
>  		h = (u16 *)(b + ori_offset);
>  		*h = (handler & 0xffff);
> +#endif /* defined(CONFIG_USE_XKPHYS) */
>  		local_flush_icache_range((unsigned long)b,
>  					 (unsigned long)(b+handler_len));
>  	}
> @@ -2332,7 +2352,11 @@ static const char panic_null_cerr[] =
>  void set_uncached_handler(unsigned long offset, void *addr,
>  	unsigned long size)
>  {
> +#if IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_USE_XKPHYS)
> +	unsigned long uncached_ebase = UNCAC_ADDR(ebase);
> +#else
>  	unsigned long uncached_ebase = CKSEG1ADDR(ebase);
> +#endif
> 
>  	if (!addr)
>  		panic(panic_null_cerr);
> @@ -2384,9 +2408,11 @@ void __init trap_init(void)
>  		 * EVA is special though as it allows segments to be rearranged
>  		 * and to become uncached during cache error handling.
>  		 */
> +#if !defined(CONFIG_USE_XKPHYS)
>  		if (!IS_ENABLED(CONFIG_EVA) && !WARN_ON(ebase_pa >= 0x20000000))
>  			ebase = CKSEG0ADDR(ebase_pa);
>  		else
> +#endif
>  			ebase = (unsigned long)phys_to_virt(ebase_pa);
>  	}
> 
> diff --git a/arch/mips/lib/uncached.c b/arch/mips/lib/uncached.c
> index f80a67c092b6..8a78348a2dd7 100644
> --- a/arch/mips/lib/uncached.c
> +++ b/arch/mips/lib/uncached.c
> @@ -44,6 +44,10 @@ unsigned long run_uncached(void *func)
> 
>  	__asm__("move %0, $sp" : "=r" (sp));
> 
> +#if defined(CONFIG_USE_XKPHYS)
> +	if (IS_XKPHYS(sp))
> +		usp = UNCAC_ADDR(sp);
> +#else /* defined(CONFIG_USE_XKPHYS) */
>  	if (sp >= (long)CKSEG0 && sp < (long)CKSEG2)
>  		usp = CKSEG1ADDR(sp);
>  #ifdef CONFIG_64BIT
> @@ -52,10 +56,15 @@ unsigned long run_uncached(void *func)
>  		usp = PHYS_TO_XKPHYS(K_CALG_UNCACHED,
>  				     XKPHYS_TO_PHYS((long long)sp));
>  #endif
> +#endif /* defined(CONFIG_USE_XKPHYS) */
>  	else {
>  		BUG();
>  		usp = sp;
>  	}
> +#if defined(CONFIG_USE_XKPHYS)
> +	if (IS_XKPHYS(lfunc))
> +		ufunc = UNCAC_ADDR(lfunc);
> +#else /* defined(CONFIG_USE_XKPHYS) */
>  	if (lfunc >= (long)CKSEG0 && lfunc < (long)CKSEG2)
>  		ufunc = CKSEG1ADDR(lfunc);
>  #ifdef CONFIG_64BIT
> @@ -64,6 +73,7 @@ unsigned long run_uncached(void *func)
>  		ufunc = PHYS_TO_XKPHYS(K_CALG_UNCACHED,
>  				       XKPHYS_TO_PHYS((long long)lfunc));
>  #endif
> +#endif /* defined(CONFIG_USE_XKPHYS) */
>  	else {
>  		BUG();
>  		ufunc = lfunc;
> diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
> index 5dcb525a8995..eb57283ec4e0 100644
> --- a/arch/mips/mm/init.c
> +++ b/arch/mips/mm/init.c
> @@ -427,7 +427,7 @@ void __init paging_init(void)
>  	free_area_init(max_zone_pfns);
>  }
> 
> -#ifdef CONFIG_64BIT
> +#if defined(CONFIG_64BIT) && !defined(CONFIG_USE_XKPHYS)
>  static struct kcore_list kcore_kseg0;
>  #endif
> 
> @@ -470,7 +470,7 @@ void __init mem_init(void)
>  	setup_zero_pages();	/* Setup zeroed pages.  */
>  	mem_init_free_highmem();
> 
> -#ifdef CONFIG_64BIT
> +#if defined(CONFIG_64BIT) && !defined(CONFIG_USE_XKPHYS)
>  	if ((unsigned long) &_text > (unsigned long) CKSEG0)
>  		/* The -4 is a hack so that user tools don't have to handle
>  		   the overflow.  */
> -- 
> 2.40.1
Jiaxun Yang Oct. 22, 2023, 11:52 a.m. UTC | #16
在2023年10月12日十月 下午4:34,Thomas Bogendoerfer写道:
> On Wed, Oct 04, 2023 at 06:10:29PM +0200, Gregory CLEMENT wrote:
>> From: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
>> 
>> Now 64-bit MIPS uses 32-bit compatible segments KSEG0 and KSEG1
>> to trivially map first 1/2 GByte of physical memory. This memory
>> used to run kernel. This mean, one should have memory installed
>> in this area in order for Linux to work.
>> 
>> Kconfig CONFIG_USE_XKPHYS introduced; it adds support for kernel
>> to use virtual addresses from the XKPHYS segment for both cached
>> and uncached access. XKPHYS allows to access 2^48 bytes of
>> memory, thus allowing kernel to work with any memory
>> configuration.
>
> IMHO it doesn't make sense to introduce an option for a generic
> kernel, which then renders the generic kernel useless on all
> platforms other then yours.

Actually it won't. Many 64-bit platforms do support loading the kernel
into XKPHYS, including Boston and Loongson64, so it's still a generic
feature.

IMO this patch won't break support for any generic platform.

>
> Please don't use generic, but setup a new platform for it. Hopefully
> we can get rid all of the weirdness in this patch.

Perhaps it would be better to introduce a Kconfig option that allows the
kernel load address to be configured.

Thanks
- Jiaxun

>
> Thomas.
>
> -- 
> Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> good idea.                                                [ RFC1925, 2.3 ]
Jiaxun Yang Oct. 22, 2023, 4:42 p.m. UTC | #17
在2023年10月4日十月 下午5:10,Gregory CLEMENT写道:
> From: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
>
> Now 64-bit MIPS uses 32-bit compatible segments KSEG0 and KSEG1
> to trivially map first 1/2 GByte of physical memory. This memory
> used to run kernel. This mean, one should have memory installed
> in this area in order for Linux to work.
>
> Kconfig CONFIG_USE_XKPHYS introduced; it adds support for kernel
> to use virtual addresses from the XKPHYS segment for both cached
> and uncached access. XKPHYS allows to access 2^48 bytes of
> memory, thus allowing kernel to work with any memory
> configuration.
>
> MIPS CPU sets KX bit in the CP0 status register at reset
> if RESET_BASE_MODE (BIT 1) set in the GCR_CL_RESET_BASE.
>
> Reset vector should fit into 32-bit. If reset vector put outside of
> KSEG1, BIT(1) should be set in this value.
>
> IRQ handler for CPU updated to generate 64-bit address for jump

So I just spent some time reviewing and testing this patch on QEMU;
comments below:

>
> Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
> ---
>  arch/mips/Kconfig                 | 15 +++++++++++++
>  arch/mips/Makefile                |  4 ++++
>  arch/mips/generic/Platform        |  5 +++++
>  arch/mips/include/asm/addrspace.h | 12 ++++++++--
>  arch/mips/include/asm/mips-cm.h   |  1 +
>  arch/mips/include/asm/page.h      | 10 +++++++++
>  arch/mips/include/asm/vga.h       |  4 ++++
>  arch/mips/kernel/cps-vec.S        |  8 +++++++
>  arch/mips/kernel/genex.S          | 14 ++++++++++++
>  arch/mips/kernel/smp-cps.c        | 37 +++++++++++++++++++++++--------
>  arch/mips/kernel/traps.c          | 32 +++++++++++++++++++++++---
>  arch/mips/lib/uncached.c          | 10 +++++++++
>  arch/mips/mm/init.c               |  4 ++--
>  13 files changed, 140 insertions(+), 16 deletions(-)
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index bc8421859006..92832bbcca5d 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -2026,6 +2026,21 @@ config 64BIT
> 
>  endchoice
> 
> +config USE_XKPHYS
> +	bool "use virtual address from XKPHYS"
> +	depends on 64BIT
> +	default n
> +	help
> +	 By default, MIPS uses 32-bit compatible segments KSEG0 and KSEG1
> +	 to trivially map first 1/2 GByte of physical memory. This mean,
> +	 one should have memory installed in this area in order for Linux to
> +	 work. With this option selected, kernel uses virtual addresses from
> +	 the XKPHYS segment for both cached and uncached access. XKPHYS allows
> +	 to access 2^48 bytes of memory, thus allowing to work with any memory
> +	 configuration.
> +
> +	 Say N if not sure
> +
>  config MIPS_VA_BITS_48
>  	bool "48 bits virtual memory"
>  	depends on 64BIT
> diff --git a/arch/mips/Makefile b/arch/mips/Makefile
> index f49807e1f19b..544ee8427cab 100644
> --- a/arch/mips/Makefile
> +++ b/arch/mips/Makefile
> @@ -303,6 +303,10 @@ ifdef CONFIG_64BIT
>      endif
>    endif
> 
> +  ifdef CONFIG_USE_XKPHYS
> +      KBUILD_SYM32 = n
> +  endif
> +
>    ifeq ($(KBUILD_SYM32), y)
>      cflags-$(KBUILD_SYM32) += -msym32 -DKBUILD_64BIT_SYM32
>    else
> diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform
> index 0c03623f3897..2be9947814ad 100644
> --- a/arch/mips/generic/Platform
> +++ b/arch/mips/generic/Platform
> @@ -12,7 +12,12 @@
>  cflags-$(CONFIG_MACH_INGENIC_SOC)	+= 
> -I$(srctree)/arch/mips/include/asm/mach-ingenic
>  cflags-$(CONFIG_MIPS_GENERIC)	+= 
> -I$(srctree)/arch/mips/include/asm/mach-generic
> 
> +ifndef (CONFIG_USE_XKPHYS)
>  load-$(CONFIG_MIPS_GENERIC)	+= 0xffffffff80100000
> +else
> +load-$(CONFIG_MIPS_GENERIC)	+= 0xa800000080100000
> +endif

Better to make load address configurable.

> +
>  all-$(CONFIG_MIPS_GENERIC)	+= vmlinux.gz.itb
> 
>  its-y					:= vmlinux.its.S
> diff --git a/arch/mips/include/asm/addrspace.h 
> b/arch/mips/include/asm/addrspace.h
> index 59a48c60a065..8dc500d8e66d 100644
> --- a/arch/mips/include/asm/addrspace.h
> +++ b/arch/mips/include/asm/addrspace.h
> @@ -65,10 +65,15 @@
>  #define XKSSEG			_CONST64_(0x4000000000000000)
>  #define XKPHYS			_CONST64_(0x8000000000000000)
>  #define XKSEG			_CONST64_(0xc000000000000000)
> +#if !defined(CONFIG_USE_XKPHYS)
>  #define CKSEG0			_CONST64_(0xffffffff80000000)
>  #define CKSEG1			_CONST64_(0xffffffffa0000000)
>  #define CKSSEG			_CONST64_(0xffffffffc0000000)
>  #define CKSEG3			_CONST64_(0xffffffffe0000000)
> +#else
> +#define CKSEG0			XKPHYS_CM_CACHED
> +#define CKSEG1			XKPHYS_CM_UNCACHED
> +#endif /* !defined(CONFIG_USE_XKPHYS) */
> 
>  #define CKSEG0ADDR(a)		(CPHYSADDR(a) | CKSEG0)
>  #define CKSEG1ADDR(a)		(CPHYSADDR(a) | CKSEG1)
> @@ -126,8 +131,11 @@
>  #define PHYS_TO_XKSEG_UNCACHED(p)	PHYS_TO_XKPHYS(K_CALG_UNCACHED, (p))
>  #define PHYS_TO_XKSEG_CACHED(p)		PHYS_TO_XKPHYS(K_CALG_COH_SHAREABLE, (p))
>  #define XKPHYS_TO_PHYS(p)		((p) & TO_PHYS_MASK)
> -#define PHYS_TO_XKPHYS(cm, a)		(XKPHYS | (_ACAST64_(cm) << 59) | (a))
> -
> +#define XKPHYS_CM(cm)			(XKPHYS | (_ACAST64_(cm) << 59))
> +#define PHYS_TO_XKPHYS(cm, a)		(XKPHYS_CM(cm) | (a))
> +#define XKPHYS_CM_CACHED		(XKPHYS_CM(K_CALG_COH_SHAREABLE))
> +#define XKPHYS_CM_UNCACHED		(XKPHYS_CM(K_CALG_UNCACHED))
> +#define IS_XKPHYS(a)			(((a) >> 62) == 2)
>  /*
>   * The ultimate limited of the 64-bit MIPS architecture:  2 bits for selecting
>   * the region, 3 bits for the CCA mode.  This leaves 59 bits of which the
> diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h
> index 23c67c0871b1..15d8d69de455 100644
> --- a/arch/mips/include/asm/mips-cm.h
> +++ b/arch/mips/include/asm/mips-cm.h
> @@ -311,6 +311,7 @@ GCR_CX_ACCESSOR_RW(32, 0x018, other)
>  /* GCR_Cx_RESET_BASE - Configure where powered up cores will fetch from */
>  GCR_CX_ACCESSOR_RW(32, 0x020, reset_base)
>  #define CM_GCR_Cx_RESET_BASE_BEVEXCBASE		GENMASK(31, 12)
> +#define CM_GCR_Cx_RESET_BASE_MODE		BIT(1)
> 
>  /* GCR_Cx_ID - Identify the current core */
>  GCR_CX_ACCESSOR_RO(32, 0x028, id)
> diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
> index 5978a8dfb917..53b8306da571 100644
> --- a/arch/mips/include/asm/page.h
> +++ b/arch/mips/include/asm/page.h
> @@ -176,7 +176,11 @@ static inline unsigned long ___pa(unsigned long x)
>  		 * the compatibility segements ckseg0 or ckseg1, or it may
>  		 * be in xkphys.
>  		 */
> +#if defined(CONFIG_USE_XKPHYS)
> +		return XPHYSADDR(x);
> +#else
>  		return x < CKSEG0 ? XPHYSADDR(x) : CPHYSADDR(x);
> +#endif

Dangerous: there might be some code passing KSEG0/1 addresses to __pa, so
we should not disregard them.

>  	}
> 
>  	if (!IS_ENABLED(CONFIG_EVA)) {
> @@ -196,7 +200,11 @@ static inline unsigned long ___pa(unsigned long x)
>  	return x - PAGE_OFFSET + PHYS_OFFSET;
>  }
>  #define __pa(x)		___pa((unsigned long)(x))
> +#if IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_USE_XKPHYS)
> +#define __va(x)		((void *)PHYS_TO_XKSEG_CACHED(x))
> +#else
>  #define __va(x)		((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET))
> +#endif

PAGE_OFFSET resolves to CAC_BASE anyway, so this change is unnecessary.

>  #include <asm/io.h>
> 
>  /*
> @@ -239,6 +247,8 @@ static inline unsigned long kaslr_offset(void)
>  	return __kaslr_offset;
>  }
> 
> +#define UNCAC_ADDR(addr)       (UNCAC_BASE + __pa(addr))
> +
>  #include <asm-generic/memory_model.h>
>  #include <asm-generic/getorder.h>
> 
> diff --git a/arch/mips/include/asm/vga.h b/arch/mips/include/asm/vga.h
> index 0136e0366698..e338e57d0784 100644
> --- a/arch/mips/include/asm/vga.h
> +++ b/arch/mips/include/asm/vga.h
> @@ -16,7 +16,11 @@
>   *	access the videoram directly without any black magic.
>   */
> 
> +#if defined(CONFIG_USE_XKPHYS)
> +#define VGA_MAP_MEM(x, s)	UNCAC_ADDR(0x10000000L + (unsigned long)(x))
> +#else
>  #define VGA_MAP_MEM(x, s)	CKSEG1ADDR(0x10000000L + (unsigned long)(x))
> +#endif

VGA_MAP_MEM is only intended for some really legacy systems; the existing
definition won't break your platform, so better to leave it as is.

> 
>  #define vga_readb(x)	(*(x))
>  #define vga_writeb(x, y)	(*(y) = (x))
> diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
> index 64ecfdac6580..541f31a43a7f 100644
> --- a/arch/mips/kernel/cps-vec.S
> +++ b/arch/mips/kernel/cps-vec.S
> @@ -554,7 +554,11 @@ LEAF(mips_cps_cache_init)
>  	mul	t1, t1, t0
>  	mul	t1, t1, t2
> 
> +#if defined(CONFIG_USE_XKPHYS)
> +	PTR_LI	a0, XKPHYS_CM_CACHED
> +#else
>  	li	a0, CKSEG0
> +#endif

Unnecessary: the KSEG0 address here is only used for matching cache ways,
so it makes no difference whether KSEG0 or XKPHYS is used.

If you are using XKPHYS here, you must extract the CCA from bootinfo
or CP0, as it may vary between systems.

>  	PTR_ADD	a1, a0, t1
>  1:	cache	Index_Store_Tag_I, 0(a0)
>  	PTR_ADD	a0, a0, t0
> @@ -581,7 +585,11 @@ icache_done:
>  	mul	t1, t1, t0
>  	mul	t1, t1, t2
> 
> +#if defined(CONFIG_USE_XKPHYS)
> +	PTR_LI	a0, XKPHYS_CM_CACHED
> +#else

Ditto.

>  	li	a0, CKSEG0
> +#endif
>  	PTR_ADDU a1, a0, t1
>  	PTR_SUBU a1, a1, t0
>  1:	cache	Index_Store_Tag_D, 0(a0)
> diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
> index b6de8e88c1bd..a002058e1838 100644
> --- a/arch/mips/kernel/genex.S
> +++ b/arch/mips/kernel/genex.S
> @@ -272,11 +272,25 @@ NESTED(except_vec_vi, 0, sp)
>  	.set	push
>  	.set	noreorder
>  	PTR_LA	v1, except_vec_vi_handler
> +#if defined(CONFIG_USE_XKPHYS)
> +FEXPORT(except_vec_vi_63_48)
> +	lui	v0, 0		/* Patched - bits 63:48 */
> +FEXPORT(except_vec_vi_47_32)
> +	ori	v0, 0		/* Patched - bits 47:32 */
> +	dsll	v0, v0, 0x10
> +FEXPORT(except_vec_vi_31_16)
> +	ori	v0, 0		/* Patched - bits 31:16 */
> +	dsll	v0, v0, 0x10
> +	jr	v1
> +FEXPORT(except_vec_vi_15_0)
> +	ori	v0, 0		/* Patched - bits 15:0 */
> +#else /* defined(CONFIG_USE_XKPHYS) */
>  FEXPORT(except_vec_vi_lui)
>  	lui	v0, 0		/* Patched */
>  	jr	v1
>  FEXPORT(except_vec_vi_ori)
>  	 ori	v0, 0		/* Patched */
> +#endif /* defined(CONFIG_USE_XKPHYS) */
>  	.set	pop
>  	END(except_vec_vi)
>  EXPORT(except_vec_vi_end)
> diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
> index dd55d59b88db..47e76722a306 100644
> --- a/arch/mips/kernel/smp-cps.c
> +++ b/arch/mips/kernel/smp-cps.c
> @@ -34,10 +34,33 @@ static unsigned __init core_vpe_count(unsigned int 
> cluster, unsigned core)
>  	return min(smp_max_threads, mips_cps_numvps(cluster, core));
>  }
> 
> +/**
> + * plat_core_entry - query reset vector for NMI/reset
> + *
> + * Returns low 32 bits of the reset vector
> + *
> + * This is used to fill 2 registers:
> + * - BEV Base (GCR_BEV_BASE) Offset: 0x0680
> + * - VP Local Reset Exception Base (GCR_CL_RESET_BASE,GCR_CO_RESET_BASE)
> + *   Offset: 0x0020 (0x2020 relative to GCR_BASE_ADDR)
> + *
> + * In both registers, BIT(1) should be set in case it uses address in XKPHYS
> + * (as opposed to KSEG1). This bit defined as CM_GCR_Cx_RESET_BASE_MODE,
> + * using it unconditionally because for GCR_BEV_BASE its value is the same
> + */
> +static u32 plat_core_entry(void)
> +{
> +#if defined(CONFIG_USE_XKPHYS)
> +	return (UNCAC_ADDR(mips_cps_core_entry) & 0xffffffff)
> +			| CM_GCR_Cx_RESET_BASE_MODE;
> +#else
> +	return CKSEG1ADDR((unsigned long)mips_cps_core_entry);
> +#endif

This is a CM3 feature, so perhaps we should handle it in a general
way.

> +}
> +
>  static void __init cps_smp_setup(void)
>  {
>  	unsigned int nclusters, ncores, nvpes, core_vpes;
> -	unsigned long core_entry;
>  	int cl, c, v;
> 
>  	/* Detect & record VPE topology */
> @@ -94,10 +117,8 @@ static void __init cps_smp_setup(void)
>  	/* Make core 0 coherent with everything */
>  	write_gcr_cl_coherence(0xff);
> 
> -	if (mips_cm_revision() >= CM_REV_CM3) {
> -		core_entry = CKSEG1ADDR((unsigned long)mips_cps_core_entry);
> -		write_gcr_bev_base(core_entry);
> -	}
> +	if (mips_cm_revision() >= CM_REV_CM3)
> +		write_gcr_bev_base(plat_core_entry());
> 
>  #ifdef CONFIG_MIPS_MT_FPAFF
>  	/* If we have an FPU, enroll ourselves in the FPU-full mask */
> @@ -213,7 +234,7 @@ static void boot_core(unsigned int core, unsigned 
> int vpe_id)
>  	mips_cm_lock_other(0, core, 0, CM_GCR_Cx_OTHER_BLOCK_LOCAL);
> 
>  	/* Set its reset vector */
> -	write_gcr_co_reset_base(CKSEG1ADDR((unsigned long)mips_cps_core_entry));
> +	write_gcr_co_reset_base(plat_core_entry());
> 
>  	/* Ensure its coherency is disabled */
>  	write_gcr_co_coherence(0);
> @@ -290,7 +311,6 @@ static int cps_boot_secondary(int cpu, struct 
> task_struct *idle)
>  	unsigned vpe_id = cpu_vpe_id(&cpu_data[cpu]);
>  	struct core_boot_config *core_cfg = &mips_cps_core_bootcfg[core];
>  	struct vpe_boot_config *vpe_cfg = &core_cfg->vpe_config[vpe_id];
> -	unsigned long core_entry;
>  	unsigned int remote;
>  	int err;
> 
> @@ -314,8 +334,7 @@ static int cps_boot_secondary(int cpu, struct 
> task_struct *idle)
> 
>  	if (cpu_has_vp) {
>  		mips_cm_lock_other(0, core, vpe_id, CM_GCR_Cx_OTHER_BLOCK_LOCAL);
> -		core_entry = CKSEG1ADDR((unsigned long)mips_cps_core_entry);
> -		write_gcr_co_reset_base(core_entry);
> +		write_gcr_co_reset_base(plat_core_entry());
>  		mips_cm_unlock_other();
>  	}
> 
> diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
> index 246c6a6b0261..875594843626 100644
> --- a/arch/mips/kernel/traps.c
> +++ b/arch/mips/kernel/traps.c
> @@ -2091,11 +2091,20 @@ static void *set_vi_srs_handler(int n, 
> vi_handler_t addr, int srs)
>  		 * If no shadow set is selected then use the default handler
>  		 * that does normal register saving and standard interrupt exit
>  		 */
> -		extern const u8 except_vec_vi[], except_vec_vi_lui[];
> -		extern const u8 except_vec_vi_ori[], except_vec_vi_end[];
> +		extern const u8 except_vec_vi[], except_vec_vi_end[];
>  		extern const u8 rollback_except_vec_vi[];
>  		const u8 *vec_start = using_rollback_handler() ?
>  				      rollback_except_vec_vi : except_vec_vi;
> +		const int handler_len = except_vec_vi_end - vec_start;
> +#if defined(CONFIG_USE_XKPHYS)
> +		extern const u8 except_vec_vi_63_48[], except_vec_vi_47_32[];
> +		extern const u8 except_vec_vi_31_16[], except_vec_vi_15_0[];
> +		const int offset_63_48 = except_vec_vi_63_48 - vec_start;
> +		const int offset_47_32 = except_vec_vi_47_32 - vec_start;
> +		const int offset_31_16 = except_vec_vi_31_16 - vec_start;
> +		const int offset_15_0  = except_vec_vi_15_0  - vec_start;
> +#else /* defined(CONFIG_USE_XKPHYS) */
> +		extern const u8 except_vec_vi_lui[], except_vec_vi_ori[];
>  #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN)
>  		const int lui_offset = except_vec_vi_lui - vec_start + 2;
>  		const int ori_offset = except_vec_vi_ori - vec_start + 2;
> @@ -2103,7 +2112,7 @@ static void *set_vi_srs_handler(int n, 
> vi_handler_t addr, int srs)
>  		const int lui_offset = except_vec_vi_lui - vec_start;
>  		const int ori_offset = except_vec_vi_ori - vec_start;
>  #endif
> -		const int handler_len = except_vec_vi_end - vec_start;
> +#endif /* defined(CONFIG_USE_XKPHYS) */
> 
>  		if (handler_len > VECTORSPACING) {
>  			/*
> @@ -2119,10 +2128,21 @@ static void *set_vi_srs_handler(int n, 
> vi_handler_t addr, int srs)
>  #else
>  				handler_len);
>  #endif
> +#if defined(CONFIG_USE_XKPHYS)
> +		h = (u16 *)(b + offset_63_48);
> +		*h = (handler >> 48) & 0xffff;
> +		h = (u16 *)(b + offset_47_32);
> +		*h = (handler >> 32) & 0xffff;
> +		h = (u16 *)(b + offset_31_16);
> +		*h = (handler >> 16) & 0xffff;
> +		h = (u16 *)(b + offset_15_0);
> +		*h = (handler >> 0) & 0xffff;
> +#else /* defined(CONFIG_USE_XKPHYS) */
>  		h = (u16 *)(b + lui_offset);
>  		*h = (handler >> 16) & 0xffff;
>  		h = (u16 *)(b + ori_offset);
>  		*h = (handler & 0xffff);
> +#endif /* defined(CONFIG_USE_XKPHYS) */
>  		local_flush_icache_range((unsigned long)b,
>  					 (unsigned long)(b+handler_len));
>  	}
> @@ -2332,7 +2352,11 @@ static const char panic_null_cerr[] =
>  void set_uncached_handler(unsigned long offset, void *addr,
>  	unsigned long size)
>  {
> +#if IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_USE_XKPHYS)
> +	unsigned long uncached_ebase = UNCAC_ADDR(ebase);
> +#else
>  	unsigned long uncached_ebase = CKSEG1ADDR(ebase);
> +#endif
> 
>  	if (!addr)
>  		panic(panic_null_cerr);
> @@ -2384,9 +2408,11 @@ void __init trap_init(void)
>  		 * EVA is special though as it allows segments to be rearranged
>  		 * and to become uncached during cache error handling.
>  		 */
> +#if !defined(CONFIG_USE_XKPHYS)
>  		if (!IS_ENABLED(CONFIG_EVA) && !WARN_ON(ebase_pa >= 0x20000000))
>  			ebase = CKSEG0ADDR(ebase_pa);
>  		else
> +#endif
>  			ebase = (unsigned long)phys_to_virt(ebase_pa);
>  	}
> 
> diff --git a/arch/mips/lib/uncached.c b/arch/mips/lib/uncached.c
> index f80a67c092b6..8a78348a2dd7 100644
> --- a/arch/mips/lib/uncached.c
> +++ b/arch/mips/lib/uncached.c
> @@ -44,6 +44,10 @@ unsigned long run_uncached(void *func)
> 
>  	__asm__("move %0, $sp" : "=r" (sp));
> 
> +#if defined(CONFIG_USE_XKPHYS)
> +	if (IS_XKPHYS(sp))
> +		usp = UNCAC_ADDR(sp);

Unnecessary: the later `else if` branch already handles an XKPHYS sp.

> +#else /* defined(CONFIG_USE_XKPHYS) */
>  	if (sp >= (long)CKSEG0 && sp < (long)CKSEG2)
>  		usp = CKSEG1ADDR(sp);
>  #ifdef CONFIG_64BIT
> @@ -52,10 +56,15 @@ unsigned long run_uncached(void *func)
>  		usp = PHYS_TO_XKPHYS(K_CALG_UNCACHED,
>  				     XKPHYS_TO_PHYS((long long)sp));
>  #endif
> +#endif /* defined(CONFIG_USE_XKPHYS) */
>  	else {
>  		BUG();
>  		usp = sp;
>  	}
> +#if defined(CONFIG_USE_XKPHYS)
> +	if (IS_XKPHYS(lfunc))
> +		ufunc = UNCAC_ADDR(lfunc);

ditto.

> +#else /* defined(CONFIG_USE_XKPHYS) */
>  	if (lfunc >= (long)CKSEG0 && lfunc < (long)CKSEG2)
>  		ufunc = CKSEG1ADDR(lfunc);
>  #ifdef CONFIG_64BIT
> @@ -64,6 +73,7 @@ unsigned long run_uncached(void *func)
>  		ufunc = PHYS_TO_XKPHYS(K_CALG_UNCACHED,
>  				       XKPHYS_TO_PHYS((long long)lfunc));
>  #endif
> +#endif /* defined(CONFIG_USE_XKPHYS) */
>  	else {
>  		BUG();
>  		ufunc = lfunc;
> diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
> index 5dcb525a8995..eb57283ec4e0 100644
> --- a/arch/mips/mm/init.c
> +++ b/arch/mips/mm/init.c
> @@ -427,7 +427,7 @@ void __init paging_init(void)
>  	free_area_init(max_zone_pfns);
>  }
> 
> -#ifdef CONFIG_64BIT
> +#if defined(CONFIG_64BIT) && !defined(CONFIG_USE_XKPHYS)
>  static struct kcore_list kcore_kseg0;
>  #endif
> 
> @@ -470,7 +470,7 @@ void __init mem_init(void)
>  	setup_zero_pages();	/* Setup zeroed pages.  */
>  	mem_init_free_highmem();
> 
> -#ifdef CONFIG_64BIT
> +#if defined(CONFIG_64BIT) && !defined(CONFIG_USE_XKPHYS)
>  	if ((unsigned long) &_text > (unsigned long) CKSEG0)
>  		/* The -4 is a hack so that user tools don't have to handle
>  		   the overflow.  */
> -- 
> 2.40.1

Thanks.
Gregory CLEMENT Oct. 23, 2023, 3:45 p.m. UTC | #18
Hello Jiaxun,

> 在2023年10月4日十月 下午5:10,Gregory CLEMENT写道:
>> From: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
>>
>> Now 64-bit MIPS uses 32-bit compatible segments KSEG0 and KSEG1
>> to trivially map first 1/2 GByte of physical memory. This memory
>> used to run kernel. This mean, one should have memory installed
>> in this area in order for Linux to work.
>>
>> Kconfig CONFIG_USE_XKPHYS introduced; it adds support for kernel
>> to use virtual addresses from the XKPHYS segment for both cached
>> and uncached access. XKPHYS allows to access 2^48 bytes of
>> memory, thus allowing kernel to work with any memory
>> configuration.
>>
>> MIPS CPU sets KX bit in the CP0 status register at reset
>> if RESET_BASE_MODE (BIT 1) set in the GCR_CL_RESET_BASE.
>>
>> Reset vector should fit into 32-bit. If reset vector put outside of
>> KSEG1, BIT(1) should be set in this value.
>>
>> IRQ handler for CPU updated to generate 64-bit address for jump
>
> Please use existing KBUILD_SYM32 symbol.

Could you add more detail ?

Where do you think KBUILD_SYM32 symbol should be used ?

Gregory


>
> Thanks
> - Jiaxun
>
>>
>> Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
>> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
>> ---
>>  arch/mips/Kconfig                 | 15 +++++++++++++
>>  arch/mips/Makefile                |  4 ++++
>>  arch/mips/generic/Platform        |  5 +++++
>>  arch/mips/include/asm/addrspace.h | 12 ++++++++--
>>  arch/mips/include/asm/mips-cm.h   |  1 +
>>  arch/mips/include/asm/page.h      | 10 +++++++++
>>  arch/mips/include/asm/vga.h       |  4 ++++
>>  arch/mips/kernel/cps-vec.S        |  8 +++++++
>>  arch/mips/kernel/genex.S          | 14 ++++++++++++
>>  arch/mips/kernel/smp-cps.c        | 37 +++++++++++++++++++++++--------
>>  arch/mips/kernel/traps.c          | 32 +++++++++++++++++++++++---
>>  arch/mips/lib/uncached.c          | 10 +++++++++
>>  arch/mips/mm/init.c               |  4 ++--
>>  13 files changed, 140 insertions(+), 16 deletions(-)
>>
>> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
>> index bc8421859006..92832bbcca5d 100644
>> --- a/arch/mips/Kconfig
>> +++ b/arch/mips/Kconfig
>> @@ -2026,6 +2026,21 @@ config 64BIT
>> 
>>  endchoice
>> 
>> +config USE_XKPHYS
>> +	bool "use virtual address from XKPHYS"
>> +	depends on 64BIT
>> +	default n
>> +	help
>> +	 By default, MIPS uses 32-bit compatible segments KSEG0 and KSEG1
>> +	 to trivially map first 1/2 GByte of physical memory. This mean,
>> +	 one should have memory installed in this area in order for Linux to
>> +	 work. With this option selected, kernel uses virtual addresses from
>> +	 the XKPHYS segment for both cached and uncached access. XKPHYS allows
>> +	 to access 2^48 bytes of memory, thus allowing to work with any memory
>> +	 configuration.
>> +
>> +	 Say N if not sure
>> +
>>  config MIPS_VA_BITS_48
>>  	bool "48 bits virtual memory"
>>  	depends on 64BIT
>> diff --git a/arch/mips/Makefile b/arch/mips/Makefile
>> index f49807e1f19b..544ee8427cab 100644
>> --- a/arch/mips/Makefile
>> +++ b/arch/mips/Makefile
>> @@ -303,6 +303,10 @@ ifdef CONFIG_64BIT
>>      endif
>>    endif
>> 
>> +  ifdef CONFIG_USE_XKPHYS
>> +      KBUILD_SYM32 = n
>> +  endif
>> +
>>    ifeq ($(KBUILD_SYM32), y)
>>      cflags-$(KBUILD_SYM32) += -msym32 -DKBUILD_64BIT_SYM32
>>    else
>> diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform
>> index 0c03623f3897..2be9947814ad 100644
>> --- a/arch/mips/generic/Platform
>> +++ b/arch/mips/generic/Platform
>> @@ -12,7 +12,12 @@
>>  cflags-$(CONFIG_MACH_INGENIC_SOC)	+= 
>> -I$(srctree)/arch/mips/include/asm/mach-ingenic
>>  cflags-$(CONFIG_MIPS_GENERIC)	+= 
>> -I$(srctree)/arch/mips/include/asm/mach-generic
>> 
>> +ifndef (CONFIG_USE_XKPHYS)
>>  load-$(CONFIG_MIPS_GENERIC)	+= 0xffffffff80100000
>> +else
>> +load-$(CONFIG_MIPS_GENERIC)	+= 0xa800000080100000
>> +endif
>> +
>>  all-$(CONFIG_MIPS_GENERIC)	+= vmlinux.gz.itb
>> 
>>  its-y					:= vmlinux.its.S
>> diff --git a/arch/mips/include/asm/addrspace.h 
>> b/arch/mips/include/asm/addrspace.h
>> index 59a48c60a065..8dc500d8e66d 100644
>> --- a/arch/mips/include/asm/addrspace.h
>> +++ b/arch/mips/include/asm/addrspace.h
>> @@ -65,10 +65,15 @@
>>  #define XKSSEG			_CONST64_(0x4000000000000000)
>>  #define XKPHYS			_CONST64_(0x8000000000000000)
>>  #define XKSEG			_CONST64_(0xc000000000000000)
>> +#if !defined(CONFIG_USE_XKPHYS)
>>  #define CKSEG0			_CONST64_(0xffffffff80000000)
>>  #define CKSEG1			_CONST64_(0xffffffffa0000000)
>>  #define CKSSEG			_CONST64_(0xffffffffc0000000)
>>  #define CKSEG3			_CONST64_(0xffffffffe0000000)
>> +#else
>> +#define CKSEG0			XKPHYS_CM_CACHED
>> +#define CKSEG1			XKPHYS_CM_UNCACHED
>> +#endif /* !defined(CONFIG_USE_XKPHYS) */
>> 
>>  #define CKSEG0ADDR(a)		(CPHYSADDR(a) | CKSEG0)
>>  #define CKSEG1ADDR(a)		(CPHYSADDR(a) | CKSEG1)
>> @@ -126,8 +131,11 @@
>>  #define PHYS_TO_XKSEG_UNCACHED(p)	PHYS_TO_XKPHYS(K_CALG_UNCACHED, (p))
>>  #define PHYS_TO_XKSEG_CACHED(p)		PHYS_TO_XKPHYS(K_CALG_COH_SHAREABLE, (p))
>>  #define XKPHYS_TO_PHYS(p)		((p) & TO_PHYS_MASK)
>> -#define PHYS_TO_XKPHYS(cm, a)		(XKPHYS | (_ACAST64_(cm) << 59) | (a))
>> -
>> +#define XKPHYS_CM(cm)			(XKPHYS | (_ACAST64_(cm) << 59))
>> +#define PHYS_TO_XKPHYS(cm, a)		(XKPHYS_CM(cm) | (a))
>> +#define XKPHYS_CM_CACHED		(XKPHYS_CM(K_CALG_COH_SHAREABLE))
>> +#define XKPHYS_CM_UNCACHED		(XKPHYS_CM(K_CALG_UNCACHED))
>> +#define IS_XKPHYS(a)			(((a) >> 62) == 2)
>>  /*
>>   * The ultimate limited of the 64-bit MIPS architecture:  2 bits for selecting
>>   * the region, 3 bits for the CCA mode.  This leaves 59 bits of which the
>> diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h
>> index 23c67c0871b1..15d8d69de455 100644
>> --- a/arch/mips/include/asm/mips-cm.h
>> +++ b/arch/mips/include/asm/mips-cm.h
>> @@ -311,6 +311,7 @@ GCR_CX_ACCESSOR_RW(32, 0x018, other)
>>  /* GCR_Cx_RESET_BASE - Configure where powered up cores will fetch from */
>>  GCR_CX_ACCESSOR_RW(32, 0x020, reset_base)
>>  #define CM_GCR_Cx_RESET_BASE_BEVEXCBASE		GENMASK(31, 12)
>> +#define CM_GCR_Cx_RESET_BASE_MODE		BIT(1)
>> 
>>  /* GCR_Cx_ID - Identify the current core */
>>  GCR_CX_ACCESSOR_RO(32, 0x028, id)
>> diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
>> index 5978a8dfb917..53b8306da571 100644
>> --- a/arch/mips/include/asm/page.h
>> +++ b/arch/mips/include/asm/page.h
>> @@ -176,7 +176,11 @@ static inline unsigned long ___pa(unsigned long x)
>>  		 * the compatibility segements ckseg0 or ckseg1, or it may
>>  		 * be in xkphys.
>>  		 */
>> +#if defined(CONFIG_USE_XKPHYS)
>> +		return XPHYSADDR(x);
>> +#else
>>  		return x < CKSEG0 ? XPHYSADDR(x) : CPHYSADDR(x);
>> +#endif
>>  	}
>> 
>>  	if (!IS_ENABLED(CONFIG_EVA)) {
>> @@ -196,7 +200,11 @@ static inline unsigned long ___pa(unsigned long x)
>>  	return x - PAGE_OFFSET + PHYS_OFFSET;
>>  }
>>  #define __pa(x)		___pa((unsigned long)(x))
>> +#if IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_USE_XKPHYS)
>> +#define __va(x)		((void *)PHYS_TO_XKSEG_CACHED(x))
>> +#else
>>  #define __va(x)		((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET))
>> +#endif
>>  #include <asm/io.h>
>> 
>>  /*
>> @@ -239,6 +247,8 @@ static inline unsigned long kaslr_offset(void)
>>  	return __kaslr_offset;
>>  }
>> 
>> +#define UNCAC_ADDR(addr)       (UNCAC_BASE + __pa(addr))
>> +
>>  #include <asm-generic/memory_model.h>
>>  #include <asm-generic/getorder.h>
>> 
>> diff --git a/arch/mips/include/asm/vga.h b/arch/mips/include/asm/vga.h
>> index 0136e0366698..e338e57d0784 100644
>> --- a/arch/mips/include/asm/vga.h
>> +++ b/arch/mips/include/asm/vga.h
>> @@ -16,7 +16,11 @@
>>   *	access the videoram directly without any black magic.
>>   */
>> 
>> +#if defined(CONFIG_USE_XKPHYS)
>> +#define VGA_MAP_MEM(x, s)	UNCAC_ADDR(0x10000000L + (unsigned long)(x))
>> +#else
>>  #define VGA_MAP_MEM(x, s)	CKSEG1ADDR(0x10000000L + (unsigned long)(x))
>> +#endif
>> 
>>  #define vga_readb(x)	(*(x))
>>  #define vga_writeb(x, y)	(*(y) = (x))
>> diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
>> index 64ecfdac6580..541f31a43a7f 100644
>> --- a/arch/mips/kernel/cps-vec.S
>> +++ b/arch/mips/kernel/cps-vec.S
>> @@ -554,7 +554,11 @@ LEAF(mips_cps_cache_init)
>>  	mul	t1, t1, t0
>>  	mul	t1, t1, t2
>> 
>> +#if defined(CONFIG_USE_XKPHYS)
>> +	PTR_LI	a0, XKPHYS_CM_CACHED
>> +#else
>>  	li	a0, CKSEG0
>> +#endif
>>  	PTR_ADD	a1, a0, t1
>>  1:	cache	Index_Store_Tag_I, 0(a0)
>>  	PTR_ADD	a0, a0, t0
>> @@ -581,7 +585,11 @@ icache_done:
>>  	mul	t1, t1, t0
>>  	mul	t1, t1, t2
>> 
>> +#if defined(CONFIG_USE_XKPHYS)
>> +	PTR_LI	a0, XKPHYS_CM_CACHED
>> +#else
>>  	li	a0, CKSEG0
>> +#endif
>>  	PTR_ADDU a1, a0, t1
>>  	PTR_SUBU a1, a1, t0
>>  1:	cache	Index_Store_Tag_D, 0(a0)
>> diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
>> index b6de8e88c1bd..a002058e1838 100644
>> --- a/arch/mips/kernel/genex.S
>> +++ b/arch/mips/kernel/genex.S
>> @@ -272,11 +272,25 @@ NESTED(except_vec_vi, 0, sp)
>>  	.set	push
>>  	.set	noreorder
>>  	PTR_LA	v1, except_vec_vi_handler
>> +#if defined(CONFIG_USE_XKPHYS)
>> +FEXPORT(except_vec_vi_63_48)
>> +	lui	v0, 0		/* Patched - bits 63:48 */
>> +FEXPORT(except_vec_vi_47_32)
>> +	ori	v0, 0		/* Patched - bits 47:32 */
>> +	dsll	v0, v0, 0x10
>> +FEXPORT(except_vec_vi_31_16)
>> +	ori	v0, 0		/* Patched - bits 31:16 */
>> +	dsll	v0, v0, 0x10
>> +	jr	v1
>> +FEXPORT(except_vec_vi_15_0)
>> +	ori	v0, 0		/* Patched - bits 15:0 */
>> +#else /* defined(CONFIG_USE_XKPHYS) */
>>  FEXPORT(except_vec_vi_lui)
>>  	lui	v0, 0		/* Patched */
>>  	jr	v1
>>  FEXPORT(except_vec_vi_ori)
>>  	 ori	v0, 0		/* Patched */
>> +#endif /* defined(CONFIG_USE_XKPHYS) */
>>  	.set	pop
>>  	END(except_vec_vi)
>>  EXPORT(except_vec_vi_end)
>> diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
>> index dd55d59b88db..47e76722a306 100644
>> --- a/arch/mips/kernel/smp-cps.c
>> +++ b/arch/mips/kernel/smp-cps.c
>> @@ -34,10 +34,33 @@ static unsigned __init core_vpe_count(unsigned int 
>> cluster, unsigned core)
>>  	return min(smp_max_threads, mips_cps_numvps(cluster, core));
>>  }
>> 
>> +/**
>> + * plat_core_entry - query reset vector for NMI/reset
>> + *
>> + * Returns low 32 bits of the reset vector
>> + *
>> + * This is used to fill 2 registers:
>> + * - BEV Base (GCR_BEV_BASE) Offset: 0x0680
>> + * - VP Local Reset Exception Base (GCR_CL_RESET_BASE,GCR_CO_RESET_BASE)
>> + *   Offset: 0x0020 (0x2020 relative to GCR_BASE_ADDR)
>> + *
>> + * In both registers, BIT(1) should be set in case it uses address in XKPHYS
>> + * (as opposed to KSEG1). This bit defined as CM_GCR_Cx_RESET_BASE_MODE,
>> + * using it unconditionally because for GCR_BEV_BASE its value is the same
>> + */
>> +static u32 plat_core_entry(void)
>> +{
>> +#if defined(CONFIG_USE_XKPHYS)
>> +	return (UNCAC_ADDR(mips_cps_core_entry) & 0xffffffff)
>> +			| CM_GCR_Cx_RESET_BASE_MODE;
>> +#else
>> +	return CKSEG1ADDR((unsigned long)mips_cps_core_entry);
>> +#endif
>> +}
>> +
>>  static void __init cps_smp_setup(void)
>>  {
>>  	unsigned int nclusters, ncores, nvpes, core_vpes;
>> -	unsigned long core_entry;
>>  	int cl, c, v;
>> 
>>  	/* Detect & record VPE topology */
>> @@ -94,10 +117,8 @@ static void __init cps_smp_setup(void)
>>  	/* Make core 0 coherent with everything */
>>  	write_gcr_cl_coherence(0xff);
>> 
>> -	if (mips_cm_revision() >= CM_REV_CM3) {
>> -		core_entry = CKSEG1ADDR((unsigned long)mips_cps_core_entry);
>> -		write_gcr_bev_base(core_entry);
>> -	}
>> +	if (mips_cm_revision() >= CM_REV_CM3)
>> +		write_gcr_bev_base(plat_core_entry());
>> 
>>  #ifdef CONFIG_MIPS_MT_FPAFF
>>  	/* If we have an FPU, enroll ourselves in the FPU-full mask */
>> @@ -213,7 +234,7 @@ static void boot_core(unsigned int core, unsigned 
>> int vpe_id)
>>  	mips_cm_lock_other(0, core, 0, CM_GCR_Cx_OTHER_BLOCK_LOCAL);
>> 
>>  	/* Set its reset vector */
>> -	write_gcr_co_reset_base(CKSEG1ADDR((unsigned long)mips_cps_core_entry));
>> +	write_gcr_co_reset_base(plat_core_entry());
>> 
>>  	/* Ensure its coherency is disabled */
>>  	write_gcr_co_coherence(0);
>> @@ -290,7 +311,6 @@ static int cps_boot_secondary(int cpu, struct 
>> task_struct *idle)
>>  	unsigned vpe_id = cpu_vpe_id(&cpu_data[cpu]);
>>  	struct core_boot_config *core_cfg = &mips_cps_core_bootcfg[core];
>>  	struct vpe_boot_config *vpe_cfg = &core_cfg->vpe_config[vpe_id];
>> -	unsigned long core_entry;
>>  	unsigned int remote;
>>  	int err;
>> 
>> @@ -314,8 +334,7 @@ static int cps_boot_secondary(int cpu, struct 
>> task_struct *idle)
>> 
>>  	if (cpu_has_vp) {
>>  		mips_cm_lock_other(0, core, vpe_id, CM_GCR_Cx_OTHER_BLOCK_LOCAL);
>> -		core_entry = CKSEG1ADDR((unsigned long)mips_cps_core_entry);
>> -		write_gcr_co_reset_base(core_entry);
>> +		write_gcr_co_reset_base(plat_core_entry());
>>  		mips_cm_unlock_other();
>>  	}
>> 
>> diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
>> index 246c6a6b0261..875594843626 100644
>> --- a/arch/mips/kernel/traps.c
>> +++ b/arch/mips/kernel/traps.c
>> @@ -2091,11 +2091,20 @@ static void *set_vi_srs_handler(int n, 
>> vi_handler_t addr, int srs)
>>  		 * If no shadow set is selected then use the default handler
>>  		 * that does normal register saving and standard interrupt exit
>>  		 */
>> -		extern const u8 except_vec_vi[], except_vec_vi_lui[];
>> -		extern const u8 except_vec_vi_ori[], except_vec_vi_end[];
>> +		extern const u8 except_vec_vi[], except_vec_vi_end[];
>>  		extern const u8 rollback_except_vec_vi[];
>>  		const u8 *vec_start = using_rollback_handler() ?
>>  				      rollback_except_vec_vi : except_vec_vi;
>> +		const int handler_len = except_vec_vi_end - vec_start;
>> +#if defined(CONFIG_USE_XKPHYS)
>> +		extern const u8 except_vec_vi_63_48[], except_vec_vi_47_32[];
>> +		extern const u8 except_vec_vi_31_16[], except_vec_vi_15_0[];
>> +		const int offset_63_48 = except_vec_vi_63_48 - vec_start;
>> +		const int offset_47_32 = except_vec_vi_47_32 - vec_start;
>> +		const int offset_31_16 = except_vec_vi_31_16 - vec_start;
>> +		const int offset_15_0  = except_vec_vi_15_0  - vec_start;
>> +#else /* defined(CONFIG_USE_XKPHYS) */
>> +		extern const u8 except_vec_vi_lui[], except_vec_vi_ori[];
>>  #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN)
>>  		const int lui_offset = except_vec_vi_lui - vec_start + 2;
>>  		const int ori_offset = except_vec_vi_ori - vec_start + 2;
>> @@ -2103,7 +2112,7 @@ static void *set_vi_srs_handler(int n, 
>> vi_handler_t addr, int srs)
>>  		const int lui_offset = except_vec_vi_lui - vec_start;
>>  		const int ori_offset = except_vec_vi_ori - vec_start;
>>  #endif
>> -		const int handler_len = except_vec_vi_end - vec_start;
>> +#endif /* defined(CONFIG_USE_XKPHYS) */
>> 
>>  		if (handler_len > VECTORSPACING) {
>>  			/*
>> @@ -2119,10 +2128,21 @@ static void *set_vi_srs_handler(int n, 
>> vi_handler_t addr, int srs)
>>  #else
>>  				handler_len);
>>  #endif
>> +#if defined(CONFIG_USE_XKPHYS)
>> +		h = (u16 *)(b + offset_63_48);
>> +		*h = (handler >> 48) & 0xffff;
>> +		h = (u16 *)(b + offset_47_32);
>> +		*h = (handler >> 32) & 0xffff;
>> +		h = (u16 *)(b + offset_31_16);
>> +		*h = (handler >> 16) & 0xffff;
>> +		h = (u16 *)(b + offset_15_0);
>> +		*h = (handler >> 0) & 0xffff;
>> +#else /* defined(CONFIG_USE_XKPHYS) */
>>  		h = (u16 *)(b + lui_offset);
>>  		*h = (handler >> 16) & 0xffff;
>>  		h = (u16 *)(b + ori_offset);
>>  		*h = (handler & 0xffff);
>> +#endif /* defined(CONFIG_USE_XKPHYS) */
>>  		local_flush_icache_range((unsigned long)b,
>>  					 (unsigned long)(b+handler_len));
>>  	}
>> @@ -2332,7 +2352,11 @@ static const char panic_null_cerr[] =
>>  void set_uncached_handler(unsigned long offset, void *addr,
>>  	unsigned long size)
>>  {
>> +#if IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_USE_XKPHYS)
>> +	unsigned long uncached_ebase = UNCAC_ADDR(ebase);
>> +#else
>>  	unsigned long uncached_ebase = CKSEG1ADDR(ebase);
>> +#endif
>> 
>>  	if (!addr)
>>  		panic(panic_null_cerr);
>> @@ -2384,9 +2408,11 @@ void __init trap_init(void)
>>  		 * EVA is special though as it allows segments to be rearranged
>>  		 * and to become uncached during cache error handling.
>>  		 */
>> +#if !defined(CONFIG_USE_XKPHYS)
>>  		if (!IS_ENABLED(CONFIG_EVA) && !WARN_ON(ebase_pa >= 0x20000000))
>>  			ebase = CKSEG0ADDR(ebase_pa);
>>  		else
>> +#endif
>>  			ebase = (unsigned long)phys_to_virt(ebase_pa);
>>  	}
>> 
>> diff --git a/arch/mips/lib/uncached.c b/arch/mips/lib/uncached.c
>> index f80a67c092b6..8a78348a2dd7 100644
>> --- a/arch/mips/lib/uncached.c
>> +++ b/arch/mips/lib/uncached.c
>> @@ -44,6 +44,10 @@ unsigned long run_uncached(void *func)
>> 
>>  	__asm__("move %0, $sp" : "=r" (sp));
>> 
>> +#if defined(CONFIG_USE_XKPHYS)
>> +	if (IS_XKPHYS(sp))
>> +		usp = UNCAC_ADDR(sp);
>> +#else /* defined(CONFIG_USE_XKPHYS) */
>>  	if (sp >= (long)CKSEG0 && sp < (long)CKSEG2)
>>  		usp = CKSEG1ADDR(sp);
>>  #ifdef CONFIG_64BIT
>> @@ -52,10 +56,15 @@ unsigned long run_uncached(void *func)
>>  		usp = PHYS_TO_XKPHYS(K_CALG_UNCACHED,
>>  				     XKPHYS_TO_PHYS((long long)sp));
>>  #endif
>> +#endif /* defined(CONFIG_USE_XKPHYS) */
>>  	else {
>>  		BUG();
>>  		usp = sp;
>>  	}
>> +#if defined(CONFIG_USE_XKPHYS)
>> +	if (IS_XKPHYS(lfunc))
>> +		ufunc = UNCAC_ADDR(lfunc);
>> +#else /* defined(CONFIG_USE_XKPHYS) */
>>  	if (lfunc >= (long)CKSEG0 && lfunc < (long)CKSEG2)
>>  		ufunc = CKSEG1ADDR(lfunc);
>>  #ifdef CONFIG_64BIT
>> @@ -64,6 +73,7 @@ unsigned long run_uncached(void *func)
>>  		ufunc = PHYS_TO_XKPHYS(K_CALG_UNCACHED,
>>  				       XKPHYS_TO_PHYS((long long)lfunc));
>>  #endif
>> +#endif /* defined(CONFIG_USE_XKPHYS) */
>>  	else {
>>  		BUG();
>>  		ufunc = lfunc;
>> diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
>> index 5dcb525a8995..eb57283ec4e0 100644
>> --- a/arch/mips/mm/init.c
>> +++ b/arch/mips/mm/init.c
>> @@ -427,7 +427,7 @@ void __init paging_init(void)
>>  	free_area_init(max_zone_pfns);
>>  }
>> 
>> -#ifdef CONFIG_64BIT
>> +#if defined(CONFIG_64BIT) && !defined(CONFIG_USE_XKPHYS)
>>  static struct kcore_list kcore_kseg0;
>>  #endif
>> 
>> @@ -470,7 +470,7 @@ void __init mem_init(void)
>>  	setup_zero_pages();	/* Setup zeroed pages.  */
>>  	mem_init_free_highmem();
>> 
>> -#ifdef CONFIG_64BIT
>> +#if defined(CONFIG_64BIT) && !defined(CONFIG_USE_XKPHYS)
>>  	if ((unsigned long) &_text > (unsigned long) CKSEG0)
>>  		/* The -4 is a hack so that user tools don't have to handle
>>  		   the overflow.  */
>> -- 
>> 2.40.1
>
> -- 
> - Jiaxun
Florian Fainelli Oct. 24, 2023, 1:49 a.m. UTC | #19
On 10/4/2023 9:10 AM, Gregory CLEMENT wrote:
> The code clearing BSS already use macro or use correct instruction
> depending id the CPU is 32 bits or 64 bits.

s/id/if/

> However, a few
> instructions remained 32 bits only.
> 
> By using the accurate MACRO, it is now possible to deal with memory
> address beyond 32 bits. As a side effect, when using 64bits processor,
> it also divides the loop number needed to clear the BSS by 2.
> 
> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>

Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>

> ---
>   arch/mips/boot/compressed/head.S | 4 ++--
>   1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/mips/boot/compressed/head.S b/arch/mips/boot/compressed/head.S
> index 5795d0af1e1b..d237a834b85e 100644
> --- a/arch/mips/boot/compressed/head.S
> +++ b/arch/mips/boot/compressed/head.S
> @@ -25,8 +25,8 @@
>   	/* Clear BSS */
>   	PTR_LA	a0, _edata
>   	PTR_LA	a2, _end
> -1:	sw	zero, 0(a0)
> -	addiu	a0, a0, 4
> +1:	PTR_S	zero, 0(a0)
> +	PTR_ADDIU a0, a0, PTRSIZE
>   	bne	a2, a0, 1b
>   
>   	PTR_LA	a0, (.heap)	     /* heap address */
Maciej W. Rozycki Oct. 24, 2023, 9:05 a.m. UTC | #20
On Thu, 12 Oct 2023, Jiaxun Yang wrote:

> > There is a kind of mirror but its physical address start at 0x8000000
> > so beyond the first 512MBytes that are used for KSEG0.
> 
> Really, KSEG0 range is 0x00000000 to 0x20000000, and 0x08000000 to 0x10000000
> is definitely within that range.
> 
> But I'd agree that 0x08000000 to 0x10000000 (32MB) seems too small for kernel
> text and data. So yeah, it makes sense to load kernel into XKPHYS.

 Hmm, my calculation indicates the range shown spans 128MiB, which I think 
is usually suitably large to hold kernel static text and data even for the 
richest configurations.  Regardless, loading into XKPHYS isn't wrong, with 
some platforms we've been doing it for decades now.

  Maciej
Gregory CLEMENT Oct. 24, 2023, 4:08 p.m. UTC | #21
"Jiaxun Yang" <jiaxun.yang@flygoat.com> writes:

> 在2023年10月4日十月 下午5:10,Gregory CLEMENT写道:
>> From: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
>>
>> Now 64-bit MIPS uses 32-bit compatible segments KSEG0 and KSEG1
>> to trivially map first 1/2 GByte of physical memory. This memory
>> used to run kernel. This mean, one should have memory installed
>> in this area in order for Linux to work.
>>
>> Kconfig CONFIG_USE_XKPHYS introduced; it adds support for kernel
>> to use virtual addresses from the XKPHYS segment for both cached
>> and uncached access. XKPHYS allows to access 2^48 bytes of
>> memory, thus allowing kernel to work with any memory
>> configuration.
>>
>> MIPS CPU sets KX bit in the CP0 status register at reset
>> if RESET_BASE_MODE (BIT 1) set in the GCR_CL_RESET_BASE.
>>
>> Reset vector should fit into 32-bit. If reset vector put outside of
>> KSEG1, BIT(1) should be set in this value.
>>
>> IRQ handler for CPU updated to generate 64-bit address for jump
>
> So I just spent some time to review and test this patch on QEMU,
> comments below:

Thanks for your time.

>
>>
>> Signed-off-by: Vladimir Kondratiev <vladimir.kondratiev@intel.com>
>> Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
>> ---
>>  arch/mips/Kconfig                 | 15 +++++++++++++
>>  arch/mips/Makefile                |  4 ++++
>>  arch/mips/generic/Platform        |  5 +++++
>>  arch/mips/include/asm/addrspace.h | 12 ++++++++--
>>  arch/mips/include/asm/mips-cm.h   |  1 +
>>  arch/mips/include/asm/page.h      | 10 +++++++++
>>  arch/mips/include/asm/vga.h       |  4 ++++
>>  arch/mips/kernel/cps-vec.S        |  8 +++++++
>>  arch/mips/kernel/genex.S          | 14 ++++++++++++
>>  arch/mips/kernel/smp-cps.c        | 37 +++++++++++++++++++++++--------
>>  arch/mips/kernel/traps.c          | 32 +++++++++++++++++++++++---
>>  arch/mips/lib/uncached.c          | 10 +++++++++
>>  arch/mips/mm/init.c               |  4 ++--
>>  13 files changed, 140 insertions(+), 16 deletions(-)
>>
>> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
>> index bc8421859006..92832bbcca5d 100644
>> --- a/arch/mips/Kconfig
>> +++ b/arch/mips/Kconfig
>> @@ -2026,6 +2026,21 @@ config 64BIT
>> 
>>  endchoice
>> 
>> +config USE_XKPHYS
>> +	bool "use virtual address from XKPHYS"
>> +	depends on 64BIT
>> +	default n
>> +	help
>> +	 By default, MIPS uses 32-bit compatible segments KSEG0 and KSEG1
>> +	 to trivially map first 1/2 GByte of physical memory. This mean,
>> +	 one should have memory installed in this area in order for Linux to
>> +	 work. With this option selected, kernel uses virtual addresses from
>> +	 the XKPHYS segment for both cached and uncached access. XKPHYS allows
>> +	 to access 2^48 bytes of memory, thus allowing to work with any memory
>> +	 configuration.
>> +
>> +	 Say N if not sure
>> +
>>  config MIPS_VA_BITS_48
>>  	bool "48 bits virtual memory"
>>  	depends on 64BIT
>> diff --git a/arch/mips/Makefile b/arch/mips/Makefile
>> index f49807e1f19b..544ee8427cab 100644
>> --- a/arch/mips/Makefile
>> +++ b/arch/mips/Makefile
>> @@ -303,6 +303,10 @@ ifdef CONFIG_64BIT
>>      endif
>>    endif
>> 
>> +  ifdef CONFIG_USE_XKPHYS
>> +      KBUILD_SYM32 = n
>> +  endif
>> +
>>    ifeq ($(KBUILD_SYM32), y)
>>      cflags-$(KBUILD_SYM32) += -msym32 -DKBUILD_64BIT_SYM32
>>    else
>> diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform
>> index 0c03623f3897..2be9947814ad 100644
>> --- a/arch/mips/generic/Platform
>> +++ b/arch/mips/generic/Platform
>> @@ -12,7 +12,12 @@
>>  cflags-$(CONFIG_MACH_INGENIC_SOC)	+= 
>> -I$(srctree)/arch/mips/include/asm/mach-ingenic
>>  cflags-$(CONFIG_MIPS_GENERIC)	+= 
>> -I$(srctree)/arch/mips/include/asm/mach-generic
>> 
>> +ifndef (CONFIG_USE_XKPHYS)
>>  load-$(CONFIG_MIPS_GENERIC)	+= 0xffffffff80100000
>> +else
>> +load-$(CONFIG_MIPS_GENERIC)	+= 0xa800000080100000
>> +endif
>
> Better to make load address configurable.

OK I prepared a patch for it.

>
>> +
>>  all-$(CONFIG_MIPS_GENERIC)	+= vmlinux.gz.itb
>> 
>>  its-y					:= vmlinux.its.S
>> diff --git a/arch/mips/include/asm/addrspace.h 
>> b/arch/mips/include/asm/addrspace.h
>> index 59a48c60a065..8dc500d8e66d 100644
>> --- a/arch/mips/include/asm/addrspace.h
>> +++ b/arch/mips/include/asm/addrspace.h
>> @@ -65,10 +65,15 @@
>>  #define XKSSEG			_CONST64_(0x4000000000000000)
>>  #define XKPHYS			_CONST64_(0x8000000000000000)
>>  #define XKSEG			_CONST64_(0xc000000000000000)
>> +#if !defined(CONFIG_USE_XKPHYS)
>>  #define CKSEG0			_CONST64_(0xffffffff80000000)
>>  #define CKSEG1			_CONST64_(0xffffffffa0000000)
>>  #define CKSSEG			_CONST64_(0xffffffffc0000000)
>>  #define CKSEG3			_CONST64_(0xffffffffe0000000)
>> +#else
>> +#define CKSEG0			XKPHYS_CM_CACHED
>> +#define CKSEG1			XKPHYS_CM_UNCACHED
>> +#endif /* !defined(CONFIG_USE_XKPHYS) */
>> 
>>  #define CKSEG0ADDR(a)		(CPHYSADDR(a) | CKSEG0)
>>  #define CKSEG1ADDR(a)		(CPHYSADDR(a) | CKSEG1)
>> @@ -126,8 +131,11 @@
>>  #define PHYS_TO_XKSEG_UNCACHED(p)	PHYS_TO_XKPHYS(K_CALG_UNCACHED, (p))
>>  #define PHYS_TO_XKSEG_CACHED(p)		PHYS_TO_XKPHYS(K_CALG_COH_SHAREABLE, (p))
>>  #define XKPHYS_TO_PHYS(p)		((p) & TO_PHYS_MASK)
>> -#define PHYS_TO_XKPHYS(cm, a)		(XKPHYS | (_ACAST64_(cm) << 59) | (a))
>> -
>> +#define XKPHYS_CM(cm)			(XKPHYS | (_ACAST64_(cm) << 59))
>> +#define PHYS_TO_XKPHYS(cm, a)		(XKPHYS_CM(cm) | (a))
>> +#define XKPHYS_CM_CACHED		(XKPHYS_CM(K_CALG_COH_SHAREABLE))
>> +#define XKPHYS_CM_UNCACHED		(XKPHYS_CM(K_CALG_UNCACHED))
>> +#define IS_XKPHYS(a)			(((a) >> 62) == 2)
>>  /*
>>   * The ultimate limited of the 64-bit MIPS architecture:  2 bits for selecting
>>   * the region, 3 bits for the CCA mode.  This leaves 59 bits of which the
>> diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h
>> index 23c67c0871b1..15d8d69de455 100644
>> --- a/arch/mips/include/asm/mips-cm.h 
>> +++ b/arch/mips/include/asm/mips-cm.h
>> @@ -311,6 +311,7 @@ GCR_CX_ACCESSOR_RW(32, 0x018, other)
>>  /* GCR_Cx_RESET_BASE - Configure where powered up cores will fetch from */
>>  GCR_CX_ACCESSOR_RW(32, 0x020, reset_base)
>>  #define CM_GCR_Cx_RESET_BASE_BEVEXCBASE		GENMASK(31, 12)
>> +#define CM_GCR_Cx_RESET_BASE_MODE		BIT(1)
>> 
>>  /* GCR_Cx_ID - Identify the current core */
>>  GCR_CX_ACCESSOR_RO(32, 0x028, id)
>> diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
>> index 5978a8dfb917..53b8306da571 100644
>> --- a/arch/mips/include/asm/page.h
>> +++ b/arch/mips/include/asm/page.h
>> @@ -176,7 +176,11 @@ static inline unsigned long ___pa(unsigned long x)
>>  		 * the compatibility segements ckseg0 or ckseg1, or it may
>>  		 * be in xkphys.
>>  		 */
>> +#if defined(CONFIG_USE_XKPHYS)
>> +		return XPHYSADDR(x);
>> +#else
>>  		return x < CKSEG0 ? XPHYSADDR(x) : CPHYSADDR(x);
>> +#endif
>
> Dangerous, there might be some code passing KSEG0/1 address to __pa, so
> we should not disregard it.

I don't see any code doing it, but to be safe I will remove it.

>
>>  	}
>> 
>>  	if (!IS_ENABLED(CONFIG_EVA)) {
>> @@ -196,7 +200,11 @@ static inline unsigned long ___pa(unsigned long x)
>>  	return x - PAGE_OFFSET + PHYS_OFFSET;
>>  }
>>  #define __pa(x)		___pa((unsigned long)(x))
>> +#if IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_USE_XKPHYS)
>> +#define __va(x)		((void *)PHYS_TO_XKSEG_CACHED(x))
>> +#else
>>  #define __va(x)		((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET))
>> +#endif
>
> PAGE_OFFSET resolves to CAC_BASE anyway, so unnecessary.

OK, there was a lot of indirection but in the end it is almost the same
indeed.

Gregory

>
>>  #include <asm/io.h>
>> 
>>  /*
>> @@ -239,6 +247,8 @@ static inline unsigned long kaslr_offset(void)
>>  	return __kaslr_offset;
>>  }
>> 
>> +#define UNCAC_ADDR(addr)       (UNCAC_BASE + __pa(addr))
>> +
>>  #include <asm-generic/memory_model.h>
>>  #include <asm-generic/getorder.h>
>> 
>> diff --git a/arch/mips/include/asm/vga.h b/arch/mips/include/asm/vga.h
>> index 0136e0366698..e338e57d0784 100644
>> --- a/arch/mips/include/asm/vga.h
>> +++ b/arch/mips/include/asm/vga.h
>> @@ -16,7 +16,11 @@
>>   *	access the videoram directly without any black magic.
>>   */
>> 
>> +#if defined(CONFIG_USE_XKPHYS)
>> +#define VGA_MAP_MEM(x, s)	UNCAC_ADDR(0x10000000L + (unsigned long)(x))
>> +#else
>>  #define VGA_MAP_MEM(x, s)	CKSEG1ADDR(0x10000000L + (unsigned long)(x))
>> +#endif
>
> VGA_MAP_MEM intends to work on some really legacy systems, it won't break
> your platform, so better leave it as is.

OK maybe I will still put a comment here.

>
>> 
>>  #define vga_readb(x)	(*(x))
>>  #define vga_writeb(x, y)	(*(y) = (x))
>> diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
>> index 64ecfdac6580..541f31a43a7f 100644
>> --- a/arch/mips/kernel/cps-vec.S
>> +++ b/arch/mips/kernel/cps-vec.S
>> @@ -554,7 +554,11 @@ LEAF(mips_cps_cache_init)
>>  	mul	t1, t1, t0
>>  	mul	t1, t1, t2
>> 
>> +#if defined(CONFIG_USE_XKPHYS)
>> +	PTR_LI	a0, XKPHYS_CM_CACHED
>> +#else
>>  	li	a0, CKSEG0
>> +#endif
>
> Unnecessary, KSEG0 address here are just for matching cache ways,
> so there is no difference to use KSEG0 or XKPHYS.
>
> If you are using XKPHYS here you must extract CCA from bootinfo
> or CP0 as it may vary on different systems.
>
>>  	PTR_ADD	a1, a0, t1
>>  1:	cache	Index_Store_Tag_I, 0(a0)
>>  	PTR_ADD	a0, a0, t0
>> @@ -581,7 +585,11 @@ icache_done:
>>  	mul	t1, t1, t0
>>  	mul	t1, t1, t2
>> 
>> +#if defined(CONFIG_USE_XKPHYS)
>> +	PTR_LI	a0, XKPHYS_CM_CACHED
>> +#else
>
> Ditto.
>
>>  	li	a0, CKSEG0
>> +#endif
>>  	PTR_ADDU a1, a0, t1
>>  	PTR_SUBU a1, a1, t0
>>  1:	cache	Index_Store_Tag_D, 0(a0)
>> diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
>> index b6de8e88c1bd..a002058e1838 100644
>> --- a/arch/mips/kernel/genex.S
>> +++ b/arch/mips/kernel/genex.S
>> @@ -272,11 +272,25 @@ NESTED(except_vec_vi, 0, sp)
>>  	.set	push
>>  	.set	noreorder
>>  	PTR_LA	v1, except_vec_vi_handler
>> +#if defined(CONFIG_USE_XKPHYS)
>> +FEXPORT(except_vec_vi_63_48)
>> +	lui	v0, 0		/* Patched - bits 63:48 */
>> +FEXPORT(except_vec_vi_47_32)
>> +	ori	v0, 0		/* Patched - bits 47:32 */
>> +	dsll	v0, v0, 0x10
>> +FEXPORT(except_vec_vi_31_16)
>> +	ori	v0, 0		/* Patched - bits 31:16 */
>> +	dsll	v0, v0, 0x10
>> +	jr	v1
>> +FEXPORT(except_vec_vi_15_0)
>> +	ori	v0, 0		/* Patched - bits 15:0 */
>> +#else /* defined(CONFIG_USE_XKPHYS) */
>>  FEXPORT(except_vec_vi_lui)
>>  	lui	v0, 0		/* Patched */
>>  	jr	v1
>>  FEXPORT(except_vec_vi_ori)
>>  	 ori	v0, 0		/* Patched */
>> +#endif /* defined(CONFIG_USE_XKPHYS) */
>>  	.set	pop
>>  	END(except_vec_vi)
>>  EXPORT(except_vec_vi_end)
>> diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
>> index dd55d59b88db..47e76722a306 100644
>> --- a/arch/mips/kernel/smp-cps.c
>> +++ b/arch/mips/kernel/smp-cps.c
>> @@ -34,10 +34,33 @@ static unsigned __init core_vpe_count(unsigned int 
>> cluster, unsigned core)
>>  	return min(smp_max_threads, mips_cps_numvps(cluster, core));
>>  }
>> 
>> +/**
>> + * plat_core_entry - query reset vector for NMI/reset
>> + *
>> + * Returns low 32 bits of the reset vector
>> + *
>> + * This is used to fill 2 registers:
>> + * - BEV Base (GCR_BEV_BASE) Offset: 0x0680
>> + * - VP Local Reset Exception Base (GCR_CL_RESET_BASE,GCR_CO_RESET_BASE)
>> + *   Offset: 0x0020 (0x2020 relative to GCR_BASE_ADDR)
>> + *
>> + * In both registers, BIT(1) should be set in case it uses address in XKPHYS
>> + * (as opposed to KSEG1). This bit defined as CM_GCR_Cx_RESET_BASE_MODE,
>> + * using it unconditionally because for GCR_BEV_BASE its value is the same
>> + */
>> +static u32 plat_core_entry(void)
>> +{
>> +#if defined(CONFIG_USE_XKPHYS)
>> +	return (UNCAC_ADDR(mips_cps_core_entry) & 0xffffffff)
>> +			| CM_GCR_Cx_RESET_BASE_MODE;
>> +#else
>> +	return CKSEG1ADDR((unsigned long)mips_cps_core_entry);
>> +#endif
>
> This is a CM3 feature, so perhaps we should handle it in a general
> way.
>
>> +}
>> +
>>  static void __init cps_smp_setup(void)
>>  {
>>  	unsigned int nclusters, ncores, nvpes, core_vpes;
>> -	unsigned long core_entry;
>>  	int cl, c, v;
>> 
>>  	/* Detect & record VPE topology */
>> @@ -94,10 +117,8 @@ static void __init cps_smp_setup(void)
>>  	/* Make core 0 coherent with everything */
>>  	write_gcr_cl_coherence(0xff);
>> 
>> -	if (mips_cm_revision() >= CM_REV_CM3) {
>> -		core_entry = CKSEG1ADDR((unsigned long)mips_cps_core_entry);
>> -		write_gcr_bev_base(core_entry);
>> -	}
>> +	if (mips_cm_revision() >= CM_REV_CM3)
>> +		write_gcr_bev_base(plat_core_entry());
>> 
>>  #ifdef CONFIG_MIPS_MT_FPAFF
>>  	/* If we have an FPU, enroll ourselves in the FPU-full mask */
>> @@ -213,7 +234,7 @@ static void boot_core(unsigned int core, unsigned 
>> int vpe_id)
>>  	mips_cm_lock_other(0, core, 0, CM_GCR_Cx_OTHER_BLOCK_LOCAL);
>> 
>>  	/* Set its reset vector */
>> -	write_gcr_co_reset_base(CKSEG1ADDR((unsigned long)mips_cps_core_entry));
>> +	write_gcr_co_reset_base(plat_core_entry());
>> 
>>  	/* Ensure its coherency is disabled */
>>  	write_gcr_co_coherence(0);
>> @@ -290,7 +311,6 @@ static int cps_boot_secondary(int cpu, struct 
>> task_struct *idle)
>>  	unsigned vpe_id = cpu_vpe_id(&cpu_data[cpu]);
>>  	struct core_boot_config *core_cfg = &mips_cps_core_bootcfg[core];
>>  	struct vpe_boot_config *vpe_cfg = &core_cfg->vpe_config[vpe_id];
>> -	unsigned long core_entry;
>>  	unsigned int remote;
>>  	int err;
>> 
>> @@ -314,8 +334,7 @@ static int cps_boot_secondary(int cpu, struct 
>> task_struct *idle)
>> 
>>  	if (cpu_has_vp) {
>>  		mips_cm_lock_other(0, core, vpe_id, CM_GCR_Cx_OTHER_BLOCK_LOCAL);
>> -		core_entry = CKSEG1ADDR((unsigned long)mips_cps_core_entry);
>> -		write_gcr_co_reset_base(core_entry);
>> +		write_gcr_co_reset_base(plat_core_entry());
>>  		mips_cm_unlock_other();
>>  	}
>> 
>> diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
>> index 246c6a6b0261..875594843626 100644
>> --- a/arch/mips/kernel/traps.c
>> +++ b/arch/mips/kernel/traps.c
>> @@ -2091,11 +2091,20 @@ static void *set_vi_srs_handler(int n, 
>> vi_handler_t addr, int srs)
>>  		 * If no shadow set is selected then use the default handler
>>  		 * that does normal register saving and standard interrupt exit
>>  		 */
>> -		extern const u8 except_vec_vi[], except_vec_vi_lui[];
>> -		extern const u8 except_vec_vi_ori[], except_vec_vi_end[];
>> +		extern const u8 except_vec_vi[], except_vec_vi_end[];
>>  		extern const u8 rollback_except_vec_vi[];
>>  		const u8 *vec_start = using_rollback_handler() ?
>>  				      rollback_except_vec_vi : except_vec_vi;
>> +		const int handler_len = except_vec_vi_end - vec_start;
>> +#if defined(CONFIG_USE_XKPHYS)
>> +		extern const u8 except_vec_vi_63_48[], except_vec_vi_47_32[];
>> +		extern const u8 except_vec_vi_31_16[], except_vec_vi_15_0[];
>> +		const int offset_63_48 = except_vec_vi_63_48 - vec_start;
>> +		const int offset_47_32 = except_vec_vi_47_32 - vec_start;
>> +		const int offset_31_16 = except_vec_vi_31_16 - vec_start;
>> +		const int offset_15_0  = except_vec_vi_15_0  - vec_start;
>> +#else /* defined(CONFIG_USE_XKPHYS) */
>> +		extern const u8 except_vec_vi_lui[], except_vec_vi_ori[];
>>  #if defined(CONFIG_CPU_MICROMIPS) || defined(CONFIG_CPU_BIG_ENDIAN)
>>  		const int lui_offset = except_vec_vi_lui - vec_start + 2;
>>  		const int ori_offset = except_vec_vi_ori - vec_start + 2;
>> @@ -2103,7 +2112,7 @@ static void *set_vi_srs_handler(int n, 
>> vi_handler_t addr, int srs)
>>  		const int lui_offset = except_vec_vi_lui - vec_start;
>>  		const int ori_offset = except_vec_vi_ori - vec_start;
>>  #endif
>> -		const int handler_len = except_vec_vi_end - vec_start;
>> +#endif /* defined(CONFIG_USE_XKPHYS) */
>> 
>>  		if (handler_len > VECTORSPACING) {
>>  			/*
>> @@ -2119,10 +2128,21 @@ static void *set_vi_srs_handler(int n, 
>> vi_handler_t addr, int srs)
>>  #else
>>  				handler_len);
>>  #endif
>> +#if defined(CONFIG_USE_XKPHYS)
>> +		h = (u16 *)(b + offset_63_48);
>> +		*h = (handler >> 48) & 0xffff;
>> +		h = (u16 *)(b + offset_47_32);
>> +		*h = (handler >> 32) & 0xffff;
>> +		h = (u16 *)(b + offset_31_16);
>> +		*h = (handler >> 16) & 0xffff;
>> +		h = (u16 *)(b + offset_15_0);
>> +		*h = (handler >> 0) & 0xffff;
>> +#else /* defined(CONFIG_USE_XKPHYS) */
>>  		h = (u16 *)(b + lui_offset);
>>  		*h = (handler >> 16) & 0xffff;
>>  		h = (u16 *)(b + ori_offset);
>>  		*h = (handler & 0xffff);
>> +#endif /* defined(CONFIG_USE_XKPHYS) */
>>  		local_flush_icache_range((unsigned long)b,
>>  					 (unsigned long)(b+handler_len));
>>  	}
>> @@ -2332,7 +2352,11 @@ static const char panic_null_cerr[] =
>>  void set_uncached_handler(unsigned long offset, void *addr,
>>  	unsigned long size)
>>  {
>> +#if IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_USE_XKPHYS)
>> +	unsigned long uncached_ebase = UNCAC_ADDR(ebase);
>> +#else
>>  	unsigned long uncached_ebase = CKSEG1ADDR(ebase);
>> +#endif
>> 
>>  	if (!addr)
>>  		panic(panic_null_cerr);
>> @@ -2384,9 +2408,11 @@ void __init trap_init(void)
>>  		 * EVA is special though as it allows segments to be rearranged
>>  		 * and to become uncached during cache error handling.
>>  		 */
>> +#if !defined(CONFIG_USE_XKPHYS)
>>  		if (!IS_ENABLED(CONFIG_EVA) && !WARN_ON(ebase_pa >= 0x20000000))
>>  			ebase = CKSEG0ADDR(ebase_pa);
>>  		else
>> +#endif
>>  			ebase = (unsigned long)phys_to_virt(ebase_pa);
>>  	}
>> 
>> diff --git a/arch/mips/lib/uncached.c b/arch/mips/lib/uncached.c
>> index f80a67c092b6..8a78348a2dd7 100644
>> --- a/arch/mips/lib/uncached.c
>> +++ b/arch/mips/lib/uncached.c
>> @@ -44,6 +44,10 @@ unsigned long run_uncached(void *func)
>> 
>>  	__asm__("move %0, $sp" : "=r" (sp));
>> 
>> +#if defined(CONFIG_USE_XKPHYS)
>> +	if (IS_XKPHYS(sp))
>> +		usp = UNCAC_ADDR(sp);
>
> Unnecessary, the else if later is actually handling XKPHYS sp.
>
>> +#else /* defined(CONFIG_USE_XKPHYS) */
>>  	if (sp >= (long)CKSEG0 && sp < (long)CKSEG2)
>>  		usp = CKSEG1ADDR(sp);
>>  #ifdef CONFIG_64BIT
>> @@ -52,10 +56,15 @@ unsigned long run_uncached(void *func)
>>  		usp = PHYS_TO_XKPHYS(K_CALG_UNCACHED,
>>  				     XKPHYS_TO_PHYS((long long)sp));
>>  #endif
>> +#endif /* defined(CONFIG_USE_XKPHYS) */
>>  	else {
>>  		BUG();
>>  		usp = sp;
>>  	}
>> +#if defined(CONFIG_USE_XKPHYS)
>> +	if (IS_XKPHYS(lfunc))
>> +		ufunc = UNCAC_ADDR(lfunc);
>
> ditto.
>
>> +#else /* defined(CONFIG_USE_XKPHYS) */
>>  	if (lfunc >= (long)CKSEG0 && lfunc < (long)CKSEG2)
>>  		ufunc = CKSEG1ADDR(lfunc);
>>  #ifdef CONFIG_64BIT
>> @@ -64,6 +73,7 @@ unsigned long run_uncached(void *func)
>>  		ufunc = PHYS_TO_XKPHYS(K_CALG_UNCACHED,
>>  				       XKPHYS_TO_PHYS((long long)lfunc));
>>  #endif
>> +#endif /* defined(CONFIG_USE_XKPHYS) */
>>  	else {
>>  		BUG();
>>  		ufunc = lfunc;
>> diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
>> index 5dcb525a8995..eb57283ec4e0 100644
>> --- a/arch/mips/mm/init.c
>> +++ b/arch/mips/mm/init.c
>> @@ -427,7 +427,7 @@ void __init paging_init(void)
>>  	free_area_init(max_zone_pfns);
>>  }
>> 
>> -#ifdef CONFIG_64BIT
>> +#if defined(CONFIG_64BIT) && !defined(CONFIG_USE_XKPHYS)
>>  static struct kcore_list kcore_kseg0;
>>  #endif
>> 
>> @@ -470,7 +470,7 @@ void __init mem_init(void)
>>  	setup_zero_pages();	/* Setup zeroed pages.  */
>>  	mem_init_free_highmem();
>> 
>> -#ifdef CONFIG_64BIT
>> +#if defined(CONFIG_64BIT) && !defined(CONFIG_USE_XKPHYS)
>>  	if ((unsigned long) &_text > (unsigned long) CKSEG0)
>>  		/* The -4 is a hack so that user tools don't have to handle
>>  		   the overflow.  */
>> -- 
>> 2.40.1
>
> Thanks.
>
> -- 
> - Jiaxun