[5/5] npu2: Move to new GPU memory map

Message ID 20171113110644.15478-5-mikey@neuling.org
State Superseded
Headers show
Series
  • [1/5] npu2: Create npu2_write_mcd()
Related show

Commit Message

Michael Neuling Nov. 13, 2017, 11:06 a.m.
There are three different ways we configure the MCD and memory map.

1) Old way (current way)
   Skiboot configures the MCD and puts GPUs at 4TB and below
2) New way with MCD
   Hostboot configures the MCD and skiboot puts GPU at 4TB and above
3) New way without MCD
   No one configures the MCD and skiboot puts GPU at 4TB and below

The patch keeps option 1 and adds options 2 and 3.

The different configurations are detected using certain scoms (see
patch).

Option 1 will go away eventually as it's a configuration that can
cause xstops or data integrity problems. We are keeping it around to
support existing hostboot.

Option 2 supports only 4 GPUs and 512GB of memory per socket.

Option 3 supports 6 GPUs and 4TB of memory but may have some
performance impact.

Signed-off-by: Michael Neuling <mikey@neuling.org>
---
 hw/npu2.c           | 56 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 hw/phys-map.c       |  5 +++++
 include/npu2-regs.h |  5 +++++
 include/npu2.h      |  1 +
 include/phys-map.h  |  1 +
 5 files changed, 63 insertions(+), 5 deletions(-)

Comments

Balbir Singh Nov. 14, 2017, 12:39 a.m. | #1
On Mon, Nov 13, 2017 at 10:06 PM, Michael Neuling <mikey@neuling.org> wrote:
> There are three different ways we configure the MCD and memory map.
>
> 1) Old way (current way)
>    Skiboot configures the MCD and puts GPUs at 4TB and below
> 2) New way with MCD
>    Hostboot configures the MCD and skiboot puts GPU at 4TB and above
> 3) New way without MCD
>    No one configures the MCD and skiboot puts GPU at 4TB and below
>
> The patch keeps option 1 and adds options 2 and 3.
>
> The different configurations are detected using certain scoms (see
> patch).
>
> Option 1 will go away eventually as it's a configuration that can
> cause xstops or data integrity problems. We are keeping it around to
> support existing hostboot.
>
> Option 2 supports only 4 GPUs and 512GB of memory per socket.
>
> Option 3 supports 6 GPUs and 4TB of memory but may have some
> performance impact.
>
> Signed-off-by: Michael Neuling <mikey@neuling.org>
> ---
>  hw/npu2.c           | 56 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>  hw/phys-map.c       |  5 +++++
>  include/npu2-regs.h |  5 +++++
>  include/npu2.h      |  1 +
>  include/phys-map.h  |  1 +
>  5 files changed, 63 insertions(+), 5 deletions(-)
>
> diff --git a/hw/npu2.c b/hw/npu2.c
> index 082591a638..9752dec5d7 100644
> --- a/hw/npu2.c
> +++ b/hw/npu2.c
> @@ -593,10 +593,11 @@ static struct dt_node *npu2_create_memory_dn(uint64_t addr, uint64_t size)
>   * on bdfn. */
>  static void npu2_get_gpu_base(struct npu2_dev *ndev, uint64_t *addr, uint64_t *size)
>  {
> +       struct npu2 *p = ndev->npu;
>         int group;
>
>         group = (ndev->bdfn >> 3) & 0x1f;
> -       phys_map_get(ndev->npu->chip_id, GPU_MEM_4T_DOWN, group, addr, size);
> +       phys_map_get(ndev->npu->chip_id, p->gpu_map_type, group, addr, size);
>  }
>
>  static void npu2_dn_fixup_gmb(struct dt_node *pd_dn, struct npu2_dev *ndev)
> @@ -855,16 +856,16 @@ static void npu2_mcd_init(struct npu2 *p)
>         uint64_t size, addr, gpu_min_addr, gpu_max_addr, total_size;
>
>         /* Init memory cache directory (MCD) registers. */
> -       phys_map_get(p->chip_id, GPU_MEM_4T_DOWN, NPU2_LINKS_PER_CHIP - 1,
> +       phys_map_get(p->chip_id, p->gpu_map_type, NPU2_LINKS_PER_CHIP - 1,
>                         &gpu_min_addr, NULL);
> -       phys_map_get(p->chip_id, GPU_MEM_4T_DOWN, 0, &gpu_max_addr, &size);
> +       phys_map_get(p->chip_id, p->gpu_map_type, 0, &gpu_max_addr, &size);
>         gpu_max_addr += size;
>
>         /* We assume GPU memory is contiguous from the first possible GPU to the
>          * last and that the size is the same so best to check that. */
>         for (i = 0; i < NPU2_LINKS_PER_CHIP; i++) {
>                 uint64_t tmp;
> -               phys_map_get(p->chip_id, GPU_MEM_4T_DOWN, i, &addr, &tmp);
> +               phys_map_get(p->chip_id, p->gpu_map_type, i, &addr, &tmp);
>                 assert((addr >= gpu_min_addr) && (addr + tmp <= gpu_max_addr));
>                 assert(tmp == size);
>         }
> @@ -905,7 +906,52 @@ static void npu2_hw_init(struct npu2 *p)
>                 npu2_write(p, NPU2_XTS_CFG2, val | NPU2_XTS_CFG2_NO_FLUSH_ENA);
>         }
>
> -       npu2_mcd_init(p);
> +       /*
> +        * There are three different ways we configure the MCD and memory map.
> +        * 1) Old way
> +        *    Skiboot configures the MCD and puts GPUs at 4TB and below
> +        * 2) New way with MCD
> +        *    Hostboot configures the MCD and skiboot puts GPU at 4TB and above
> +        * 3) New way without MCD
> +        *    No one configures the MCD and skiboot puts GPU at 4TB and below
> +        *
> +        * 1) Will go away evenutally as it's a configuration that can
> +        *    cause an xstop or data integrity problems. We are keeping
> +        *    it around to support existing hostboot. Print error
> +        *    message if used.
> +        * 2) Is for smaller memory configurations and will be used
> +        *    initially for GPUs on Witherspoon. Supports only to
> +        *    512GB of memory and 4 GPUs per socket.
> +        * 3) Is for fully populated configurations of 4TB of memory
> +        *    and 6GPUs per socket. May have performance impacts.
> +        *
> +        * The different configurations can be detected via the following scoms:
> +        * 1) 0x5011c0c bit 2 = 1, 0x5011c0a bits 42:48 = 0
> +        * 2) 0x5011c0c bit 2 = 1, 0x5011c0a bits 42:48 = 3
> +        * 3) 0x5011c0c bit 2 = 0, 0x5011c0a bits 42:48 = 0
> +        */
> +
> +       /* Get 0x05011c0c bit 2 = 1 */
> +       xscom_read(p->chip_id, PB_CENT_HP_MODE_CURR, &val);
> +       if ((val & PB_CFG_CHG_RATE_GP_MASTER) != 0) {
> +               /* Get 0x05011c0a bits 42:48 */
> +               xscom_read(p->chip_id, PB_CENT_MODE, &val);
> +               if (GETFIELD(PB_CFG_CHIP_ADDR_EXTENSION_MASK_CENT, val) == 0) {
> +                       /* 1) */
> +                       NPU2DBG(p, "Using old memory map + MCD enabled in skiboot\n");
> +                       NPU2ERR(p, "!!! Old firmware detected. Update hostboot for new MCD mapping !!!\n");
> +                       p->gpu_map_type = GPU_MEM_4T_DOWN;
> +                       npu2_mcd_init(p);
> +               } else {
> +                       /* 2) */
> +                       NPU2DBG(p, "Using small memory map + MCD enabled\n");
> +                       p->gpu_map_type = GPU_MEM_4T_UP;

Should be explicitly check if GETFIELD(..., val) == 3? I think it
might be safer.

> +               }
> +       } else {
> +               /* 3) */
> +               NPU2DBG(p, "Using large memory map + MCD disabled\n");
> +               p->gpu_map_type = GPU_MEM_4T_DOWN;
> +       }
>  }
>
>  static int64_t npu2_map_pe_dma_window_real(struct phb *phb,
> diff --git a/hw/phys-map.c b/hw/phys-map.c
> index 02bc33b8f8..a2b5dbd262 100644
> --- a/hw/phys-map.c
> +++ b/hw/phys-map.c
> @@ -46,6 +46,11 @@ static const struct phys_map_entry phys_map_table_nimbus[] = {
>         { GPU_MEM_4T_DOWN, 2, 0x000003a000000000ull, 0x0000002000000000ull },
>         { GPU_MEM_4T_DOWN, 1, 0x000003c000000000ull, 0x0000002000000000ull },
>         { GPU_MEM_4T_DOWN, 0, 0x000003e000000000ull, 0x0000002000000000ull },
> +       /* GPU memory from 4TB + 128GB*GPU. 4 GPUs only */

Physmap Space for 4 GPUs per socket, I guess that is implied

> +       { GPU_MEM_4T_UP,   0, 0x0000040000000000ull, 0x0000002000000000ull },
> +       { GPU_MEM_4T_UP,   1, 0x0000042000000000ull, 0x0000002000000000ull },
> +       { GPU_MEM_4T_UP,   2, 0x0000044000000000ull, 0x0000002000000000ull },
> +       { GPU_MEM_4T_UP,   3, 0x0000046000000000ull, 0x0000002000000000ull },
>
>         /* 0 TB offset @ MMIO 0x0006000000000000ull */
>         { PHB4_64BIT_MMIO, 0, 0x0006000000000000ull, 0x0000004000000000ull },
> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
> index cef9dbf7f8..2764f9291e 100644
> --- a/include/npu2-regs.h
> +++ b/include/npu2-regs.h
> @@ -31,6 +31,11 @@ void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask);
>  #define        MCD_BANK_CN_VALID       PPC_BIT(0)
>  #define        MCD_BANK_CN_SIZE        PPC_BITMASK(13,29)
>  #define        MCD_BANK_CN_ADDR        PPC_BITMASK(33,63)
> +#define PB_CENT_HP_MODE_CURR   0x5011c0c
> +#define  PB_CFG_CHG_RATE_GP_MASTER PPC_BIT(2)
> +#define PB_CENT_MODE           0x5011c0a
> +#define  PB_CFG_CHIP_ADDR_EXTENSION_MASK_CENT PPC_BITMASK(42,48)
> +
>
>  #define NPU2_REG_OFFSET(stack, block, offset) \
>         (((stack) << 20) | ((block) << 16) | (offset))
> diff --git a/include/npu2.h b/include/npu2.h
> index f11a13a0b2..925eae0a64 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -128,6 +128,7 @@ struct npu2 {
>         uint32_t        base_lsi;
>         uint32_t        total_devices;
>         struct npu2_dev *devices;
> +       enum phys_map_type gpu_map_type;
>
>         /* IODA cache */
>         uint64_t        lxive_cache[8];
> diff --git a/include/phys-map.h b/include/phys-map.h
> index c9b2389615..73adda079e 100644
> --- a/include/phys-map.h
> +++ b/include/phys-map.h
> @@ -27,6 +27,7 @@ enum phys_map_type {
>         NULL_MAP,
>         SYSTEM_MEM,
>         GPU_MEM_4T_DOWN,
> +       GPU_MEM_4T_UP,
>         PHB4_64BIT_MMIO,
>         PHB4_32BIT_MMIO,
>         PHB4_XIVE_ESB,


Balbir Singh.
Michael Neuling Nov. 14, 2017, 10:50 a.m. | #2
> > +       /* Get 0x05011c0c bit 2 = 1 */
> > +       xscom_read(p->chip_id, PB_CENT_HP_MODE_CURR, &val);
> > +       if ((val & PB_CFG_CHG_RATE_GP_MASTER) != 0) {
> > +               /* Get 0x05011c0a bits 42:48 */
> > +               xscom_read(p->chip_id, PB_CENT_MODE, &val);
> > +               if (GETFIELD(PB_CFG_CHIP_ADDR_EXTENSION_MASK_CENT, val) == 0) {
> > +                       /* 1) */
> > +                       NPU2DBG(p, "Using old memory map + MCD enabled in skiboot\n");
> > +                       NPU2ERR(p, "!!! Old firmware detected. Update hostboot for new MCD mapping !!!\n");
> > +                       p->gpu_map_type = GPU_MEM_4T_DOWN;
> > +                       npu2_mcd_init(p);
> > +               } else {
> > +                       /* 2) */
> > +                       NPU2DBG(p, "Using small memory map + MCD enabled\n");
> > +                       p->gpu_map_type = GPU_MEM_4T_UP;
> 
> Should be explicitly check if GETFIELD(..., val) == 3? I think it
> might be safer.

ok.  I could return an error if that happens I guess.

Mikey

Patch

diff --git a/hw/npu2.c b/hw/npu2.c
index 082591a638..9752dec5d7 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -593,10 +593,11 @@  static struct dt_node *npu2_create_memory_dn(uint64_t addr, uint64_t size)
  * on bdfn. */
 static void npu2_get_gpu_base(struct npu2_dev *ndev, uint64_t *addr, uint64_t *size)
 {
+	struct npu2 *p = ndev->npu;
 	int group;
 
 	group = (ndev->bdfn >> 3) & 0x1f;
-	phys_map_get(ndev->npu->chip_id, GPU_MEM_4T_DOWN, group, addr, size);
+	phys_map_get(ndev->npu->chip_id, p->gpu_map_type, group, addr, size);
 }
 
 static void npu2_dn_fixup_gmb(struct dt_node *pd_dn, struct npu2_dev *ndev)
@@ -855,16 +856,16 @@  static void npu2_mcd_init(struct npu2 *p)
 	uint64_t size, addr, gpu_min_addr, gpu_max_addr, total_size;
 
 	/* Init memory cache directory (MCD) registers. */
-	phys_map_get(p->chip_id, GPU_MEM_4T_DOWN, NPU2_LINKS_PER_CHIP - 1,
+	phys_map_get(p->chip_id, p->gpu_map_type, NPU2_LINKS_PER_CHIP - 1,
 			&gpu_min_addr, NULL);
-	phys_map_get(p->chip_id, GPU_MEM_4T_DOWN, 0, &gpu_max_addr, &size);
+	phys_map_get(p->chip_id, p->gpu_map_type, 0, &gpu_max_addr, &size);
 	gpu_max_addr += size;
 
 	/* We assume GPU memory is contiguous from the first possible GPU to the
 	 * last and that the size is the same so best to check that. */
 	for (i = 0; i < NPU2_LINKS_PER_CHIP; i++) {
 		uint64_t tmp;
-		phys_map_get(p->chip_id, GPU_MEM_4T_DOWN, i, &addr, &tmp);
+		phys_map_get(p->chip_id, p->gpu_map_type, i, &addr, &tmp);
 		assert((addr >= gpu_min_addr) && (addr + tmp <= gpu_max_addr));
 		assert(tmp == size);
 	}
@@ -905,7 +906,52 @@  static void npu2_hw_init(struct npu2 *p)
 		npu2_write(p, NPU2_XTS_CFG2, val | NPU2_XTS_CFG2_NO_FLUSH_ENA);
 	}
 
-	npu2_mcd_init(p);
+	/*
+	 * There are three different ways we configure the MCD and memory map.
+	 * 1) Old way
+	 *    Skiboot configures the MCD and puts GPUs at 4TB and below
+	 * 2) New way with MCD
+	 *    Hostboot configures the MCD and skiboot puts GPU at 4TB and above
+	 * 3) New way without MCD
+	 *    No one configures the MCD and skiboot puts GPU at 4TB and below
+	 *
+	 * 1) Will go away evenutally as it's a configuration that can
+	 *    cause an xstop or data integrity problems. We are keeping
+	 *    it around to support existing hostboot. Print error
+	 *    message if used.
+	 * 2) Is for smaller memory configurations and will be used
+	 *    initially for GPUs on Witherspoon. Supports only to
+	 *    512GB of memory and 4 GPUs per socket.
+	 * 3) Is for fully populated configurations of 4TB of memory
+	 *    and 6GPUs per socket. May have performance impacts.
+	 *
+	 * The different configurations can be detected via the following scoms:
+	 * 1) 0x5011c0c bit 2 = 1, 0x5011c0a bits 42:48 = 0
+	 * 2) 0x5011c0c bit 2 = 1, 0x5011c0a bits 42:48 = 3
+	 * 3) 0x5011c0c bit 2 = 0, 0x5011c0a bits 42:48 = 0
+	 */
+
+	/* Get 0x05011c0c bit 2 = 1 */
+	xscom_read(p->chip_id, PB_CENT_HP_MODE_CURR, &val);
+	if ((val & PB_CFG_CHG_RATE_GP_MASTER) != 0) {
+		/* Get 0x05011c0a bits 42:48 */
+		xscom_read(p->chip_id, PB_CENT_MODE, &val);
+		if (GETFIELD(PB_CFG_CHIP_ADDR_EXTENSION_MASK_CENT, val) == 0) {
+			/* 1) */
+			NPU2DBG(p, "Using old memory map + MCD enabled in skiboot\n");
+			NPU2ERR(p, "!!! Old firmware detected. Update hostboot for new MCD mapping !!!\n");
+			p->gpu_map_type = GPU_MEM_4T_DOWN;
+			npu2_mcd_init(p);
+		} else {
+			/* 2) */
+			NPU2DBG(p, "Using small memory map + MCD enabled\n");
+			p->gpu_map_type = GPU_MEM_4T_UP;
+		}
+	} else {
+		/* 3) */
+		NPU2DBG(p, "Using large memory map + MCD disabled\n");
+		p->gpu_map_type = GPU_MEM_4T_DOWN;
+	}
 }
 
 static int64_t npu2_map_pe_dma_window_real(struct phb *phb,
diff --git a/hw/phys-map.c b/hw/phys-map.c
index 02bc33b8f8..a2b5dbd262 100644
--- a/hw/phys-map.c
+++ b/hw/phys-map.c
@@ -46,6 +46,11 @@  static const struct phys_map_entry phys_map_table_nimbus[] = {
 	{ GPU_MEM_4T_DOWN, 2, 0x000003a000000000ull, 0x0000002000000000ull },
 	{ GPU_MEM_4T_DOWN, 1, 0x000003c000000000ull, 0x0000002000000000ull },
 	{ GPU_MEM_4T_DOWN, 0, 0x000003e000000000ull, 0x0000002000000000ull },
+	/* GPU memory from 4TB + 128GB*GPU. 4 GPUs only */
+	{ GPU_MEM_4T_UP,   0, 0x0000040000000000ull, 0x0000002000000000ull },
+	{ GPU_MEM_4T_UP,   1, 0x0000042000000000ull, 0x0000002000000000ull },
+	{ GPU_MEM_4T_UP,   2, 0x0000044000000000ull, 0x0000002000000000ull },
+	{ GPU_MEM_4T_UP,   3, 0x0000046000000000ull, 0x0000002000000000ull },
 
 	/* 0 TB offset @ MMIO 0x0006000000000000ull */
 	{ PHB4_64BIT_MMIO, 0, 0x0006000000000000ull, 0x0000004000000000ull },
diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index cef9dbf7f8..2764f9291e 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -31,6 +31,11 @@  void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask);
 #define	MCD_BANK_CN_VALID	PPC_BIT(0)
 #define	MCD_BANK_CN_SIZE	PPC_BITMASK(13,29)
 #define	MCD_BANK_CN_ADDR	PPC_BITMASK(33,63)
+#define PB_CENT_HP_MODE_CURR	0x5011c0c
+#define  PB_CFG_CHG_RATE_GP_MASTER PPC_BIT(2)
+#define PB_CENT_MODE		0x5011c0a
+#define  PB_CFG_CHIP_ADDR_EXTENSION_MASK_CENT PPC_BITMASK(42,48)
+
 
 #define NPU2_REG_OFFSET(stack, block, offset) \
 	(((stack) << 20) | ((block) << 16) | (offset))
diff --git a/include/npu2.h b/include/npu2.h
index f11a13a0b2..925eae0a64 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -128,6 +128,7 @@  struct npu2 {
 	uint32_t	base_lsi;
 	uint32_t	total_devices;
 	struct npu2_dev	*devices;
+	enum phys_map_type gpu_map_type;
 
 	/* IODA cache */
 	uint64_t	lxive_cache[8];
diff --git a/include/phys-map.h b/include/phys-map.h
index c9b2389615..73adda079e 100644
--- a/include/phys-map.h
+++ b/include/phys-map.h
@@ -27,6 +27,7 @@  enum phys_map_type {
 	NULL_MAP,
 	SYSTEM_MEM,
 	GPU_MEM_4T_DOWN,
+	GPU_MEM_4T_UP,
 	PHB4_64BIT_MMIO,
 	PHB4_32BIT_MMIO,
 	PHB4_XIVE_ESB,