diff mbox series

[v2,2/2] npu2-opencapi: Add opencapi support on ZZ

Message ID 20190709150558.14957-2-fbarrat@linux.ibm.com
State Accepted
Headers show
Series [v2,1/2] hdata: Align SMP link definitions with current HDAT spec | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch master (4db38a36b31045f0a116d388ddeac850b38c8680)
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot success Test snowpatch/job/snowpatch-skiboot on branch master
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot-dco success Signed-off-by present

Commit Message

Frederic Barrat July 9, 2019, 3:05 p.m. UTC
This patch adds opencapi support on ZZ. It hard-codes the required
device tree entries for the NPU and links. The alternative was to use
HDAT, but it somehow proved too painful to do.

The new device tree entries activate the npu2 init code on ZZ. On
systems with no opencapi adapters, it should go unnoticed, as presence
detection will skip link training.

Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
---

Desired for the branch that will be used for FW940

Changelog:
v2: user proper dt APIs and explain problems with current HDAT data (Oliver)

 hdata/spira.c          |  9 +++-
 platforms/ibm-fsp/zz.c | 96 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 103 insertions(+), 2 deletions(-)

Comments

Oliver O'Halloran July 10, 2019, 3:43 a.m. UTC | #1
On Wed, Jul 10, 2019 at 1:06 AM Frederic Barrat <fbarrat@linux.ibm.com> wrote:
>
> This patch adds opencapi support on ZZ. It hard-codes the required
> device tree entries for the NPU and links. The alternative was to use
> HDAT, but it somehow proved too painful to do.
>
> The new device tree entries activate the npu2 init code on ZZ. On
> systems with no opencapi adapters, it should go unnoticed, as presence
> detection will skip link training.
>
> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
> ---
>
> Desired for the branch that will be used for FW940
>
> Changelog:
> v2: user proper dt APIs and explain problems with current HDAT data (Oliver)

Thanks for doing that. Seems to have cleaned the patch up nicely.

>
>  hdata/spira.c          |  9 +++-
>  platforms/ibm-fsp/zz.c | 96 +++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 103 insertions(+), 2 deletions(-)
>
> diff --git a/hdata/spira.c b/hdata/spira.c
> index 3d7e21cd..a23fd17c 100644
> --- a/hdata/spira.c
> +++ b/hdata/spira.c
> @@ -1369,7 +1369,14 @@ static void add_npu(struct dt_node *xscom, const struct HDIF_array_hdr *links,
>                 uint64_t speed = 0, nvlink_speed = 0;
>                 struct dt_node *node;
>
> -               /* only add a link node if this link is targeted at at device */
> +               /*
> +                * Only add a link node if this link is targeted at a
> +                * GPU device.
> +                *
> +                * If we ever activate it for an opencapi device, we
> +                * should revisit the link definitions hard-coded
> +                * on ZZ.
> +                */
>                 if (be32_to_cpu(link->usage) != SMP_LINK_USE_GPU)
>                         continue;
>
> diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c
> index f44c618c..f657b38e 100644
> --- a/platforms/ibm-fsp/zz.c
> +++ b/platforms/ibm-fsp/zz.c
> @@ -45,6 +45,97 @@ static const struct platform_ocapi zz_ocapi = {
>         .odl_phy_swap        = true,
>  };
>
> +#define NPU_BASE 0x5011000
> +#define NPU_SIZE 0x2c
> +#define NPU_INDIRECT0  0x8000000009010c3f /* OB0 - no OB3 on ZZ */
> +
> +static void create_link(struct dt_node *npu, int group, int index)
> +{
> +       struct dt_node *link;
> +       uint32_t lane_mask;
> +
> +       switch (index) {
> +       case 2:
> +               lane_mask = 0xf1e000; /* 0-3, 7-10 */
> +               break;
> +       case 3:
> +               lane_mask = 0x00078f; /* 13-16, 20-23 */
> +               break;
> +       default:
> +               assert(0);
> +       }
> +
> +       link = dt_new_addr(npu, "link", index);
> +       dt_add_property_string(link, "compatible", "ibm,npu-link");
> +       dt_add_property_cells(link, "ibm,npu-link-index", index);
> +       dt_add_property_u64s(link, "ibm,npu-phy", NPU_INDIRECT0);
> +       dt_add_property_cells(link, "ibm,npu-lane-mask", lane_mask);
> +       dt_add_property_cells(link, "ibm,npu-group-id", group);
> +       dt_add_property_u64s(link, "ibm,link-speed", 25000000000ul);
> +}
> +
> +static void add_opencapi_dt_nodes(void)
> +{
> +       struct dt_node *npu, *xscom;
> +       int npu_index = 0;
> +       int phb_index = 7;
> +
> +       /*
> +        * In an ideal world, we should get all the NPU links
> +        * information from HDAT. But after some effort, HDAT is still
> +        * giving incorrect information for opencapi. As of this
> +        * writing:
> +        * 1. link usage is wrong for most FPGA cards (0xFFFF vs. 2)
> +        * 2. the 24-bit lane mask is aligned differently than on
> +        *    other platforms (witherspoon)
> +        * 3. connecting a link entry in HDAT to the real physical
> +        *    link will need extra work:
> +        *    - HDAT does presence detection and only lists links with
> +        *      an adapter, so we cannot use default ordering like on
> +        *      witherspoon
> +        *    - best option is probably the brick ID field (offset 8).
> +        *      It's coming straight from the MRW, but seems to match
> +        *      what we expect (2 or 3). Would need to be checked.
> +        *
> +        * To make things more fun, any change in the HDAT data needs
> +        * to be coordinated with PHYP, which is using (some of) those
> +        * fields.

I think Mike Vance said that phyp also ignores the HDAT entries today.
It might be worth having a chat to him about it if you haven't
already.

> +        * As a consequence:
> +        * 1. the hdat parsing code in skiboot remains disabled (for
> +        *    opencapi)
> +        * 2. we hard-code the NPU and links entries in the device
> +        *    tree.
> +        *
> +        * Getting the data from HDAT would have the advantage of
> +        * providing the real link speed (20.0 vs. 25.78125 gbps),
> +        * which is useful as there's one speed-dependent setting we
> +        * need to do when initializing the NPU. Our hard coded
> +        * definition assumes the higher speed and may need tuning in
> +        * debug scenario using a lower link speed.
> +        */

It might be worth adding a PR_DEBUG in or something saying that we're
assuming the phy is set to 25gbps to avoid surprising behaviour. Do
you have an nvram hack or something to change the assumed phy speed?

Looks good otherwise.

Reviewed-by: Oliver O'Halloran <oohall@gmail.com>
Frederic Barrat July 10, 2019, 4:12 p.m. UTC | #2
Le 10/07/2019 à 05:43, Oliver O'Halloran a écrit :
> On Wed, Jul 10, 2019 at 1:06 AM Frederic Barrat <fbarrat@linux.ibm.com> wrote:
>>
>> This patch adds opencapi support on ZZ. It hard-codes the required
>> device tree entries for the NPU and links. The alternative was to use
>> HDAT, but it somehow proved too painful to do.
>>
>> The new device tree entries activate the npu2 init code on ZZ. On
>> systems with no opencapi adapters, it should go unnoticed, as presence
>> detection will skip link training.
>>
>> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
>> ---
>>
>> Desired for the branch that will be used for FW940
>>
>> Changelog:
>> v2: user proper dt APIs and explain problems with current HDAT data (Oliver)
> 
> Thanks for doing that. Seems to have cleaned the patch up nicely.
> 
>>
>>   hdata/spira.c          |  9 +++-
>>   platforms/ibm-fsp/zz.c | 96 +++++++++++++++++++++++++++++++++++++++++-
>>   2 files changed, 103 insertions(+), 2 deletions(-)
>>
>> diff --git a/hdata/spira.c b/hdata/spira.c
>> index 3d7e21cd..a23fd17c 100644
>> --- a/hdata/spira.c
>> +++ b/hdata/spira.c
>> @@ -1369,7 +1369,14 @@ static void add_npu(struct dt_node *xscom, const struct HDIF_array_hdr *links,
>>                  uint64_t speed = 0, nvlink_speed = 0;
>>                  struct dt_node *node;
>>
>> -               /* only add a link node if this link is targeted at at device */
>> +               /*
>> +                * Only add a link node if this link is targeted at a
>> +                * GPU device.
>> +                *
>> +                * If we ever activate it for an opencapi device, we
>> +                * should revisit the link definitions hard-coded
>> +                * on ZZ.
>> +                */
>>                  if (be32_to_cpu(link->usage) != SMP_LINK_USE_GPU)
>>                          continue;
>>
>> diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c
>> index f44c618c..f657b38e 100644
>> --- a/platforms/ibm-fsp/zz.c
>> +++ b/platforms/ibm-fsp/zz.c
>> @@ -45,6 +45,97 @@ static const struct platform_ocapi zz_ocapi = {
>>          .odl_phy_swap        = true,
>>   };
>>
>> +#define NPU_BASE 0x5011000
>> +#define NPU_SIZE 0x2c
>> +#define NPU_INDIRECT0  0x8000000009010c3f /* OB0 - no OB3 on ZZ */
>> +
>> +static void create_link(struct dt_node *npu, int group, int index)
>> +{
>> +       struct dt_node *link;
>> +       uint32_t lane_mask;
>> +
>> +       switch (index) {
>> +       case 2:
>> +               lane_mask = 0xf1e000; /* 0-3, 7-10 */
>> +               break;
>> +       case 3:
>> +               lane_mask = 0x00078f; /* 13-16, 20-23 */
>> +               break;
>> +       default:
>> +               assert(0);
>> +       }
>> +
>> +       link = dt_new_addr(npu, "link", index);
>> +       dt_add_property_string(link, "compatible", "ibm,npu-link");
>> +       dt_add_property_cells(link, "ibm,npu-link-index", index);
>> +       dt_add_property_u64s(link, "ibm,npu-phy", NPU_INDIRECT0);
>> +       dt_add_property_cells(link, "ibm,npu-lane-mask", lane_mask);
>> +       dt_add_property_cells(link, "ibm,npu-group-id", group);
>> +       dt_add_property_u64s(link, "ibm,link-speed", 25000000000ul);
>> +}
>> +
>> +static void add_opencapi_dt_nodes(void)
>> +{
>> +       struct dt_node *npu, *xscom;
>> +       int npu_index = 0;
>> +       int phb_index = 7;
>> +
>> +       /*
>> +        * In an ideal world, we should get all the NPU links
>> +        * information from HDAT. But after some effort, HDAT is still
>> +        * giving incorrect information for opencapi. As of this
>> +        * writing:
>> +        * 1. link usage is wrong for most FPGA cards (0xFFFF vs. 2)
>> +        * 2. the 24-bit lane mask is aligned differently than on
>> +        *    other platforms (witherspoon)
>> +        * 3. connecting a link entry in HDAT to the real physical
>> +        *    link will need extra work:
>> +        *    - HDAT does presence detection and only lists links with
>> +        *      an adapter, so we cannot use default ordering like on
>> +        *      witherspoon
>> +        *    - best option is probably the brick ID field (offset 8).
>> +        *      It's coming straight from the MRW, but seems to match
>> +        *      what we expect (2 or 3). Would need to be checked.
>> +        *
>> +        * To make things more fun, any change in the HDAT data needs
>> +        * to be coordinated with PHYP, which is using (some of) those
>> +        * fields.
> 
> I think Mike Vance said that phyp also ignores the HDAT entries today.
> It might be worth having a chat to him about it if you haven't
> already.

I had started talking to him about it. The funny thing is that PHYP 
doesn't look at the link speed in hdat, which is really the only thing 
we need hdat for (talking for skiboot only). See below.


>> +        * As a consequence:
>> +        * 1. the hdat parsing code in skiboot remains disabled (for
>> +        *    opencapi)
>> +        * 2. we hard-code the NPU and links entries in the device
>> +        *    tree.
>> +        *
>> +        * Getting the data from HDAT would have the advantage of
>> +        * providing the real link speed (20.0 vs. 25.78125 gbps),
>> +        * which is useful as there's one speed-dependent setting we
>> +        * need to do when initializing the NPU. Our hard coded
>> +        * definition assumes the higher speed and may need tuning in
>> +        * debug scenario using a lower link speed.
>> +        */
> 
> It might be worth adding a PR_DEBUG in or something saying that we're
> assuming the phy is set to 25gbps to avoid surprising behaviour. Do
> you have an nvram hack or something to change the assumed phy speed?


We have a trace in phy_reset_complete() to tell what speed we set the 
link to.
Truth is we've never seen a 20 gbps link fail to train even if skiboot 
initializes that PHY setting (a.k.a RX_SPEED_SELECT) for 25.x! Which is 
really why I'm giving up with hdat integration. The PHY team tells us 
it's supposed to help with the eye margin though, so it could become a 
problem one day on platforms/cards/cables with lower signal quality. I 
guess I'll wait till it's needed to worry about it again.

Thanks for the review.

   Fred



> 
> Looks good otherwise.
> 
> Reviewed-by: Oliver O'Halloran <oohall@gmail.com>
>
Stewart Smith July 16, 2019, 4:41 a.m. UTC | #3
Frederic Barrat <fbarrat@linux.ibm.com> writes:
> This patch adds opencapi support on ZZ. It hard-codes the required
> device tree entries for the NPU and links. The alternative was to use
> HDAT, but it somehow proved too painful to do.
>
> The new device tree entries activate the npu2 init code on ZZ. On
> systems with no opencapi adapters, it should go unnoticed, as presence
> detection will skip link training.
>
> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
> ---
>
> Desired for the branch that will be used for FW940
>
> Changelog:
> v2: user proper dt APIs and explain problems with current HDAT data
> (Oliver)

Thanks! Series merged to master as of 4cfc2f773ad55fe23d6d322bf1ccec6a0c0091f7
diff mbox series

Patch

diff --git a/hdata/spira.c b/hdata/spira.c
index 3d7e21cd..a23fd17c 100644
--- a/hdata/spira.c
+++ b/hdata/spira.c
@@ -1369,7 +1369,14 @@  static void add_npu(struct dt_node *xscom, const struct HDIF_array_hdr *links,
 		uint64_t speed = 0, nvlink_speed = 0;
 		struct dt_node *node;
 
-		/* only add a link node if this link is targeted at at device */
+		/*
+		 * Only add a link node if this link is targeted at a
+		 * GPU device.
+		 *
+		 * If we ever activate it for an opencapi device, we
+		 * should revisit the link definitions hard-coded
+		 * on ZZ.
+		 */
 		if (be32_to_cpu(link->usage) != SMP_LINK_USE_GPU)
 			continue;
 
diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c
index f44c618c..f657b38e 100644
--- a/platforms/ibm-fsp/zz.c
+++ b/platforms/ibm-fsp/zz.c
@@ -45,6 +45,97 @@  static const struct platform_ocapi zz_ocapi = {
 	.odl_phy_swap        = true,
 };
 
+#define NPU_BASE 0x5011000
+#define NPU_SIZE 0x2c
+#define NPU_INDIRECT0	0x8000000009010c3f /* OB0 - no OB3 on ZZ */
+
+static void create_link(struct dt_node *npu, int group, int index)
+{
+	struct dt_node *link;
+	uint32_t lane_mask;
+
+	switch (index) {
+	case 2:
+		lane_mask = 0xf1e000; /* 0-3, 7-10 */
+		break;
+	case 3:
+		lane_mask = 0x00078f; /* 13-16, 20-23 */
+		break;
+	default:
+		assert(0);
+	}
+
+	link = dt_new_addr(npu, "link", index);
+	dt_add_property_string(link, "compatible", "ibm,npu-link");
+	dt_add_property_cells(link, "ibm,npu-link-index", index);
+	dt_add_property_u64s(link, "ibm,npu-phy", NPU_INDIRECT0);
+	dt_add_property_cells(link, "ibm,npu-lane-mask", lane_mask);
+	dt_add_property_cells(link, "ibm,npu-group-id", group);
+	dt_add_property_u64s(link, "ibm,link-speed", 25000000000ul);
+}
+
+static void add_opencapi_dt_nodes(void)
+{
+	struct dt_node *npu, *xscom;
+	int npu_index = 0;
+	int phb_index = 7;
+
+	/*
+	 * In an ideal world, we should get all the NPU links
+	 * information from HDAT. But after some effort, HDAT is still
+	 * giving incorrect information for opencapi. As of this
+	 * writing:
+	 * 1. link usage is wrong for most FPGA cards (0xFFFF vs. 2)
+	 * 2. the 24-bit lane mask is aligned differently than on
+	 *    other platforms (witherspoon)
+	 * 3. connecting a link entry in HDAT to the real physical
+	 *    link will need extra work:
+	 *    - HDAT does presence detection and only lists links with
+	 *      an adapter, so we cannot use default ordering like on
+	 *      witherspoon
+	 *    - best option is probably the brick ID field (offset 8).
+	 *      It's coming straight from the MRW, but seems to match
+	 *      what we expect (2 or 3). Would need to be checked.
+	 *
+	 * To make things more fun, any change in the HDAT data needs
+	 * to be coordinated with PHYP, which is using (some of) those
+	 * fields.
+	 *
+	 * As a consequence:
+	 * 1. the hdat parsing code in skiboot remains disabled (for
+	 *    opencapi)
+	 * 2. we hard-code the NPU and links entries in the device
+	 *    tree.
+	 *
+	 * Getting the data from HDAT would have the advantage of
+	 * providing the real link speed (20.0 vs. 25.78125 gbps),
+	 * which is useful as there's one speed-dependent setting we
+	 * need to do when initializing the NPU. Our hard coded
+	 * definition assumes the higher speed and may need tuning in
+	 * debug scenario using a lower link speed.
+	 */
+	dt_for_each_compatible(dt_root, xscom, "ibm,xscom") {
+		/*
+		 * our hdat parsing code may create NPU nodes with no
+		 * links, so let's make sure we start from a clean
+		 * state
+		 */
+		npu = dt_find_by_name_addr(xscom, "npu", NPU_BASE);
+		if (npu)
+			dt_free(npu);
+
+		npu = dt_new_addr(xscom, "npu", NPU_BASE);
+		dt_add_property_cells(npu, "reg", NPU_BASE, NPU_SIZE);
+		dt_add_property_strings(npu, "compatible", "ibm,power9-npu");
+		dt_add_property_cells(npu, "ibm,npu-index", npu_index++);
+		dt_add_property_cells(npu, "ibm,phb-index", phb_index++);
+		dt_add_property_cells(npu, "ibm,npu-links", 2);
+
+		create_link(npu, 1, 2);
+		create_link(npu, 2, 3);
+	}
+}
+
 static bool zz_probe(void)
 {
 	/* FIXME: make this neater when the dust settles */
@@ -54,8 +145,11 @@  static bool zz_probe(void)
 	    dt_node_is_compatible(dt_root, "ibm,zz-2s4u") ||
 	    dt_node_is_compatible(dt_root, "ibm,zz-1s4u+gen4") ||
 	    dt_node_is_compatible(dt_root, "ibm,zz-2s2u+gen4") ||
-	    dt_node_is_compatible(dt_root, "ibm,zz-2s4u+gen4"))
+	    dt_node_is_compatible(dt_root, "ibm,zz-2s4u+gen4")) {
+
+		add_opencapi_dt_nodes();
 		return true;
+	}
 
 	return false;
 }