diff mbox series

npu2-opencapi: Add opencapi support on ZZ

Message ID 20190708103005.32195-1-fbarrat@linux.ibm.com
State Changes Requested
Delegated to: Oliver O'Halloran
Headers show
Series npu2-opencapi: Add opencapi support on ZZ | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch master (4db38a36b31045f0a116d388ddeac850b38c8680)
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot success Test snowpatch/job/snowpatch-skiboot on branch master
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot-dco success Signed-off-by present

Commit Message

Frederic Barrat July 8, 2019, 10:30 a.m. UTC
This patch adds opencapi support on ZZ. It hard-codes the required
device tree entries for the NPU and links. The alternative was to use
HDAT, but it somehow proved to painful to do.

The new device tree entries activate the npu2 init code on ZZ. On
systems with no opencapi adapters, it should go unnoticed, as presence
detection will skip link training.

Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
---

Stewart, Oliver, Vasant: it's hard to see clearly in the ever changing
plan, but opencapi support may be desired for FW940. So hopefully this
patch can be merged in whatever skiboot branch that fw will use (is
that decided yet?).


 hdata/spira.c          |  8 +++-
 platforms/ibm-fsp/zz.c | 96 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 102 insertions(+), 2 deletions(-)

Comments

Oliver O'Halloran July 9, 2019, 4:03 a.m. UTC | #1
On Mon, Jul 8, 2019 at 8:30 PM Frederic Barrat <fbarrat@linux.ibm.com> wrote:
>
> This patch adds opencapi support on ZZ. It hard-codes the required
> device tree entries for the NPU and links. The alternative was to use
> HDAT, but it somehow proved to painful to do.

I know that feeling...

> The new device tree entries activate the npu2 init code on ZZ. On
> systems with no opencapi adapters, it should go unnoticed, as presence
> detection will skip link training.
>
> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
> ---
>
> Stewart, Oliver, Vasant: it's hard to see clearly in the ever changing
> plan, but opencapi support may be desired for FW940. So hopefully this
> patch can be merged in whatever skiboot branch that fw will use (is
> that decided yet?).
>
>
>  hdata/spira.c          |  8 +++-
>  platforms/ibm-fsp/zz.c | 96 +++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 102 insertions(+), 2 deletions(-)
>
> diff --git a/hdata/spira.c b/hdata/spira.c
> index 6891a9c7..c330a1fa 100644
> --- a/hdata/spira.c
> +++ b/hdata/spira.c
> @@ -1369,7 +1369,13 @@ static void add_npu(struct dt_node *xscom, const struct HDIF_array_hdr *links,
>                 uint64_t speed = 0, nvlink_speed = 0;
>                 struct dt_node *node;
>
> -               /* only add a link node if this link is targeted at at device */
> +               /*
> +                * only add a link node if this link is targeted at a
> +                * (GPU) device.
> +                * If we ever activate it for an opencapi device,
> +                * we'll need to revisit the link definitions
> +                * hard-coded on ZZ
> +                */
>                 if (be32_to_cpu(link->usage) != SMP_LINK_USE_DEVICE)
>                         continue;
>
> diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c
> index f44c618c..55078003 100644
> --- a/platforms/ibm-fsp/zz.c
> +++ b/platforms/ibm-fsp/zz.c
> @@ -45,6 +45,97 @@ static const struct platform_ocapi zz_ocapi = {
>         .odl_phy_swap        = true,
>  };
>
> +#define NPU_BASE 0x5011000
> +#define NPU_SIZE 0x2c
> +#define NPU_INDIRECT0  0x8000000009010c3f /* OB0 - no OB3 on ZZ */
> +
> +static void create_link(struct dt_node *npu, int group, int index)
> +{
> +       struct dt_node *link;
> +       uint32_t lane_mask;
> +       char namebuf[32];
> +
> +       snprintf(namebuf, sizeof(namebuf), "link@%x", index);
> +       link = dt_new(npu, namebuf);
> +
> +       dt_add_property_string(link, "compatible", "ibm,npu-link");
> +       dt_add_property_cells(link, "ibm,npu-link-index", index);
> +
> +       switch (index) {
> +       case 2:
> +               lane_mask = 0xf1e000; /* 0-3, 7-10 */
> +               break;
> +       case 3:
> +               lane_mask = 0x00078f; /* 13-16, 20-23 */
> +               break;
> +       default:
> +               assert(0);
> +       }
> +
> +       dt_add_property_u64s(link, "ibm,npu-phy", NPU_INDIRECT0);
> +       dt_add_property_cells(link, "ibm,npu-lane-mask", lane_mask);
> +       dt_add_property_cells(link, "ibm,npu-group-id", group);
> +       dt_add_property_u64s(link, "ibm,link-speed", 25000000000ul);
> +}
> +

> +static void zz_opencapi_setup(void)

Call it add_opencapi_dt_nodes() or something. this isn't setting up anything.

> +{
> +       struct dt_node *npu, *xscom;
> +       int npu_index = 0;
> +       int phb_index = 7;
> +       struct dt_property *prop;
> +       char namebuf[32];
Reverse
christmas
tree
declarations

wait, sorry:

declarations
christmas
Reverse
tree

> +
> +       /*
> +        * In an ideal world, we should get all the NPU links
> +        * information from HDAT. But after some effort, HDAT is still
> +        * giving surprising information for opencapi.

Can you be a little more specific about what "surprising" means? At
some point we'll want to remove this and without more context than
"it's broken" it's hard to judge when that's safe to do so.

> +        * As a consequence:
> +        * 1. the hdat parsing code in skiboot remains disabled (for
> +        *    opencapi)
> +        * 2. we hard-code the NPU and links entries in the device
> +        *    tree.
> +        *
> +        * Getting the data from HDAT would have the advantage of
> +        * providing the real link speed (20.0 vs. 25.78125 gbps),
> +        * which is useful as there's one speed-dependent setting we
> +        * need to do when initializing the NPU. Our hard coded
> +        * definition assumes the higher speed and may need tuning in
> +        * debug scenario using a lower link speed.
> +        */
> +       snprintf(namebuf, sizeof(namebuf), "npu@%x", NPU_BASE);
> +       dt_for_each_compatible(dt_root, xscom, "ibm,xscom") {
> +               npu = dt_find_by_name(xscom, namebuf);

use dt_find_by_name_addr() and you can drop namebuf.

> +               if (npu) {
> +                       /*
> +                        * our hdat parsing code may create NPU nodes
> +                        * with no links, so we just refresh some
> +                        * properties if needed
> +                        */
> +                       prop = __dt_find_property(npu, "ibm,npu-index");
> +                       if (prop)
> +                               dt_del_property(npu, prop);
> +                       prop = __dt_find_property(npu, "ibm,phb-index");
> +                       if (prop)
> +                               dt_del_property(npu, prop);
> +                       prop = __dt_find_property(npu, "ibm,npu-links");
> +                       if (prop)
> +                               dt_del_property(npu, prop);

Is there anything in the npu node you keep? Why not just nuke the
whole thing with dt_free(npu) and re-create it from scratch? If HDAT
is "fixed" enough to start creating link nodes we might run into
problems since what gets created based on HDAT may might clash with
what you're adding here which will probably break booting.

Also, dt_check_del_prop() does this
find-and-delete-property-if-it-exists. It has a stupid name because I
couldn't think of a better one.

> +               } else {
> +                       npu = dt_new(xscom, namebuf);
dt_new_addr()

> +                       dt_add_property_cells(npu, "reg", NPU_BASE, NPU_SIZE);
> +                       dt_add_property_strings(npu, "compatible",
> +                                               "ibm,power9-npu");
> +               }
> +               dt_add_property_cells(npu, "ibm,npu-index", npu_index++);
> +               dt_add_property_cells(npu, "ibm,phb-index", phb_index++);
> +               dt_add_property_cells(npu, "ibm,npu-links", 2);
> +
> +               create_link(npu, 1, 2);
> +               create_link(npu, 2, 3);
> +       }
> +}
> +
>  static bool zz_probe(void)
>  {
>         /* FIXME: make this neater when the dust settles */
> @@ -54,8 +145,11 @@ static bool zz_probe(void)
>             dt_node_is_compatible(dt_root, "ibm,zz-2s4u") ||
>             dt_node_is_compatible(dt_root, "ibm,zz-1s4u+gen4") ||
>             dt_node_is_compatible(dt_root, "ibm,zz-2s2u+gen4") ||
> -           dt_node_is_compatible(dt_root, "ibm,zz-2s4u+gen4"))
> +           dt_node_is_compatible(dt_root, "ibm,zz-2s4u+gen4")) {
> +
> +               zz_opencapi_setup();
>                 return true;
> +       }
>
>         return false;
>  }
> --
> 2.21.0
>
Frederic Barrat July 9, 2019, 9:10 a.m. UTC | #2
Thanks for the review, I'll send a new version soon.
And thanks for openening my eyes on some of those dt APIs.

   Fred


Le 09/07/2019 à 06:03, Oliver O'Halloran a écrit :
> On Mon, Jul 8, 2019 at 8:30 PM Frederic Barrat <fbarrat@linux.ibm.com> wrote:
>>
>> This patch adds opencapi support on ZZ. It hard-codes the required
>> device tree entries for the NPU and links. The alternative was to use
>> HDAT, but it somehow proved to painful to do.
> 
> I know that feeling...
> 
>> The new device tree entries activate the npu2 init code on ZZ. On
>> systems with no opencapi adapters, it should go unnoticed, as presence
>> detection will skip link training.
>>
>> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
>> ---
>>
>> Stewart, Oliver, Vasant: it's hard to see clearly in the ever changing
>> plan, but opencapi support may be desired for FW940. So hopefully this
>> patch can be merged in whatever skiboot branch that fw will use (is
>> that decided yet?).
>>
>>
>>   hdata/spira.c          |  8 +++-
>>   platforms/ibm-fsp/zz.c | 96 +++++++++++++++++++++++++++++++++++++++++-
>>   2 files changed, 102 insertions(+), 2 deletions(-)
>>
>> diff --git a/hdata/spira.c b/hdata/spira.c
>> index 6891a9c7..c330a1fa 100644
>> --- a/hdata/spira.c
>> +++ b/hdata/spira.c
>> @@ -1369,7 +1369,13 @@ static void add_npu(struct dt_node *xscom, const struct HDIF_array_hdr *links,
>>                  uint64_t speed = 0, nvlink_speed = 0;
>>                  struct dt_node *node;
>>
>> -               /* only add a link node if this link is targeted at at device */
>> +               /*
>> +                * only add a link node if this link is targeted at a
>> +                * (GPU) device.
>> +                * If we ever activate it for an opencapi device,
>> +                * we'll need to revisit the link definitions
>> +                * hard-coded on ZZ
>> +                */
>>                  if (be32_to_cpu(link->usage) != SMP_LINK_USE_DEVICE)
>>                          continue;
>>
>> diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c
>> index f44c618c..55078003 100644
>> --- a/platforms/ibm-fsp/zz.c
>> +++ b/platforms/ibm-fsp/zz.c
>> @@ -45,6 +45,97 @@ static const struct platform_ocapi zz_ocapi = {
>>          .odl_phy_swap        = true,
>>   };
>>
>> +#define NPU_BASE 0x5011000
>> +#define NPU_SIZE 0x2c
>> +#define NPU_INDIRECT0  0x8000000009010c3f /* OB0 - no OB3 on ZZ */
>> +
>> +static void create_link(struct dt_node *npu, int group, int index)
>> +{
>> +       struct dt_node *link;
>> +       uint32_t lane_mask;
>> +       char namebuf[32];
>> +
>> +       snprintf(namebuf, sizeof(namebuf), "link@%x", index);
>> +       link = dt_new(npu, namebuf);
>> +
>> +       dt_add_property_string(link, "compatible", "ibm,npu-link");
>> +       dt_add_property_cells(link, "ibm,npu-link-index", index);
>> +
>> +       switch (index) {
>> +       case 2:
>> +               lane_mask = 0xf1e000; /* 0-3, 7-10 */
>> +               break;
>> +       case 3:
>> +               lane_mask = 0x00078f; /* 13-16, 20-23 */
>> +               break;
>> +       default:
>> +               assert(0);
>> +       }
>> +
>> +       dt_add_property_u64s(link, "ibm,npu-phy", NPU_INDIRECT0);
>> +       dt_add_property_cells(link, "ibm,npu-lane-mask", lane_mask);
>> +       dt_add_property_cells(link, "ibm,npu-group-id", group);
>> +       dt_add_property_u64s(link, "ibm,link-speed", 25000000000ul);
>> +}
>> +
> 
>> +static void zz_opencapi_setup(void)
> 
> Call it add_opencapi_dt_nodes() or something. this isn't setting up anything.
> 
>> +{
>> +       struct dt_node *npu, *xscom;
>> +       int npu_index = 0;
>> +       int phb_index = 7;
>> +       struct dt_property *prop;
>> +       char namebuf[32];
> Reverse
> christmas
> tree
> declarations
> 
> wait, sorry:
> 
> declarations
> christmas
> Reverse
> tree
> 
>> +
>> +       /*
>> +        * In an ideal world, we should get all the NPU links
>> +        * information from HDAT. But after some effort, HDAT is still
>> +        * giving surprising information for opencapi.
> 
> Can you be a little more specific about what "surprising" means? At
> some point we'll want to remove this and without more context than
> "it's broken" it's hard to judge when that's safe to do so.
> 
>> +        * As a consequence:
>> +        * 1. the hdat parsing code in skiboot remains disabled (for
>> +        *    opencapi)
>> +        * 2. we hard-code the NPU and links entries in the device
>> +        *    tree.
>> +        *
>> +        * Getting the data from HDAT would have the advantage of
>> +        * providing the real link speed (20.0 vs. 25.78125 gbps),
>> +        * which is useful as there's one speed-dependent setting we
>> +        * need to do when initializing the NPU. Our hard coded
>> +        * definition assumes the higher speed and may need tuning in
>> +        * debug scenario using a lower link speed.
>> +        */
>> +       snprintf(namebuf, sizeof(namebuf), "npu@%x", NPU_BASE);
>> +       dt_for_each_compatible(dt_root, xscom, "ibm,xscom") {
>> +               npu = dt_find_by_name(xscom, namebuf);
> 
> use dt_find_by_name_addr() and you can drop namebuf.
> 
>> +               if (npu) {
>> +                       /*
>> +                        * our hdat parsing code may create NPU nodes
>> +                        * with no links, so we just refresh some
>> +                        * properties if needed
>> +                        */
>> +                       prop = __dt_find_property(npu, "ibm,npu-index");
>> +                       if (prop)
>> +                               dt_del_property(npu, prop);
>> +                       prop = __dt_find_property(npu, "ibm,phb-index");
>> +                       if (prop)
>> +                               dt_del_property(npu, prop);
>> +                       prop = __dt_find_property(npu, "ibm,npu-links");
>> +                       if (prop)
>> +                               dt_del_property(npu, prop);
> 
> Is there anything in the npu node you keep? Why not just nuke the
> whole thing with dt_free(npu) and re-create it from scratch? If HDAT
> is "fixed" enough to start creating link nodes we might run into
> problems since what gets created based on HDAT may might clash with
> what you're adding here which will probably break booting.
> 
> Also, dt_check_del_prop() does this
> find-and-delete-property-if-it-exists. It has a stupid name because I
> couldn't think of a better one.
> 
>> +               } else {
>> +                       npu = dt_new(xscom, namebuf);
> dt_new_addr()
> 
>> +                       dt_add_property_cells(npu, "reg", NPU_BASE, NPU_SIZE);
>> +                       dt_add_property_strings(npu, "compatible",
>> +                                               "ibm,power9-npu");
>> +               }
>> +               dt_add_property_cells(npu, "ibm,npu-index", npu_index++);
>> +               dt_add_property_cells(npu, "ibm,phb-index", phb_index++);
>> +               dt_add_property_cells(npu, "ibm,npu-links", 2);
>> +
>> +               create_link(npu, 1, 2);
>> +               create_link(npu, 2, 3);
>> +       }
>> +}
>> +
>>   static bool zz_probe(void)
>>   {
>>          /* FIXME: make this neater when the dust settles */
>> @@ -54,8 +145,11 @@ static bool zz_probe(void)
>>              dt_node_is_compatible(dt_root, "ibm,zz-2s4u") ||
>>              dt_node_is_compatible(dt_root, "ibm,zz-1s4u+gen4") ||
>>              dt_node_is_compatible(dt_root, "ibm,zz-2s2u+gen4") ||
>> -           dt_node_is_compatible(dt_root, "ibm,zz-2s4u+gen4"))
>> +           dt_node_is_compatible(dt_root, "ibm,zz-2s4u+gen4")) {
>> +
>> +               zz_opencapi_setup();
>>                  return true;
>> +       }
>>
>>          return false;
>>   }
>> --
>> 2.21.0
>>
>
diff mbox series

Patch

diff --git a/hdata/spira.c b/hdata/spira.c
index 6891a9c7..c330a1fa 100644
--- a/hdata/spira.c
+++ b/hdata/spira.c
@@ -1369,7 +1369,13 @@  static void add_npu(struct dt_node *xscom, const struct HDIF_array_hdr *links,
 		uint64_t speed = 0, nvlink_speed = 0;
 		struct dt_node *node;
 
-		/* only add a link node if this link is targeted at at device */
+		/*
+		 * only add a link node if this link is targeted at a
+		 * (GPU) device.
+		 * If we ever activate it for an opencapi device,
+		 * we'll need to revisit the link definitions
+		 * hard-coded on ZZ
+		 */
 		if (be32_to_cpu(link->usage) != SMP_LINK_USE_DEVICE)
 			continue;
 
diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c
index f44c618c..55078003 100644
--- a/platforms/ibm-fsp/zz.c
+++ b/platforms/ibm-fsp/zz.c
@@ -45,6 +45,97 @@  static const struct platform_ocapi zz_ocapi = {
 	.odl_phy_swap        = true,
 };
 
+#define NPU_BASE 0x5011000
+#define NPU_SIZE 0x2c
+#define NPU_INDIRECT0	0x8000000009010c3f /* OB0 - no OB3 on ZZ */
+
+static void create_link(struct dt_node *npu, int group, int index)
+{
+	struct dt_node *link;
+	uint32_t lane_mask;
+	char namebuf[32];
+
+	snprintf(namebuf, sizeof(namebuf), "link@%x", index);
+	link = dt_new(npu, namebuf);
+
+	dt_add_property_string(link, "compatible", "ibm,npu-link");
+	dt_add_property_cells(link, "ibm,npu-link-index", index);
+
+	switch (index) {
+	case 2:
+		lane_mask = 0xf1e000; /* 0-3, 7-10 */
+		break;
+	case 3:
+		lane_mask = 0x00078f; /* 13-16, 20-23 */
+		break;
+	default:
+		assert(0);
+	}
+
+	dt_add_property_u64s(link, "ibm,npu-phy", NPU_INDIRECT0);
+	dt_add_property_cells(link, "ibm,npu-lane-mask", lane_mask);
+	dt_add_property_cells(link, "ibm,npu-group-id", group);
+	dt_add_property_u64s(link, "ibm,link-speed", 25000000000ul);
+}
+
+static void zz_opencapi_setup(void)
+{
+	struct dt_node *npu, *xscom;
+	int npu_index = 0;
+	int phb_index = 7;
+	struct dt_property *prop;
+	char namebuf[32];
+
+	/*
+	 * In an ideal world, we should get all the NPU links
+	 * information from HDAT. But after some effort, HDAT is still
+	 * giving surprising information for opencapi.
+	 * As a consequence:
+	 * 1. the hdat parsing code in skiboot remains disabled (for
+	 *    opencapi)
+	 * 2. we hard-code the NPU and links entries in the device
+	 *    tree.
+	 *
+	 * Getting the data from HDAT would have the advantage of
+	 * providing the real link speed (20.0 vs. 25.78125 gbps),
+	 * which is useful as there's one speed-dependent setting we
+	 * need to do when initializing the NPU. Our hard coded
+	 * definition assumes the higher speed and may need tuning in
+	 * debug scenario using a lower link speed.
+	 */
+	snprintf(namebuf, sizeof(namebuf), "npu@%x", NPU_BASE);
+	dt_for_each_compatible(dt_root, xscom, "ibm,xscom") {
+		npu = dt_find_by_name(xscom, namebuf);
+		if (npu) {
+			/*
+			 * our hdat parsing code may create NPU nodes
+			 * with no links, so we just refresh some
+			 * properties if needed
+			 */
+			prop = __dt_find_property(npu, "ibm,npu-index");
+			if (prop)
+				dt_del_property(npu, prop);
+			prop = __dt_find_property(npu, "ibm,phb-index");
+			if (prop)
+				dt_del_property(npu, prop);
+			prop = __dt_find_property(npu, "ibm,npu-links");
+			if (prop)
+				dt_del_property(npu, prop);
+		} else {
+			npu = dt_new(xscom, namebuf);
+			dt_add_property_cells(npu, "reg", NPU_BASE, NPU_SIZE);
+			dt_add_property_strings(npu, "compatible",
+						"ibm,power9-npu");
+		}
+		dt_add_property_cells(npu, "ibm,npu-index", npu_index++);
+		dt_add_property_cells(npu, "ibm,phb-index", phb_index++);
+		dt_add_property_cells(npu, "ibm,npu-links", 2);
+
+		create_link(npu, 1, 2);
+		create_link(npu, 2, 3);
+	}
+}
+
 static bool zz_probe(void)
 {
 	/* FIXME: make this neater when the dust settles */
@@ -54,8 +145,11 @@  static bool zz_probe(void)
 	    dt_node_is_compatible(dt_root, "ibm,zz-2s4u") ||
 	    dt_node_is_compatible(dt_root, "ibm,zz-1s4u+gen4") ||
 	    dt_node_is_compatible(dt_root, "ibm,zz-2s2u+gen4") ||
-	    dt_node_is_compatible(dt_root, "ibm,zz-2s4u+gen4"))
+	    dt_node_is_compatible(dt_root, "ibm,zz-2s4u+gen4")) {
+
+		zz_opencapi_setup();
 		return true;
+	}
 
 	return false;
 }