diff mbox series

[1/4] platforms/astbmc/witherspoon: Rework NPU presence detection

Message ID 20181018011617.14191-1-andrew.donnellan@au1.ibm.com
State Changes Requested
Headers show
Series [1/4] platforms/astbmc/witherspoon: Rework NPU presence detection | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success master/apply_patch Successfully applied

Commit Message

Andrew Donnellan Oct. 18, 2018, 1:16 a.m. UTC
Rework NPU presence detection in preparation for supporting both NVLink and
OpenCAPI devices operating simultaneously on the same NPU.

If an OpenCAPI card is connected to GPU#0, and an NVLink GPU is connected
to GPU#1, the GPU will only receive 2 links rather than the usual 3.

The reason for this is that without the OpenCAPI card, the GPU would
use links 3-5, connected to NPU bricks 3-5, which needs both stacks 1 and 2
to be in NVLink mode.

However, with an OpenCAPI card in the GPU#0 slot that uses links 0-1, we
need to use NPU bricks 2-3, which means stack 1 must be set in OpenCAPI
mode. As such, the GPU will be restricted to using links 4 and 5.

Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
---
 platforms/astbmc/witherspoon.c | 83 ++++++++++++++++++++++++++++--------------
 1 file changed, 55 insertions(+), 28 deletions(-)

Comments

Frederic Barrat Oct. 18, 2018, 4:02 p.m. UTC | #1
Le 18/10/2018 à 03:16, Andrew Donnellan a écrit :
> Rework NPU presence detection in preparation for supporting both NVLink and
> OpenCAPI devices operating simultaneously on the same NPU.
> 
> If an OpenCAPI card is connected to GPU#0, and an NVLink GPU is connected
> to GPU#1, the GPU will only receive 2 links rather than the usual 3.
> 
> The reason for this is that without the OpenCAPI card, the GPU would be
> use links 3-5, connected to NPU bricks 3-5, which needs both stacks 1 and 2
> to be in NVLink mode.
> 
> However, with an OpenCAPI card in the GPU#0 slot that uses links 0-1, we
> need to use NPU bricks 2-3, which means stack 1 must be set in OpenCAPI
> mode. As such, the GPU will be restricted to using links 4 and 5.
> 
> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
> ---
>   platforms/astbmc/witherspoon.c | 83 ++++++++++++++++++++++++++++--------------
>   1 file changed, 55 insertions(+), 28 deletions(-)
> 
> diff --git a/platforms/astbmc/witherspoon.c b/platforms/astbmc/witherspoon.c
> index f45f739fbb2c..9cb3789acbc8 100644
> --- a/platforms/astbmc/witherspoon.c
> +++ b/platforms/astbmc/witherspoon.c
> @@ -232,7 +232,8 @@ static void witherspoon_npu2_device_detect(struct npu2 *npu)
>   	struct dt_node *dn;
>   	int rc;
> 
> -	bool gpu0_present, gpu1_present;
> +	enum npu2_dev_type gpu0_type = NPU2_DEV_TYPE_UNKNOWN;
> +	enum npu2_dev_type gpu1_type = NPU2_DEV_TYPE_UNKNOWN;
> 
>   	if (witherspoon_type != WITHERSPOON_TYPE_REDBUD) {
>   		prlog(PR_DEBUG, "PLAT: Setting all NPU links to NVLink, OpenCAPI only supported on Redbud\n");
> @@ -261,16 +262,6 @@ static void witherspoon_npu2_device_detect(struct npu2 *npu)
>   		return;
>   	}
> 
> -	gpu0_present = occ_get_gpu_presence(chip, 0);
> -	if (gpu0_present) {
> -		prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 slot present\n", chip->id);
> -	}
> -
> -	gpu1_present = occ_get_gpu_presence(chip, 1);
> -	if (gpu1_present) {
> -		prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 slot present\n", chip->id);
> -	}
> -
>   	/* Set pins to input */
>   	state = 0xff;
>   	rc = i2c_request_send(i2c_port_id,
> @@ -287,40 +278,76 @@ static void witherspoon_npu2_device_detect(struct npu2 *npu)
>   	if (rc)
>   		goto i2c_failed;
> 
> -	if (gpu0_present) {
> +	if (occ_get_gpu_presence(chip, 0)) {
> +		prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 slot present\n", chip->id);
>   		if (state & (1 << 0)) {
>   			prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is OpenCAPI\n",
>   			      chip->id);
> -			/*
> -			 * On witherspoon, bricks 2 and 3 are connected to
> -			 * the lanes matching links 1 and 0 in OpenCAPI mode.
> -			 */
> -			set_link_details(npu, 0, 3, NPU2_DEV_TYPE_OPENCAPI);
> -			/* We current don't support using the second link */
> -			set_link_details(npu, 1, 2, NPU2_DEV_TYPE_UNKNOWN);
> +			gpu0_type = NPU2_DEV_TYPE_OPENCAPI;
>   		} else {
>   			prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is NVLink\n",
>   			      chip->id);
> -			set_link_details(npu, 0, 0, NPU2_DEV_TYPE_NVLINK);
> -			set_link_details(npu, 1, 1, NPU2_DEV_TYPE_NVLINK);
> -			set_link_details(npu, 2, 2, NPU2_DEV_TYPE_NVLINK);
> +			gpu0_type = NPU2_DEV_TYPE_NVLINK;
>   		}
>   	}

I would add a comment to explain that the value of the "state" variable 
is floating if there's no device. We suffered enough to get it working ;-)


> -	if (gpu1_present) {
> +	if (occ_get_gpu_presence(chip, 1)) {
> +		prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 slot present\n", chip->id);
>   		if (state & (1 << 1)) {
>   			prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is OpenCAPI\n",
>   			      chip->id);
> -			set_link_details(npu, 4, 4, NPU2_DEV_TYPE_OPENCAPI);
> -			/* We current don't support using the second link */
> -			set_link_details(npu, 5, 5, NPU2_DEV_TYPE_UNKNOWN);
> +			gpu1_type = NPU2_DEV_TYPE_OPENCAPI;
>   		} else {
>   			prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is NVLink\n",
>   			      chip->id);
> +			gpu1_type = NPU2_DEV_TYPE_NVLINK;
> +		}
> +	}
> +
> +	if (gpu0_type == NPU2_DEV_TYPE_OPENCAPI) {
> +		/*
> +		 * On witherspoon, bricks 2 and 3 are connected to
> +		 * the lanes matching links 1 and 0 in OpenCAPI mode.
> +		 */
> +		set_link_details(npu, 0, 3, NPU2_DEV_TYPE_OPENCAPI);
> +		/* We currently don't support using the second link */
> +		set_link_details(npu, 1, 2, NPU2_DEV_TYPE_UNKNOWN);
> +	}

I think we have a problem here. Not because of this patch, but the 
previous one which introduced the link to brick mapping.
We're targeting the "middle" group of lanes (4-6 11-12 17-19), that's 
the lanes connected to the connector 5 of the GPU slot. So that would be 
link 1 and not link 0 (for brick 3).
I think what we want is:
	set_link_details(npu, 0, 2, NPU2_DEV_TYPE_UNKNOWN);
	set_link_details(npu, 1, 3, NPU2_DEV_TYPE_OPENCAPI);

The surprising thing is that it trains in both cases! It should only 
affect the PHY reset sequence and calibration, and not the rest of the 
training sequence, so we may have been lucky that the default inits were 
close enough.

Link 4 below is good.

   Fred



> +
> +	if (gpu0_type == NPU2_DEV_TYPE_NVLINK) {
> +		set_link_details(npu, 0, 0, NPU2_DEV_TYPE_NVLINK);
> +		set_link_details(npu, 1, 1, NPU2_DEV_TYPE_NVLINK);
> +		set_link_details(npu, 2, 2, NPU2_DEV_TYPE_NVLINK);
> +	}
> +
> +	if (gpu1_type == NPU2_DEV_TYPE_OPENCAPI) {
> +		set_link_details(npu, 4, 4, NPU2_DEV_TYPE_OPENCAPI);
> +		/* We currently don't support using the second link */
> +		set_link_details(npu, 5, 5, NPU2_DEV_TYPE_UNKNOWN);
> +	}
> +
> +	/*
> +	 * If an OpenCAPI card is connected to GPU#0, and an NVLink GPU is
> +	 * connected to GPU#1, the GPU will only receive 2 links rather than the
> +	 * usual 3.
> +	 *
> +	 * The reason for this is that without the OpenCAPI card, the GPU would
> +	 * be use links 3-5, connected to NPU bricks 3-5, which needs both
> +	 * stacks 1 and 2 to be in NVLink mode.
> +	 *
> +	 * However, with an OpenCAPI card in the GPU#0 slot that uses links 0-1,
> +	 * we need to use NPU bricks 2-3, which means stack 1 must be set in
> +	 * OpenCAPI mode. As such, the GPU will be restricted to using links 4
> +	 * and 5.
> +	 */
> +	if (gpu1_type == NPU2_DEV_TYPE_NVLINK) {
> +		if (gpu0_type == NPU2_DEV_TYPE_OPENCAPI) {
> +			prlog(PR_WARNING, "PLAT: Chip %d GPU#1 will operate at reduced performance due to presence of OpenCAPI device. For optimal performance, swap device locations\n", chip->id);
> +		} else {
>   			set_link_details(npu, 3, 3, NPU2_DEV_TYPE_NVLINK);
> -			set_link_details(npu, 4, 4, NPU2_DEV_TYPE_NVLINK);
> -			set_link_details(npu, 5, 5, NPU2_DEV_TYPE_NVLINK);
>   		}
> +		set_link_details(npu, 4, 4, NPU2_DEV_TYPE_NVLINK);
> +		set_link_details(npu, 5, 5, NPU2_DEV_TYPE_NVLINK);
>   	}
> 
>   	return;
>
Andrew Donnellan Oct. 19, 2018, 4:49 a.m. UTC | #2
On 19/10/18 3:02 am, Frederic Barrat wrote:
>> -    if (gpu0_present) {
>> +    if (occ_get_gpu_presence(chip, 0)) {
>> +        prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 slot present\n", chip->id);
>>           if (state & (1 << 0)) {
>>               prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is OpenCAPI\n",
>>                     chip->id);
>> -            /*
>> -             * On witherspoon, bricks 2 and 3 are connected to
>> -             * the lanes matching links 1 and 0 in OpenCAPI mode.
>> -             */
>> -            set_link_details(npu, 0, 3, NPU2_DEV_TYPE_OPENCAPI);
>> -            /* We current don't support using the second link */
>> -            set_link_details(npu, 1, 2, NPU2_DEV_TYPE_UNKNOWN);
>> +            gpu0_type = NPU2_DEV_TYPE_OPENCAPI;
>>           } else {
>>               prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is NVLink\n",
>>                     chip->id);
>> -            set_link_details(npu, 0, 0, NPU2_DEV_TYPE_NVLINK);
>> -            set_link_details(npu, 1, 1, NPU2_DEV_TYPE_NVLINK);
>> -            set_link_details(npu, 2, 2, NPU2_DEV_TYPE_NVLINK);
>> +            gpu0_type = NPU2_DEV_TYPE_NVLINK;
>>           }
>>       }
> 
> I would add a comment to explain that the value of the "state" variable 
> is floating if there's no device. We suffered enough to get it working ;-)
> 

Will do.

> 
>> -    if (gpu1_present) {
>> +    if (occ_get_gpu_presence(chip, 1)) {
>> +        prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 slot present\n", chip->id);
>>           if (state & (1 << 1)) {
>>               prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is OpenCAPI\n",
>>                     chip->id);
>> -            set_link_details(npu, 4, 4, NPU2_DEV_TYPE_OPENCAPI);
>> -            /* We current don't support using the second link */
>> -            set_link_details(npu, 5, 5, NPU2_DEV_TYPE_UNKNOWN);
>> +            gpu1_type = NPU2_DEV_TYPE_OPENCAPI;
>>           } else {
>>               prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is NVLink\n",
>>                     chip->id);
>> +            gpu1_type = NPU2_DEV_TYPE_NVLINK;
>> +        }
>> +    }
>> +
>> +    if (gpu0_type == NPU2_DEV_TYPE_OPENCAPI) {
>> +        /*
>> +         * On witherspoon, bricks 2 and 3 are connected to
>> +         * the lanes matching links 1 and 0 in OpenCAPI mode.
>> +         */
>> +        set_link_details(npu, 0, 3, NPU2_DEV_TYPE_OPENCAPI);
>> +        /* We currently don't support using the second link */
>> +        set_link_details(npu, 1, 2, NPU2_DEV_TYPE_UNKNOWN);
>> +    }
> 
> I think we have a problem here. Not because of this patch, but the 
> previous one which introduced the link to brick mapping.
> We're targeting the "middle" group of lanes (4-6 11-12 17-19), that's 
> the lanes connected to the connector 5 of the GPU slot. So that would be 
> link 1 and not link 0 (for brick 3).
> I think what we want is:
>      set_link_details(npu, 0, 2, NPU2_DEV_TYPE_UNKNOWN);
>      set_link_details(npu, 1, 3, NPU2_DEV_TYPE_OPENCAPI);
> 
> The surprising thing is that it trains in both cases! It should only 
> affect the PHY reset sequence and calibration, and not the rest of the 
> training sequence, so we may have been lucky that the default inits were 
> close enough.
> 
> Link 4 below is good.

I'm not entirely sure how I got to that, I remember having to put a fair 
bit of thought into it, but I can't see how I got there any more. I 
think you're right, and I'll change this in v2.
diff mbox series

Patch

diff --git a/platforms/astbmc/witherspoon.c b/platforms/astbmc/witherspoon.c
index f45f739fbb2c..9cb3789acbc8 100644
--- a/platforms/astbmc/witherspoon.c
+++ b/platforms/astbmc/witherspoon.c
@@ -232,7 +232,8 @@  static void witherspoon_npu2_device_detect(struct npu2 *npu)
 	struct dt_node *dn;
 	int rc;
 
-	bool gpu0_present, gpu1_present;
+	enum npu2_dev_type gpu0_type = NPU2_DEV_TYPE_UNKNOWN;
+	enum npu2_dev_type gpu1_type = NPU2_DEV_TYPE_UNKNOWN;
 
 	if (witherspoon_type != WITHERSPOON_TYPE_REDBUD) {
 		prlog(PR_DEBUG, "PLAT: Setting all NPU links to NVLink, OpenCAPI only supported on Redbud\n");
@@ -261,16 +262,6 @@  static void witherspoon_npu2_device_detect(struct npu2 *npu)
 		return;
 	}
 
-	gpu0_present = occ_get_gpu_presence(chip, 0);
-	if (gpu0_present) {
-		prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 slot present\n", chip->id);
-	}
-
-	gpu1_present = occ_get_gpu_presence(chip, 1);
-	if (gpu1_present) {
-		prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 slot present\n", chip->id);
-	}
-
 	/* Set pins to input */
 	state = 0xff;
 	rc = i2c_request_send(i2c_port_id,
@@ -287,40 +278,76 @@  static void witherspoon_npu2_device_detect(struct npu2 *npu)
 	if (rc)
 		goto i2c_failed;
 
-	if (gpu0_present) {
+	if (occ_get_gpu_presence(chip, 0)) {
+		prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 slot present\n", chip->id);
 		if (state & (1 << 0)) {
 			prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is OpenCAPI\n",
 			      chip->id);
-			/*
-			 * On witherspoon, bricks 2 and 3 are connected to
-			 * the lanes matching links 1 and 0 in OpenCAPI mode.
-			 */
-			set_link_details(npu, 0, 3, NPU2_DEV_TYPE_OPENCAPI);
-			/* We current don't support using the second link */
-			set_link_details(npu, 1, 2, NPU2_DEV_TYPE_UNKNOWN);
+			gpu0_type = NPU2_DEV_TYPE_OPENCAPI;
 		} else {
 			prlog(PR_DEBUG, "PLAT: Chip %d GPU#0 is NVLink\n",
 			      chip->id);
-			set_link_details(npu, 0, 0, NPU2_DEV_TYPE_NVLINK);
-			set_link_details(npu, 1, 1, NPU2_DEV_TYPE_NVLINK);
-			set_link_details(npu, 2, 2, NPU2_DEV_TYPE_NVLINK);
+			gpu0_type = NPU2_DEV_TYPE_NVLINK;
 		}
 	}
 
-	if (gpu1_present) {
+	if (occ_get_gpu_presence(chip, 1)) {
+		prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 slot present\n", chip->id);
 		if (state & (1 << 1)) {
 			prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is OpenCAPI\n",
 			      chip->id);
-			set_link_details(npu, 4, 4, NPU2_DEV_TYPE_OPENCAPI);
-			/* We current don't support using the second link */
-			set_link_details(npu, 5, 5, NPU2_DEV_TYPE_UNKNOWN);
+			gpu1_type = NPU2_DEV_TYPE_OPENCAPI;
 		} else {
 			prlog(PR_DEBUG, "PLAT: Chip %d GPU#1 is NVLink\n",
 			      chip->id);
+			gpu1_type = NPU2_DEV_TYPE_NVLINK;
+		}
+	}
+
+	if (gpu0_type == NPU2_DEV_TYPE_OPENCAPI) {
+		/*
+		 * On witherspoon, bricks 2 and 3 are connected to
+		 * the lanes matching links 1 and 0 in OpenCAPI mode.
+		 */
+		set_link_details(npu, 0, 3, NPU2_DEV_TYPE_OPENCAPI);
+		/* We currently don't support using the second link */
+		set_link_details(npu, 1, 2, NPU2_DEV_TYPE_UNKNOWN);
+	}
+
+	if (gpu0_type == NPU2_DEV_TYPE_NVLINK) {
+		set_link_details(npu, 0, 0, NPU2_DEV_TYPE_NVLINK);
+		set_link_details(npu, 1, 1, NPU2_DEV_TYPE_NVLINK);
+		set_link_details(npu, 2, 2, NPU2_DEV_TYPE_NVLINK);
+	}
+
+	if (gpu1_type == NPU2_DEV_TYPE_OPENCAPI) {
+		set_link_details(npu, 4, 4, NPU2_DEV_TYPE_OPENCAPI);
+		/* We currently don't support using the second link */
+		set_link_details(npu, 5, 5, NPU2_DEV_TYPE_UNKNOWN);
+	}
+
+	/*
+	 * If an OpenCAPI card is connected to GPU#0, and an NVLink GPU is
+	 * connected to GPU#1, the GPU will only receive 2 links rather than the
+	 * usual 3.
+	 *
+	 * The reason for this is that without the OpenCAPI card, the GPU would
+	 * be use links 3-5, connected to NPU bricks 3-5, which needs both
+	 * stacks 1 and 2 to be in NVLink mode.
+	 *
+	 * However, with an OpenCAPI card in the GPU#0 slot that uses links 0-1,
+	 * we need to use NPU bricks 2-3, which means stack 1 must be set in
+	 * OpenCAPI mode. As such, the GPU will be restricted to using links 4
+	 * and 5.
+	 */
+	if (gpu1_type == NPU2_DEV_TYPE_NVLINK) {
+		if (gpu0_type == NPU2_DEV_TYPE_OPENCAPI) {
+			prlog(PR_WARNING, "PLAT: Chip %d GPU#1 will operate at reduced performance due to presence of OpenCAPI device. For optimal performance, swap device locations\n", chip->id);
+		} else {
 			set_link_details(npu, 3, 3, NPU2_DEV_TYPE_NVLINK);
-			set_link_details(npu, 4, 4, NPU2_DEV_TYPE_NVLINK);
-			set_link_details(npu, 5, 5, NPU2_DEV_TYPE_NVLINK);
 		}
+		set_link_details(npu, 4, 4, NPU2_DEV_TYPE_NVLINK);
+		set_link_details(npu, 5, 5, NPU2_DEV_TYPE_NVLINK);
 	}
 
 	return;