[v2] npu2: Add XTS_BDF_MAP wildcard refcount

Message ID 20181113081602.116897-1-aik@ozlabs.ru
State New
Headers show
Series
  • [v2] npu2: Add XTS_BDF_MAP wildcard refcount
Related show

Checks

Context Check Description
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot success Test snowpatch/job/snowpatch-skiboot on branch master
snowpatch_ozlabs/apply_patch success master/apply_patch Successfully applied

Commit Message

Alexey Kardashevskiy Nov. 13, 2018, 8:16 a.m.
Currently PID wildcard is programmed into the NPU once and never cleared
up. This works for the bare metal as MSR does not change while the host
OS is running.

However with the device virtualization, we need to keep track of wildcard
entries use and clear them up before switching a GPU from a host to
a guest or vice versa.

This adds refcount to a NPU2, one counter per wildcard entry. The index
is a short lparid which is allocated in opal_npu_map_lpar() and should be
smaller than NPU2_XTS_BDF_MAP_SIZE.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
Changes:
v2:
* fixed error handling opal_npu_init_context when checking xts_bdf_pid
---
 include/npu2.h |  2 ++
 hw/npu2.c      | 42 +++++++++++++++++++++++++++++++++---------
 2 files changed, 35 insertions(+), 9 deletions(-)

Comments

Reza Arbab Nov. 20, 2018, 8:18 p.m. | #1
On Tue, Nov 13, 2018 at 07:16:02PM +1100, Alexey Kardashevskiy wrote: 
>@@ -2097,6 +2096,11 @@ static int64_t opal_npu_init_context(uint64_t phb_id, int pasid __unused,
> 	}
>
> 	id = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf);
>+	if (id >= NPU2_XTS_BDF_MAP_SIZE) {
>+		NPU2ERR(p, "Out of bounds id=%d\n", id);
>+		id = OPAL_INTERNAL_ERROR;
>+		goto out;
>+	}
> 	NPU2DBG(p, "Found LPARSHORT = 0x%x for BDF = 0x%03llx\n", id, bdf);
>
> 	/* Enable this mapping for both real and virtual addresses */

It doesn't hurt to check, I guess, but I'm not sure how/if it's possible 
for this to happen.

>@@ -2129,14 +2133,20 @@ static int64_t opal_npu_init_context(uint64_t phb_id, int pasid __unused,
> 		    GETFIELD(NPU2_XTS_PID_MAP_MSR, xts_bdf_pid)) {
> 			NPU2ERR(p, "%s: Unexpected MSR value\n", __func__);
> 			id = OPAL_PARAMETER;
>+			goto out;
>+		} else if (!p->ctx_ref[id]) {
>+			NPU2ERR(p, "%s: Unexpected mapping\n", __func__);
>+			id = OPAL_INTERNAL_ERROR;
>+			goto out;

Same.

> 		}
>-
>-		goto out;
> 	}
>
> 	/* Write the entry */
>-	NPU2DBG(p, "XTS_PID_MAP[%03d] = 0x%08llx\n", id, xts_bdf_pid);
>-	npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, xts_bdf_pid);
>+	if (!p->ctx_ref[id]) {
>+		NPU2DBG(p, "XTS_PID_MAP[%03d] = 0x%08llx\n", id, xts_bdf_pid);
>+		npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, xts_bdf_pid);
>+	}
>+	++p->ctx_ref[id];
>
> 	if (!GETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf)) {
> 		xts_bdf = SETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf, 1);
>@@ -2155,7 +2165,7 @@ static int opal_npu_destroy_context(uint64_t phb_id, uint64_t pid __unused,
> 	struct phb *phb = pci_get_phb(phb_id);
> 	struct npu2 *p;
> 	uint64_t xts_bdf;
>-	int rc = 0;
>+	int rc = 0, id;
>
> 	if (!phb || phb->phb_type != phb_type_npu_v2)
> 		return OPAL_PARAMETER;
>@@ -2172,10 +2182,24 @@ static int opal_npu_destroy_context(uint64_t phb_id, uint64_t pid __unused,
> 	}
>
> 	/*
>-	 * The bdf/pid table only contains wildcard entries, so we don't
>-	 * need to remove anything here.
>+	 * The bdf/pid table contains wildcard entries and MSR bits which
>+	 * we need to clear between switching a device from a host to a guest
>+	 * or vice versa.
> 	 */
>-
>+	id = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf);
>+	if (id >= NPU2_XTS_BDF_MAP_SIZE) {
>+		NPU2ERR(p, "Out of bounds id=%d\n", id);
>+		rc = OPAL_INTERNAL_ERROR;

Same.

>+	} else {
>+		--p->ctx_ref[id];
>+		rc = p->ctx_ref[id]; /* For debug */
>+		if (p->ctx_ref[id] < 0) {
>+			rc = OPAL_INTERNAL_ERROR; /* Sanity check */

If somebody (for whatever reason) calls opal_npu_destroy_context() 
repeatedly for an already-destroyed ID, you don't want the refcount to 
go more and more negative.

I think instead of checking if it is negative afterwards, maybe check to 
see if it is zero before, and return OPAL_PARAMETER.

>+		} else if (!p->ctx_ref[id]) {
>+			NPU2DBG(p, "XTS_PID_MAP[%03d] = 0 (destroy)\n", id);
>+			npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, 0);
>+		}
>+	}
> 	unlock(&p->lock);
> 	return rc;
> }
Alexey Kardashevskiy Nov. 29, 2018, 3:26 a.m. | #2
On 21/11/2018 07:18, Reza Arbab wrote:
> On Tue, Nov 13, 2018 at 07:16:02PM +1100, Alexey Kardashevskiy wrote:
>> @@ -2097,6 +2096,11 @@ static int64_t opal_npu_init_context(uint64_t
>> phb_id, int pasid __unused,
>>     }
>>
>>     id = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf);
>> +    if (id >= NPU2_XTS_BDF_MAP_SIZE) {
>> +        NPU2ERR(p, "Out of bounds id=%d\n", id);
>> +        id = OPAL_INTERNAL_ERROR;
>> +        goto out;
>> +    }
>>     NPU2DBG(p, "Found LPARSHORT = 0x%x for BDF = 0x%03llx\n", id, bdf);
>>
>>     /* Enable this mapping for both real and virtual addresses */
> 
> It doesn't hurt to check, I guess, but I'm not sure how/if it's possible
> for this to happen.


Heh. True. Guaranteed to succeed here. Although it would be immediately
clear if NPU2_XTS_BDF_MAP_SIZE was defined via
NPU2_XTS_BDF_MAP_LPARSHORT - 16 vs. 4 bits is pretty bulletproof.


> 
>> @@ -2129,14 +2133,20 @@ static int64_t opal_npu_init_context(uint64_t
>> phb_id, int pasid __unused,
>>             GETFIELD(NPU2_XTS_PID_MAP_MSR, xts_bdf_pid)) {
>>             NPU2ERR(p, "%s: Unexpected MSR value\n", __func__);
>>             id = OPAL_PARAMETER;
>> +            goto out;
>> +        } else if (!p->ctx_ref[id]) {
>> +            NPU2ERR(p, "%s: Unexpected mapping\n", __func__);
>> +            id = OPAL_INTERNAL_ERROR;
>> +            goto out;
> 
> Same.


Not quite the same though. A counter vs. a register value. I'd rather
keep this check.


> 
>>         }
>> -
>> -        goto out;
>>     }
>>
>>     /* Write the entry */
>> -    NPU2DBG(p, "XTS_PID_MAP[%03d] = 0x%08llx\n", id, xts_bdf_pid);
>> -    npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, xts_bdf_pid);
>> +    if (!p->ctx_ref[id]) {
>> +        NPU2DBG(p, "XTS_PID_MAP[%03d] = 0x%08llx\n", id, xts_bdf_pid);
>> +        npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, xts_bdf_pid);
>> +    }
>> +    ++p->ctx_ref[id];
>>
>>     if (!GETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf)) {
>>         xts_bdf = SETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf, 1);
>> @@ -2155,7 +2165,7 @@ static int opal_npu_destroy_context(uint64_t
>> phb_id, uint64_t pid __unused,
>>     struct phb *phb = pci_get_phb(phb_id);
>>     struct npu2 *p;
>>     uint64_t xts_bdf;
>> -    int rc = 0;
>> +    int rc = 0, id;
>>
>>     if (!phb || phb->phb_type != phb_type_npu_v2)
>>         return OPAL_PARAMETER;
>> @@ -2172,10 +2182,24 @@ static int opal_npu_destroy_context(uint64_t
>> phb_id, uint64_t pid __unused,
>>     }
>>
>>     /*
>> -     * The bdf/pid table only contains wildcard entries, so we don't
>> -     * need to remove anything here.
>> +     * The bdf/pid table contains wildcard entries and MSR bits which
>> +     * we need to clear between switching a device from a host to a
>> guest
>> +     * or vice versa.
>>      */
>> -
>> +    id = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf);
>> +    if (id >= NPU2_XTS_BDF_MAP_SIZE) {
>> +        NPU2ERR(p, "Out of bounds id=%d\n", id);
>> +        rc = OPAL_INTERNAL_ERROR;
> 
> Same.

Yeah, I'll ditch this one.


> 
>> +    } else {
>> +        --p->ctx_ref[id];
>> +        rc = p->ctx_ref[id]; /* For debug */
>> +        if (p->ctx_ref[id] < 0) {
>> +            rc = OPAL_INTERNAL_ERROR; /* Sanity check */
> 
> If somebody (for whatever reason) calls opal_npu_destroy_context()
> repeatedly for an already-destroyed ID, you don't want the refcount to
> go more and more negative.
> 
> I think instead of checking if it is negative afterwards, maybe check to
> see if it is zero before, and return OPAL_PARAMETER.

True. Thanks for the review. Now please review the kernel patchset.
Thanks :)

> 
>> +        } else if (!p->ctx_ref[id]) {
>> +            NPU2DBG(p, "XTS_PID_MAP[%03d] = 0 (destroy)\n", id);
>> +            npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, 0);
>> +        }
>> +    }
>>     unlock(&p->lock);
>>     return rc;
>> }
> 
>

Patch

diff --git a/include/npu2.h b/include/npu2.h
index 1de963d..d05c001 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -19,6 +19,7 @@ 
 
 #include <pci.h>
 #include <phys-map.h>
+#include <npu2-regs.h>
 
 /* Debugging options */
 #define NPU2DBG(p, fmt, a...)	prlog(PR_DEBUG, "NPU%d: " fmt, \
@@ -162,6 +163,7 @@  struct npu2 {
 	uint32_t	total_devices;
 	struct npu2_dev	*devices;
 	enum phys_map_type gpu_map_type;
+	int		ctx_ref[NPU2_XTS_BDF_MAP_SIZE];
 
 	/* IODA cache */
 	uint64_t	lxive_cache[8];
diff --git a/hw/npu2.c b/hw/npu2.c
index ba1264b..80043e3 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -28,7 +28,6 @@ 
 #include <ccan/str/str.h>
 #include <ccan/array_size/array_size.h>
 #include <affinity.h>
-#include <npu2-regs.h>
 #include <npu2.h>
 #include <lock.h>
 #include <xscom.h>
@@ -2097,6 +2096,11 @@  static int64_t opal_npu_init_context(uint64_t phb_id, int pasid __unused,
 	}
 
 	id = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf);
+	if (id >= NPU2_XTS_BDF_MAP_SIZE) {
+		NPU2ERR(p, "Out of bounds id=%d\n", id);
+		id = OPAL_INTERNAL_ERROR;
+		goto out;
+	}
 	NPU2DBG(p, "Found LPARSHORT = 0x%x for BDF = 0x%03llx\n", id, bdf);
 
 	/* Enable this mapping for both real and virtual addresses */
@@ -2129,14 +2133,20 @@  static int64_t opal_npu_init_context(uint64_t phb_id, int pasid __unused,
 		    GETFIELD(NPU2_XTS_PID_MAP_MSR, xts_bdf_pid)) {
 			NPU2ERR(p, "%s: Unexpected MSR value\n", __func__);
 			id = OPAL_PARAMETER;
+			goto out;
+		} else if (!p->ctx_ref[id]) {
+			NPU2ERR(p, "%s: Unexpected mapping\n", __func__);
+			id = OPAL_INTERNAL_ERROR;
+			goto out;
 		}
-
-		goto out;
 	}
 
 	/* Write the entry */
-	NPU2DBG(p, "XTS_PID_MAP[%03d] = 0x%08llx\n", id, xts_bdf_pid);
-	npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, xts_bdf_pid);
+	if (!p->ctx_ref[id]) {
+		NPU2DBG(p, "XTS_PID_MAP[%03d] = 0x%08llx\n", id, xts_bdf_pid);
+		npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, xts_bdf_pid);
+	}
+	++p->ctx_ref[id];
 
 	if (!GETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf)) {
 		xts_bdf = SETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf, 1);
@@ -2155,7 +2165,7 @@  static int opal_npu_destroy_context(uint64_t phb_id, uint64_t pid __unused,
 	struct phb *phb = pci_get_phb(phb_id);
 	struct npu2 *p;
 	uint64_t xts_bdf;
-	int rc = 0;
+	int rc = 0, id;
 
 	if (!phb || phb->phb_type != phb_type_npu_v2)
 		return OPAL_PARAMETER;
@@ -2172,10 +2182,24 @@  static int opal_npu_destroy_context(uint64_t phb_id, uint64_t pid __unused,
 	}
 
 	/*
-	 * The bdf/pid table only contains wildcard entries, so we don't
-	 * need to remove anything here.
+	 * The bdf/pid table contains wildcard entries and MSR bits which
+	 * we need to clear between switching a device from a host to a guest
+	 * or vice versa.
 	 */
-
+	id = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf);
+	if (id >= NPU2_XTS_BDF_MAP_SIZE) {
+		NPU2ERR(p, "Out of bounds id=%d\n", id);
+		rc = OPAL_INTERNAL_ERROR;
+	} else {
+		--p->ctx_ref[id];
+		rc = p->ctx_ref[id]; /* For debug */
+		if (p->ctx_ref[id] < 0) {
+			rc = OPAL_INTERNAL_ERROR; /* Sanity check */
+		} else if (!p->ctx_ref[id]) {
+			NPU2DBG(p, "XTS_PID_MAP[%03d] = 0 (destroy)\n", id);
+			npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, 0);
+		}
+	}
 	unlock(&p->lock);
 	return rc;
 }