Message ID | 20181113081602.116897-1-aik@ozlabs.ru |
---|---|
State | Superseded |
Headers | show |
Series | [v2] npu2: Add XTS_BDF_MAP wildcard refcount | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | master/apply_patch Successfully applied |
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot | success | Test snowpatch/job/snowpatch-skiboot on branch master |
On Tue, Nov 13, 2018 at 07:16:02PM +1100, Alexey Kardashevskiy wrote: >@@ -2097,6 +2096,11 @@ static int64_t opal_npu_init_context(uint64_t phb_id, int pasid __unused, > } > > id = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf); >+ if (id >= NPU2_XTS_BDF_MAP_SIZE) { >+ NPU2ERR(p, "Out of bounds id=%d\n", id); >+ id = OPAL_INTERNAL_ERROR; >+ goto out; >+ } > NPU2DBG(p, "Found LPARSHORT = 0x%x for BDF = 0x%03llx\n", id, bdf); > > /* Enable this mapping for both real and virtual addresses */ It doesn't hurt to check, I guess, but I'm not sure how/if it's possible for this to happen. >@@ -2129,14 +2133,20 @@ static int64_t opal_npu_init_context(uint64_t phb_id, int pasid __unused, > GETFIELD(NPU2_XTS_PID_MAP_MSR, xts_bdf_pid)) { > NPU2ERR(p, "%s: Unexpected MSR value\n", __func__); > id = OPAL_PARAMETER; >+ goto out; >+ } else if (!p->ctx_ref[id]) { >+ NPU2ERR(p, "%s: Unexpected mapping\n", __func__); >+ id = OPAL_INTERNAL_ERROR; >+ goto out; Same. > } >- >- goto out; > } > > /* Write the entry */ >- NPU2DBG(p, "XTS_PID_MAP[%03d] = 0x%08llx\n", id, xts_bdf_pid); >- npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, xts_bdf_pid); >+ if (!p->ctx_ref[id]) { >+ NPU2DBG(p, "XTS_PID_MAP[%03d] = 0x%08llx\n", id, xts_bdf_pid); >+ npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, xts_bdf_pid); >+ } >+ ++p->ctx_ref[id]; > > if (!GETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf)) { > xts_bdf = SETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf, 1); >@@ -2155,7 +2165,7 @@ static int opal_npu_destroy_context(uint64_t phb_id, uint64_t pid __unused, > struct phb *phb = pci_get_phb(phb_id); > struct npu2 *p; > uint64_t xts_bdf; >- int rc = 0; >+ int rc = 0, id; > > if (!phb || phb->phb_type != phb_type_npu_v2) > return OPAL_PARAMETER; >@@ -2172,10 +2182,24 @@ static int opal_npu_destroy_context(uint64_t phb_id, uint64_t pid __unused, > } > > /* >- * The bdf/pid table only contains wildcard entries, so we don't >- * need to remove anything here. >+ * The bdf/pid table contains wildcard entries and MSR bits which >+ * we need to clear between switching a device from a host to a guest >+ * or vice versa. > */ >- >+ id = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf); >+ if (id >= NPU2_XTS_BDF_MAP_SIZE) { >+ NPU2ERR(p, "Out of bounds id=%d\n", id); >+ rc = OPAL_INTERNAL_ERROR; Same. >+ } else { >+ --p->ctx_ref[id]; >+ rc = p->ctx_ref[id]; /* For debug */ >+ if (p->ctx_ref[id] < 0) { >+ rc = OPAL_INTERNAL_ERROR; /* Sanity check */ If somebody (for whatever reason) calls opal_npu_destroy_context() repeatedly for an already-destroyed ID, you don't want the refcount to go more and more negative. I think instead of checking if it is negative afterwards, maybe check to see if it is zero before, and return OPAL_PARAMETER. >+ } else if (!p->ctx_ref[id]) { >+ NPU2DBG(p, "XTS_PID_MAP[%03d] = 0 (destroy)\n", id); >+ npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, 0); >+ } >+ } > unlock(&p->lock); > return rc; > }
On 21/11/2018 07:18, Reza Arbab wrote: > On Tue, Nov 13, 2018 at 07:16:02PM +1100, Alexey Kardashevskiy wrote: >> @@ -2097,6 +2096,11 @@ static int64_t opal_npu_init_context(uint64_t >> phb_id, int pasid __unused, >> } >> >> id = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf); >> + if (id >= NPU2_XTS_BDF_MAP_SIZE) { >> + NPU2ERR(p, "Out of bounds id=%d\n", id); >> + id = OPAL_INTERNAL_ERROR; >> + goto out; >> + } >> NPU2DBG(p, "Found LPARSHORT = 0x%x for BDF = 0x%03llx\n", id, bdf); >> >> /* Enable this mapping for both real and virtual addresses */ > > It doesn't hurt to check, I guess, but I'm not sure how/if it's possible > for this to happen. Heh. True. Guaranteed to succeed here. Although it would be immediately clear if NPU2_XTS_BDF_MAP_SIZE was defined via NPU2_XTS_BDF_MAP_LPARSHORT - 16 vs. 4 bits is pretty bulletproof. > >> @@ -2129,14 +2133,20 @@ static int64_t opal_npu_init_context(uint64_t >> phb_id, int pasid __unused, >> GETFIELD(NPU2_XTS_PID_MAP_MSR, xts_bdf_pid)) { >> NPU2ERR(p, "%s: Unexpected MSR value\n", __func__); >> id = OPAL_PARAMETER; >> + goto out; >> + } else if (!p->ctx_ref[id]) { >> + NPU2ERR(p, "%s: Unexpected mapping\n", __func__); >> + id = OPAL_INTERNAL_ERROR; >> + goto out; > > Same. Not quite the same though. A counter vs. a register value. I'd rather keep this check. > >> } >> - >> - goto out; >> } >> >> /* Write the entry */ >> - NPU2DBG(p, "XTS_PID_MAP[%03d] = 0x%08llx\n", id, xts_bdf_pid); >> - npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, xts_bdf_pid); >> + if (!p->ctx_ref[id]) { >> + NPU2DBG(p, "XTS_PID_MAP[%03d] = 0x%08llx\n", id, xts_bdf_pid); >> + npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, xts_bdf_pid); >> + } >> + ++p->ctx_ref[id]; >> >> if (!GETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf)) { >> xts_bdf = SETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf, 1); >> @@ -2155,7 +2165,7 @@ static int opal_npu_destroy_context(uint64_t >> phb_id, uint64_t pid __unused, >> struct phb *phb = pci_get_phb(phb_id); >> struct npu2 *p; >> uint64_t xts_bdf; >> - int rc = 0; >> + int rc = 0, id; >> >> if (!phb || phb->phb_type != phb_type_npu_v2) >> return OPAL_PARAMETER; >> @@ -2172,10 +2182,24 @@ static int opal_npu_destroy_context(uint64_t >> phb_id, uint64_t pid __unused, >> } >> >> /* >> - * The bdf/pid table only contains wildcard entries, so we don't >> - * need to remove anything here. >> + * The bdf/pid table contains wildcard entries and MSR bits which >> + * we need to clear between switching a device from a host to a >> guest >> + * or vice versa. >> */ >> - >> + id = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf); >> + if (id >= NPU2_XTS_BDF_MAP_SIZE) { >> + NPU2ERR(p, "Out of bounds id=%d\n", id); >> + rc = OPAL_INTERNAL_ERROR; > > Same. Yeah, I'll ditch this one. > >> + } else { >> + --p->ctx_ref[id]; >> + rc = p->ctx_ref[id]; /* For debug */ >> + if (p->ctx_ref[id] < 0) { >> + rc = OPAL_INTERNAL_ERROR; /* Sanity check */ > > If somebody (for whatever reason) calls opal_npu_destroy_context() > repeatedly for an already-destroyed ID, you don't want the refcount to > go more and more negative. > > I think instead of checking if it is negative afterwards, maybe check to > see if it is zero before, and return OPAL_PARAMETER. True. Thanks for the review. Now please review the kernel patchset. Thanks :) > >> + } else if (!p->ctx_ref[id]) { >> + NPU2DBG(p, "XTS_PID_MAP[%03d] = 0 (destroy)\n", id); >> + npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, 0); >> + } >> + } >> unlock(&p->lock); >> return rc; >> } > >
diff --git a/include/npu2.h b/include/npu2.h index 1de963d..d05c001 100644 --- a/include/npu2.h +++ b/include/npu2.h @@ -19,6 +19,7 @@ #include <pci.h> #include <phys-map.h> +#include <npu2-regs.h> /* Debugging options */ #define NPU2DBG(p, fmt, a...) prlog(PR_DEBUG, "NPU%d: " fmt, \ @@ -162,6 +163,7 @@ struct npu2 { uint32_t total_devices; struct npu2_dev *devices; enum phys_map_type gpu_map_type; + int ctx_ref[NPU2_XTS_BDF_MAP_SIZE]; /* IODA cache */ uint64_t lxive_cache[8]; diff --git a/hw/npu2.c b/hw/npu2.c index ba1264b..80043e3 100644 --- a/hw/npu2.c +++ b/hw/npu2.c @@ -28,7 +28,6 @@ #include <ccan/str/str.h> #include <ccan/array_size/array_size.h> #include <affinity.h> -#include <npu2-regs.h> #include <npu2.h> #include <lock.h> #include <xscom.h> @@ -2097,6 +2096,11 @@ static int64_t opal_npu_init_context(uint64_t phb_id, int pasid __unused, } id = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf); + if (id >= NPU2_XTS_BDF_MAP_SIZE) { + NPU2ERR(p, "Out of bounds id=%d\n", id); + id = OPAL_INTERNAL_ERROR; + goto out; + } NPU2DBG(p, "Found LPARSHORT = 0x%x for BDF = 0x%03llx\n", id, bdf); /* Enable this mapping for both real and virtual addresses */ @@ -2129,14 +2133,20 @@ static int64_t opal_npu_init_context(uint64_t phb_id, int pasid __unused, GETFIELD(NPU2_XTS_PID_MAP_MSR, xts_bdf_pid)) { NPU2ERR(p, "%s: Unexpected MSR value\n", __func__); id = OPAL_PARAMETER; + goto out; + } else if (!p->ctx_ref[id]) { + NPU2ERR(p, "%s: Unexpected mapping\n", __func__); + id = OPAL_INTERNAL_ERROR; + goto out; } - - goto out; } /* Write the entry */ - NPU2DBG(p, "XTS_PID_MAP[%03d] = 0x%08llx\n", id, xts_bdf_pid); - npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, xts_bdf_pid); + if (!p->ctx_ref[id]) { + NPU2DBG(p, "XTS_PID_MAP[%03d] = 0x%08llx\n", id, xts_bdf_pid); + npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, xts_bdf_pid); + } + ++p->ctx_ref[id]; if (!GETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf)) { xts_bdf = SETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf, 1); @@ -2155,7 +2165,7 @@ static int opal_npu_destroy_context(uint64_t phb_id, uint64_t pid __unused, struct phb *phb = pci_get_phb(phb_id); struct npu2 *p; uint64_t xts_bdf; - int rc = 0; + int rc = 0, id; if (!phb || phb->phb_type != phb_type_npu_v2) return OPAL_PARAMETER; @@ -2172,10 +2182,24 @@ static int opal_npu_destroy_context(uint64_t phb_id, uint64_t pid __unused, } /* - * The bdf/pid table only contains wildcard entries, so we don't - * need to remove anything here. + * The bdf/pid table contains wildcard entries and MSR bits which + * we need to clear between switching a device from a host to a guest + * or vice versa. */ - + id = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf); + if (id >= NPU2_XTS_BDF_MAP_SIZE) { + NPU2ERR(p, "Out of bounds id=%d\n", id); + rc = OPAL_INTERNAL_ERROR; + } else { + --p->ctx_ref[id]; + rc = p->ctx_ref[id]; /* For debug */ + if (p->ctx_ref[id] < 0) { + rc = OPAL_INTERNAL_ERROR; /* Sanity check */ + } else if (!p->ctx_ref[id]) { + NPU2DBG(p, "XTS_PID_MAP[%03d] = 0 (destroy)\n", id); + npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, 0); + } + } unlock(&p->lock); return rc; }
Currently PID wildcard is programmed into the NPU once and never cleared up. This works for the bare metal as MSR does not change while the host OS is running. However with the device virtualization, we need to keep track of wildcard entries use and clear them up before switching a GPU from a host to a guest or vice versa. This adds refcount to a NPU2, one counter per wildcard entry. The index is a short lparid which is allocated in opal_npu_map_lpar() and should be smaller than NPU2_XTS_BDF_MAP_SIZE. Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> --- Changes: v2: * fixed error handling opal_npu_init_context when checking xts_bdf_pid --- include/npu2.h | 2 ++ hw/npu2.c | 42 +++++++++++++++++++++++++++++++++--------- 2 files changed, 35 insertions(+), 9 deletions(-)