Message ID | 1c59b92a2dbb5394a6bf8598263e586fd8271fbb.1513579137.git-series.andrew.donnellan@au1.ibm.com |
---|---|
State | Changes Requested |
Headers | show |
Series | Initial OpenCAPI 3.0 Support for P9 |
Le 18/12/2017 à 08:07, Andrew Donnellan a écrit :
> From: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
>
> Add three OPAL API calls that are required by the ocxl driver.
>
> - OPAL_NPU_SPA_SETUP
>
> The Shared Process Area (SPA) is a table containing one entry (a
> "Process Element") per memory context which can be accessed by the
> OpenCAPI device.
>
> - OPAL_NPU_SPA_CLEAR_CACHE
>
> The NPU keeps a cache of recently accessed memory contexts. When a
> Process Element is removed from the SPA, the cache for the link must be
> cleared.
>
> - OPAL_NPU_TL_SET
>
> The Transaction Layer specification defines several templates for
> messages to be exchanged on the link. During link setup, the host and
> device must negotiate what templates are supported on both sides and at
> what rates those messages can be sent.
>
> Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
> ---

It still looks mostly ok to me.

Fred

> doc/opal-api/opal-npu2-opencapi-159-160-161.rst | 126 ++++++++++-
> hw/npu2-opencapi.c | 206 +++++++++++++++++-
> include/npu2-regs.h | 4 +-
> include/opal-api.h | 5 +-
> 4 files changed, 340 insertions(+), 1 deletion(-)
> create mode 100644 doc/opal-api/opal-npu2-opencapi-159-160-161.rst
>
> diff --git a/doc/opal-api/opal-npu2-opencapi-159-160-161.rst b/doc/opal-api/opal-npu2-opencapi-159-160-161.rst
> new file mode 100644
> index 0000000..4db3d3e
> --- /dev/null
> +++ b/doc/opal-api/opal-npu2-opencapi-159-160-161.rst
> @@ -0,0 +1,126 @@
> +.. _OPAL_NPU_SPA_SETUP:
> +
> +OPAL_NPU_SPA_SETUP
> +==================
> +
> +OpenCAPI devices only.
> +
> +Sets up a Shared Process Area (SPA) with the Shared Process Area
> +Pointer (SPAP) set to the provided address `addr`, and sets the OTL PE
> +mask (used for PASID to PE handle conversion) to `PE_mask`.
> +
> +If `addr` is NULL, the SPA will be disabled. `addr` must be 4K aligned.
> + > +Parameters > +---------- > +:: > + > + uint64_t phb_id > + int bdfn > + uint64_t addr > + uint64_t PE_mask > + > +``phb_id`` > + OPAL ID of PHB > + > +``bdfn`` > + Bus-Device-Function number of OpenCAPI AFU > + > +``addr`` > + Address of Shared Process Area, or NULL to disable SPA. Must be 4K aligned. > + > +``PE_mask`` > + Process Element mask for PASID to PE handle conversion > + > +Return Values > +------------- > + > +OPAL_SUCCESS > + SPAP and PE mask were successfully set > + > +OPAL_PARAMETER > + A provided parameter was invalid > + > +OPAL_BUSY > + SPA is already enabled (or if addr is NULL, SPA is already disabled) > + > +.. _OPAL_NPU_SPA_CLEAR_CACHE: > + > +OPAL_NPU_SPA_CLEAR_CACHE > +======================== > + > +OpenCAPI devices only. > + > +Invalidates the Process Element with the given `PE_handle` from the NPU's SPA cache. > + > +Parameters > +---------- > +:: > + > + uint64_t phb_id > + uint32_t bdfn > + uint64_t PE_handle > + > +``phb_id`` > + OPAL ID of PHB > + > +``bdfn`` > + Bus-Device-Function number of OpenCAPI AFU > + > +``PE_handle`` > + Handle of Process Element being cleared from SPA cache > + > +Return Values > +------------- > + > +OPAL_SUCCESS > + PE was successfully cleared from SPA cache > + > +OPAL_PARAMETER > + A provided parameter was invalid > + > +OPAL_BUSY > + XSLO is currently invalidating a previously requested entry > + > +.. _OPAL_NPU_TL_SET: > + > +OPAL_NPU_TL_SET > +=============== > + > +OpenCAPI devices only. > + > +Update the NPU OTL configuration with device capabilities. 
> + > +Parameters > +---------- > +:: > + > + uint64_t phb_id > + uint32_t bdfn > + long capabilities > + uint64_t rate_phys > + int rate_sz > + > +``phb_id`` > + OPAL ID of PHB > + > +``bdfn`` > + Bus-Device-Function number of OpenCAPI AFU > + > +``capabilities`` > + Bitmap of TL templates the device can receive > + > +``rate_phys`` > + Physical address of rates buffer > + > +``rate_sz`` > + Size of rates buffer (must be equal to 32) > + > +Return Values > +------------- > + > +OPAL_SUCCESS > + OTL configuration was successfully updated > + > +OPAL_PARAMETER > + A provided parameter was invalid > diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c > index ea55dad..196b7f7 100644 > --- a/hw/npu2-opencapi.c > +++ b/hw/npu2-opencapi.c > @@ -54,6 +54,9 @@ > > #define NPU_IRQ_LEVELS 35 > #define NPU_IRQ_LEVELS_XSL 23 > +#define MAX_PE_HANDLE ((1 << 15) - 1) > +#define TL_MAX_TEMPLATE 63 > +#define TL_RATE_BUF_SIZE 32 > > static const struct phb_ops npu2_opencapi_ops; > > @@ -1377,3 +1380,206 @@ static const struct phb_ops npu2_opencapi_ops = { > .set_capp_recovery = NULL, > .tce_kill = NULL, > }; > + > +static int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t __unused bdfn, > + uint64_t addr, uint64_t PE_mask) > +{ > + uint64_t stack, block, offset, reg; > + struct phb *phb = pci_get_phb(phb_id); > + struct npu2_dev *dev; > + int rc; > + > + if (!phb || phb->phb_type != phb_type_npu_v2_opencapi) > + return OPAL_PARAMETER; > + > + /* 4k aligned */ > + if (addr & 0xFFF) > + return OPAL_PARAMETER; > + > + if (PE_mask > 15) > + return OPAL_PARAMETER; > + > + dev = phb_to_npu2_dev_ocapi(phb); > + if (!dev) > + return OPAL_PARAMETER; > + > + block = index_to_block(dev->index); > + stack = index_to_stack(dev->index); > + if (block == NPU2_BLOCK_OTL1) > + offset = NPU2_XSL_PSL_SPAP_A1; > + else > + offset = NPU2_XSL_PSL_SPAP_A0; > + > + > + lock(&dev->npu->lock); > + /* > + * set the SPAP used by the device > + */ > + reg = npu2_scom_read(dev->npu->chip_id, 
dev->npu->xscom_base, > + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, offset), > + NPU2_MISC_DA_LEN_8B); > + if ((addr && (reg & NPU2_XSL_PSL_SPAP_EN)) || > + (!addr && !(reg & NPU2_XSL_PSL_SPAP_EN))) { > + rc = OPAL_BUSY; > + goto out; > + } > + /* SPA is disabled by passing a NULL address */ > + reg = addr; > + if (addr) > + reg = addr | NPU2_XSL_PSL_SPAP_EN; > + > + npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base, > + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, offset), > + NPU2_MISC_DA_LEN_8B, reg); > + > + /* > + * set the PE mask that the OS uses for PASID -> PE handle > + * conversion > + */ > + reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base, > + NPU2_OTL_CONFIG0(stack, block), NPU2_MISC_DA_LEN_8B); > + reg &= ~NPU2_OTL_CONFIG0_PE_MASK; > + reg |= (PE_mask << (63-7)); > + npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base, > + NPU2_OTL_CONFIG0(stack, block), NPU2_MISC_DA_LEN_8B, > + reg); > + rc = OPAL_SUCCESS; > +out: > + unlock(&dev->npu->lock); > + return rc; > +} > +opal_call(OPAL_NPU_SPA_SETUP, opal_npu_spa_setup, 4); > + > +static int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t __unused bdfn, > + uint64_t PE_handle) > +{ > + uint64_t cc_inv, stack, block, reg, rc; > + uint32_t retries = 5; > + struct phb *phb = pci_get_phb(phb_id); > + struct npu2_dev *dev; > + > + if (!phb || phb->phb_type != phb_type_npu_v2_opencapi) > + return OPAL_PARAMETER; > + > + if (PE_handle > MAX_PE_HANDLE) > + return OPAL_PARAMETER; > + > + dev = phb_to_npu2_dev_ocapi(phb); > + if (!dev) > + return OPAL_PARAMETER; > + > + block = index_to_block(dev->index); > + stack = index_to_stack(dev->index); > + cc_inv = NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_PSL_LLCMD_A0); > + > + lock(&dev->npu->lock); > + reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base, cc_inv, > + NPU2_MISC_DA_LEN_8B); > + if (reg & PPC_BIT(16)) { > + rc = OPAL_BUSY; > + goto out; > + } > + > + reg = PE_handle | PPC_BIT(15); > + if (block == NPU2_BLOCK_OTL1) > + reg 
|= PPC_BIT(48); > + npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base, cc_inv, > + NPU2_MISC_DA_LEN_8B, reg); > + > + rc = OPAL_HARDWARE; > + while (retries--) { > + reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base, > + cc_inv, NPU2_MISC_DA_LEN_8B); > + if (!(reg & PPC_BIT(16))) { > + rc = OPAL_SUCCESS; > + break; > + } > + /* the bit expected to flip in less than 200us */ > + time_wait_us(200); > + } > +out: > + unlock(&dev->npu->lock); > + return rc; > +} > +opal_call(OPAL_NPU_SPA_CLEAR_CACHE, opal_npu_spa_clear_cache, 3); > + > +static int get_template_rate(unsigned int templ, char *rate_buf) > +{ > + int shift, idx, val; > + > + /* > + * Each rate is encoded over 4 bits (0->15), with 15 being the > + * slowest. The buffer is a succession of rates for all the > + * templates. The first 4 bits are for template 63, followed > + * by 4 bits for template 62, ... etc. So the rate for > + * template 0 is at the very end of the buffer. > + */ > + idx = (TL_MAX_TEMPLATE - templ) / 2; > + shift = 4 * (1 - ((TL_MAX_TEMPLATE - templ) % 2)); > + val = rate_buf[idx] >> shift; > + return val; > +} > + > +static bool is_template_supported(unsigned int templ, long capabilities) > +{ > + return !!(capabilities & (1ull << templ)); > +} > + > +static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, > + long capabilities, uint64_t rate_phys, int rate_sz) > +{ > + struct phb *phb = pci_get_phb(phb_id); > + struct npu2_dev *dev; > + uint64_t stack, block, reg, templ_rate; > + int i, rate_pos; > + char *rate = (char *) rate_phys; > + > + if (!phb || phb->phb_type != phb_type_npu_v2_opencapi) > + return OPAL_PARAMETER; > + if (!opal_addr_valid(rate) || rate_sz != TL_RATE_BUF_SIZE) > + return OPAL_PARAMETER; > + > + dev = phb_to_npu2_dev_ocapi(phb); > + if (!dev) > + return OPAL_PARAMETER; > + > + block = index_to_block(dev->index); > + stack = index_to_stack(dev->index); > + /* > + * The 'capabilities' argument defines what TL template the > + * device can 
receive. OpenCAPI 3.0 and 4.0 define 64 templates, so > + * that's one bit per template. > + * > + * For each template, the device processing time may vary, so > + * the device advertises at what rate a message of a given > + * template can be sent. That's encoded in the 'rate' buffer. > + * > + * On P9, NPU only knows about TL templates 0 -> 3. > + * Per the spec, template 0 must be supported. > + */ > + if (!is_template_supported(0, capabilities)) > + return OPAL_PARAMETER; > + > + reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base, > + NPU2_OTL_CONFIG1(stack, block), > + NPU2_MISC_DA_LEN_8B); > + reg &= ~(NPU2_OTL_CONFIG1_TX_TEMP1_EN | NPU2_OTL_CONFIG1_TX_TEMP3_EN | > + NPU2_OTL_CONFIG1_TX_TEMP1_EN); > + for (i = 0; i < 4; i++) { > + /* Skip template 0 as it is implicitly enabled */ > + if (i && is_template_supported(i, capabilities)) > + reg |= PPC_BIT(i); > + /* The tx rate should still be set for template 0 */ > + templ_rate = get_template_rate(i, rate); > + rate_pos = 8 + i * 4; > + reg = SETFIELD(PPC_BITMASK(rate_pos, rate_pos + 3), reg, > + templ_rate); > + } > + npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base, > + NPU2_OTL_CONFIG1(stack, block), NPU2_MISC_DA_LEN_8B, > + reg); > + prlog(PR_DEBUG, "OCAPI: Link %llx:%x, TL conf1 register set to %llx\n", > + phb_id, bdfn, reg); > + return OPAL_SUCCESS; > +} > +opal_call(OPAL_NPU_TL_SET, opal_npu_tl_set, 5); > diff --git a/include/npu2-regs.h b/include/npu2-regs.h > index 876d7f0..5d99a3e 100644 > --- a/include/npu2-regs.h > +++ b/include/npu2-regs.h > @@ -321,6 +321,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_NTL_DL_CLK_CTRL(ndev) NPU2_DL_REG_OFFSET(ndev, 0x001C) > > /* OpenCAPI - XSL registers */ > +#define NPU2_XSL_PSL_LLCMD_A0 0x008 > #define NPU2_XSL_PSL_SCNTL_A0 0x010 > #define NPU2_XSL_PSL_SCNTL_A0_MULTI_AFU_DIAL PPC_BIT(0) > #define NPU2_XSL_DEF 0x040 > @@ -328,6 +329,9 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define 
NPU2_XSL_GP_BLOOM_FILTER_ENABLE PPC_BIT(16) > #define NPU2_XSL_WRAP_CFG 0x0C0 > #define NPU2_XSL_WRAP_CFG_XSLO_CLOCK_ENABLE PPC_BIT(0) > +#define NPU2_XSL_PSL_SPAP_A0 0 > +#define NPU2_XSL_PSL_SPAP_A1 0x18 > +#define NPU2_XSL_PSL_SPAP_EN PPC_BIT(63) > > /* OpenCAPI - OTL registers */ > #define NPU2_OTL_CONFIG0(stack, block) NPU2_REG_OFFSET(stack, block, 0x000) > diff --git a/include/opal-api.h b/include/opal-api.h > index 1c1b9cc..05d6f4c 100644 > --- a/include/opal-api.h > +++ b/include/opal-api.h > @@ -215,7 +215,10 @@ > #define OPAL_SENSOR_GROUP_CLEAR 156 > #define OPAL_PCI_SET_P2P 157 > #define OPAL_QUIESCE 158 > -#define OPAL_LAST 158 > +#define OPAL_NPU_SPA_SETUP 159 > +#define OPAL_NPU_SPA_CLEAR_CACHE 160 > +#define OPAL_NPU_TL_SET 161 > +#define OPAL_LAST 161 > > #define QUIESCE_HOLD 1 /* Spin all calls at entry */ > #define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */ >
diff --git a/doc/opal-api/opal-npu2-opencapi-159-160-161.rst b/doc/opal-api/opal-npu2-opencapi-159-160-161.rst new file mode 100644 index 0000000..4db3d3e --- /dev/null +++ b/doc/opal-api/opal-npu2-opencapi-159-160-161.rst @@ -0,0 +1,126 @@ +.. _OPAL_NPU_SPA_SETUP: + +OPAL_NPU_SPA_SETUP +================== + +OpenCAPI devices only. + +Sets up a Shared Process Area (SPA) with the Shared Process Area +Pointer (SPAP) set to the provided address `addr`, and sets the OTL PE +mask (used for PASID to PE handle conversion) to `PE_mask`. + +If `addr` is NULL, the SPA will be disabled. `addr` must be 4K aligned. + +Parameters +---------- +:: + + uint64_t phb_id + int bdfn + uint64_t addr + uint64_t PE_mask + +``phb_id`` + OPAL ID of PHB + +``bdfn`` + Bus-Device-Function number of OpenCAPI AFU + +``addr`` + Address of Shared Process Area, or NULL to disable SPA. Must be 4K aligned. + +``PE_mask`` + Process Element mask for PASID to PE handle conversion + +Return Values +------------- + +OPAL_SUCCESS + SPAP and PE mask were successfully set + +OPAL_PARAMETER + A provided parameter was invalid + +OPAL_BUSY + SPA is already enabled (or if addr is NULL, SPA is already disabled) + +.. _OPAL_NPU_SPA_CLEAR_CACHE: + +OPAL_NPU_SPA_CLEAR_CACHE +======================== + +OpenCAPI devices only. + +Invalidates the Process Element with the given `PE_handle` from the NPU's SPA cache. + +Parameters +---------- +:: + + uint64_t phb_id + uint32_t bdfn + uint64_t PE_handle + +``phb_id`` + OPAL ID of PHB + +``bdfn`` + Bus-Device-Function number of OpenCAPI AFU + +``PE_handle`` + Handle of Process Element being cleared from SPA cache + +Return Values +------------- + +OPAL_SUCCESS + PE was successfully cleared from SPA cache + +OPAL_PARAMETER + A provided parameter was invalid + +OPAL_BUSY + XSLO is currently invalidating a previously requested entry + +.. _OPAL_NPU_TL_SET: + +OPAL_NPU_TL_SET +=============== + +OpenCAPI devices only. 
+ +Update the NPU OTL configuration with device capabilities. + +Parameters +---------- +:: + + uint64_t phb_id + uint32_t bdfn + long capabilities + uint64_t rate_phys + int rate_sz + +``phb_id`` + OPAL ID of PHB + +``bdfn`` + Bus-Device-Function number of OpenCAPI AFU + +``capabilities`` + Bitmap of TL templates the device can receive + +``rate_phys`` + Physical address of rates buffer + +``rate_sz`` + Size of rates buffer (must be equal to 32) + +Return Values +------------- + +OPAL_SUCCESS + OTL configuration was successfully updated + +OPAL_PARAMETER + A provided parameter was invalid diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c index ea55dad..196b7f7 100644 --- a/hw/npu2-opencapi.c +++ b/hw/npu2-opencapi.c @@ -54,6 +54,9 @@ #define NPU_IRQ_LEVELS 35 #define NPU_IRQ_LEVELS_XSL 23 +#define MAX_PE_HANDLE ((1 << 15) - 1) +#define TL_MAX_TEMPLATE 63 +#define TL_RATE_BUF_SIZE 32 static const struct phb_ops npu2_opencapi_ops; @@ -1377,3 +1380,206 @@ static const struct phb_ops npu2_opencapi_ops = { .set_capp_recovery = NULL, .tce_kill = NULL, }; + +static int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t __unused bdfn, + uint64_t addr, uint64_t PE_mask) +{ + uint64_t stack, block, offset, reg; + struct phb *phb = pci_get_phb(phb_id); + struct npu2_dev *dev; + int rc; + + if (!phb || phb->phb_type != phb_type_npu_v2_opencapi) + return OPAL_PARAMETER; + + /* 4k aligned */ + if (addr & 0xFFF) + return OPAL_PARAMETER; + + if (PE_mask > 15) + return OPAL_PARAMETER; + + dev = phb_to_npu2_dev_ocapi(phb); + if (!dev) + return OPAL_PARAMETER; + + block = index_to_block(dev->index); + stack = index_to_stack(dev->index); + if (block == NPU2_BLOCK_OTL1) + offset = NPU2_XSL_PSL_SPAP_A1; + else + offset = NPU2_XSL_PSL_SPAP_A0; + + + lock(&dev->npu->lock); + /* + * set the SPAP used by the device + */ + reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base, + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, offset), + NPU2_MISC_DA_LEN_8B); + if ((addr && (reg & 
NPU2_XSL_PSL_SPAP_EN)) || + (!addr && !(reg & NPU2_XSL_PSL_SPAP_EN))) { + rc = OPAL_BUSY; + goto out; + } + /* SPA is disabled by passing a NULL address */ + reg = addr; + if (addr) + reg = addr | NPU2_XSL_PSL_SPAP_EN; + + npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base, + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, offset), + NPU2_MISC_DA_LEN_8B, reg); + + /* + * set the PE mask that the OS uses for PASID -> PE handle + * conversion + */ + reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base, + NPU2_OTL_CONFIG0(stack, block), NPU2_MISC_DA_LEN_8B); + reg &= ~NPU2_OTL_CONFIG0_PE_MASK; + reg |= (PE_mask << (63-7)); + npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base, + NPU2_OTL_CONFIG0(stack, block), NPU2_MISC_DA_LEN_8B, + reg); + rc = OPAL_SUCCESS; +out: + unlock(&dev->npu->lock); + return rc; +} +opal_call(OPAL_NPU_SPA_SETUP, opal_npu_spa_setup, 4); + +static int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t __unused bdfn, + uint64_t PE_handle) +{ + uint64_t cc_inv, stack, block, reg, rc; + uint32_t retries = 5; + struct phb *phb = pci_get_phb(phb_id); + struct npu2_dev *dev; + + if (!phb || phb->phb_type != phb_type_npu_v2_opencapi) + return OPAL_PARAMETER; + + if (PE_handle > MAX_PE_HANDLE) + return OPAL_PARAMETER; + + dev = phb_to_npu2_dev_ocapi(phb); + if (!dev) + return OPAL_PARAMETER; + + block = index_to_block(dev->index); + stack = index_to_stack(dev->index); + cc_inv = NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_PSL_LLCMD_A0); + + lock(&dev->npu->lock); + reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base, cc_inv, + NPU2_MISC_DA_LEN_8B); + if (reg & PPC_BIT(16)) { + rc = OPAL_BUSY; + goto out; + } + + reg = PE_handle | PPC_BIT(15); + if (block == NPU2_BLOCK_OTL1) + reg |= PPC_BIT(48); + npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base, cc_inv, + NPU2_MISC_DA_LEN_8B, reg); + + rc = OPAL_HARDWARE; + while (retries--) { + reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base, + cc_inv, 
NPU2_MISC_DA_LEN_8B); + if (!(reg & PPC_BIT(16))) { + rc = OPAL_SUCCESS; + break; + } + /* the bit expected to flip in less than 200us */ + time_wait_us(200); + } +out: + unlock(&dev->npu->lock); + return rc; +} +opal_call(OPAL_NPU_SPA_CLEAR_CACHE, opal_npu_spa_clear_cache, 3); + +static int get_template_rate(unsigned int templ, char *rate_buf) +{ + int shift, idx, val; + + /* + * Each rate is encoded over 4 bits (0->15), with 15 being the + * slowest. The buffer is a succession of rates for all the + * templates. The first 4 bits are for template 63, followed + * by 4 bits for template 62, ... etc. So the rate for + * template 0 is at the very end of the buffer. + */ + idx = (TL_MAX_TEMPLATE - templ) / 2; + shift = 4 * (1 - ((TL_MAX_TEMPLATE - templ) % 2)); + val = rate_buf[idx] >> shift; + return val; +} + +static bool is_template_supported(unsigned int templ, long capabilities) +{ + return !!(capabilities & (1ull << templ)); +} + +static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, + long capabilities, uint64_t rate_phys, int rate_sz) +{ + struct phb *phb = pci_get_phb(phb_id); + struct npu2_dev *dev; + uint64_t stack, block, reg, templ_rate; + int i, rate_pos; + char *rate = (char *) rate_phys; + + if (!phb || phb->phb_type != phb_type_npu_v2_opencapi) + return OPAL_PARAMETER; + if (!opal_addr_valid(rate) || rate_sz != TL_RATE_BUF_SIZE) + return OPAL_PARAMETER; + + dev = phb_to_npu2_dev_ocapi(phb); + if (!dev) + return OPAL_PARAMETER; + + block = index_to_block(dev->index); + stack = index_to_stack(dev->index); + /* + * The 'capabilities' argument defines what TL template the + * device can receive. OpenCAPI 3.0 and 4.0 define 64 templates, so + * that's one bit per template. + * + * For each template, the device processing time may vary, so + * the device advertises at what rate a message of a given + * template can be sent. That's encoded in the 'rate' buffer. + * + * On P9, NPU only knows about TL templates 0 -> 3. 
+ * Per the spec, template 0 must be supported. + */ + if (!is_template_supported(0, capabilities)) + return OPAL_PARAMETER; + + reg = npu2_scom_read(dev->npu->chip_id, dev->npu->xscom_base, + NPU2_OTL_CONFIG1(stack, block), + NPU2_MISC_DA_LEN_8B); + reg &= ~(NPU2_OTL_CONFIG1_TX_TEMP1_EN | NPU2_OTL_CONFIG1_TX_TEMP3_EN | + NPU2_OTL_CONFIG1_TX_TEMP1_EN); + for (i = 0; i < 4; i++) { + /* Skip template 0 as it is implicitly enabled */ + if (i && is_template_supported(i, capabilities)) + reg |= PPC_BIT(i); + /* The tx rate should still be set for template 0 */ + templ_rate = get_template_rate(i, rate); + rate_pos = 8 + i * 4; + reg = SETFIELD(PPC_BITMASK(rate_pos, rate_pos + 3), reg, + templ_rate); + } + npu2_scom_write(dev->npu->chip_id, dev->npu->xscom_base, + NPU2_OTL_CONFIG1(stack, block), NPU2_MISC_DA_LEN_8B, + reg); + prlog(PR_DEBUG, "OCAPI: Link %llx:%x, TL conf1 register set to %llx\n", + phb_id, bdfn, reg); + return OPAL_SUCCESS; +} +opal_call(OPAL_NPU_TL_SET, opal_npu_tl_set, 5); diff --git a/include/npu2-regs.h b/include/npu2-regs.h index 876d7f0..5d99a3e 100644 --- a/include/npu2-regs.h +++ b/include/npu2-regs.h @@ -321,6 +321,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_NTL_DL_CLK_CTRL(ndev) NPU2_DL_REG_OFFSET(ndev, 0x001C) /* OpenCAPI - XSL registers */ +#define NPU2_XSL_PSL_LLCMD_A0 0x008 #define NPU2_XSL_PSL_SCNTL_A0 0x010 #define NPU2_XSL_PSL_SCNTL_A0_MULTI_AFU_DIAL PPC_BIT(0) #define NPU2_XSL_DEF 0x040 @@ -328,6 +329,9 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_XSL_GP_BLOOM_FILTER_ENABLE PPC_BIT(16) #define NPU2_XSL_WRAP_CFG 0x0C0 #define NPU2_XSL_WRAP_CFG_XSLO_CLOCK_ENABLE PPC_BIT(0) +#define NPU2_XSL_PSL_SPAP_A0 0 +#define NPU2_XSL_PSL_SPAP_A1 0x18 +#define NPU2_XSL_PSL_SPAP_EN PPC_BIT(63) /* OpenCAPI - OTL registers */ #define NPU2_OTL_CONFIG0(stack, block) NPU2_REG_OFFSET(stack, block, 0x000) diff --git a/include/opal-api.h b/include/opal-api.h index 1c1b9cc..05d6f4c 100644 --- 
a/include/opal-api.h +++ b/include/opal-api.h @@ -215,7 +215,10 @@ #define OPAL_SENSOR_GROUP_CLEAR 156 #define OPAL_PCI_SET_P2P 157 #define OPAL_QUIESCE 158 -#define OPAL_LAST 158 +#define OPAL_NPU_SPA_SETUP 159 +#define OPAL_NPU_SPA_CLEAR_CACHE 160 +#define OPAL_NPU_TL_SET 161 +#define OPAL_LAST 161 #define QUIESCE_HOLD 1 /* Spin all calls at entry */ #define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */