diff mbox series

[14/16,14/16] opencapi5: add opal functions

Message ID 20210820094557.29743-15-clombard@linux.vnet.ibm.com
State Superseded
Headers show
Series OpenCAPI 5.0 Support for P10 | expand

Commit Message

Christophe Lombard Aug. 20, 2021, 9:45 a.m. UTC
Add three OPAL API calls that are required by the ocxl driver.

  - OPAL_PAU_SPA_SETUP

    The Shared Process Area (SPA) is a table containing one entry (a
    "Process Element") per memory context which can be accessed by the
    OpenCAPI device.

  - OPAL_PAU_SPA_CLEAR_CACHE

    The PAU keeps a cache of recently accessed memory contexts. When a
    Process Element is removed from the SPA, the cache for the link must
    be cleared.

  - OPAL_PAU_TL_SET

    The Transaction Layer specification defines several templates for
    messages to be exchanged on the link. During link setup, the host
    and device must negotiate what templates are supported on both sides
    and at what rates those messages can be sent.

Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
---
 hw/npu-opal.c      |   8 +++
 hw/pau.c           | 159 +++++++++++++++++++++++++++++++++++++++++++++
 include/pau-regs.h |  13 ++++
 include/pau.h      |   9 +++
 4 files changed, 189 insertions(+)

Comments

Stewart Smith Aug. 24, 2021, 2:25 a.m. UTC | #1
On Fri, Aug 20, 2021, at 2:45 AM, Christophe Lombard wrote:
> Add three OPAL API calls that are required by the ocxl driver.
> 
>   - OPAL_PAU_SPA_SETUP
> 
>     The Shared Process Area (SPA) is a table containing one entry (a
>     "Process Element") per memory context which can be accessed by the
>     OpenCAPI device.
> 
>   - OPAL_PAU_SPA_CLEAR_CACHE
> 
>     The PAU keeps a cache of recently accessed memory contexts. When a
>     Process Element is removed from the SPA, the cache for the link must
>     be cleared.
> 
>   - OPAL_PAU_TL_SET
> 
>     The Transaction Layer specification defines several templates for
>     messages to be exchanged on the link. During link setup, the host
>     and device must negotiate what templates are supported on both sides
>     and at what rates those messages can be sent.
> 
> Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
> ---
>  hw/npu-opal.c      |   8 +++
>  hw/pau.c           | 159 +++++++++++++++++++++++++++++++++++++++++++++
>  include/pau-regs.h |  13 ++++
>  include/pau.h      |   9 +++
>  4 files changed, 189 insertions(+)

Probably want to add some documentation on the OPAL calls in doc/opal-api/
Christophe Lombard Aug. 25, 2021, 9:14 a.m. UTC | #2
Le 24/08/2021 à 04:25, Stewart Smith a écrit :
> On Fri, Aug 20, 2021, at 2:45 AM, Christophe Lombard wrote:
>> Add three OPAL API calls that are required by the ocxl driver.
>>
>>    - OPAL_PAU_SPA_SETUP
>>
>>      The Shared Process Area (SPA) is a table containing one entry (a
>>      "Process Element") per memory context which can be accessed by the
>>      OpenCAPI device.
>>
>>    - OPAL_PAU_SPA_CLEAR_CACHE
>>
>>      The PAU keeps a cache of recently accessed memory contexts. When a
>>      Process Element is removed from the SPA, the cache for the link must
>>      be cleared.
>>
>>    - OPAL_PAU_TL_SET
>>
>>      The Transaction Layer specification defines several templates for
>>      messages to be exchanged on the link. During link setup, the host
>>      and device must negotiate what templates are supported on both sides
>>      and at what rates those messages can be sent.
>>
>> Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
>> ---
>>   hw/npu-opal.c      |   8 +++
>>   hw/pau.c           | 159 +++++++++++++++++++++++++++++++++++++++++++++
>>   include/pau-regs.h |  13 ++++
>>   include/pau.h      |   9 +++
>>   4 files changed, 189 insertions(+)
> Probably want to add some documentation on the OPAL calls in doc/opal-api/
> _______________________________________________
> Skiboot mailing list
> Skiboot@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/skiboot

right. Thanks.
Frederic Barrat Sept. 8, 2021, 1:20 p.m. UTC | #3
On 20/08/2021 11:45, Christophe Lombard wrote:
> Add three OPAL API calls that are required by the ocxl driver.
> 
>    - OPAL_PAU_SPA_SETUP
> 
>      The Shared Process Area (SPA) is a table containing one entry (a
>      "Process Element") per memory context which can be accessed by the
>      OpenCAPI device.
> 
>    - OPAL_PAU_SPA_CLEAR_CACHE
> 
>      The PAU keeps a cache of recently accessed memory contexts. When a
>      Process Element is removed from the SPA, the cache for the link must
>      be cleared.
> 
>    - OPAL_PAU_TL_SET
> 
>      The Transaction Layer specification defines several templates for
>      messages to be exchanged on the link. During link setup, the host
>      and device must negotiate what templates are supported on both sides
>      and at what rates those messages can be sent.
> 
> Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
> ---


Reviewed-by: Frederic Barrat <fbarrat@linux.ibm.com>


>   hw/npu-opal.c      |   8 +++
>   hw/pau.c           | 159 +++++++++++++++++++++++++++++++++++++++++++++
>   include/pau-regs.h |  13 ++++
>   include/pau.h      |   9 +++
>   4 files changed, 189 insertions(+)
> 
> diff --git a/hw/npu-opal.c b/hw/npu-opal.c
> index 64e36852..4fc4c662 100644
> --- a/hw/npu-opal.c
> +++ b/hw/npu-opal.c
> @@ -8,6 +8,7 @@
>   #include <phb4.h>
>   #include <npu2.h>
>   #include <npu3.h>
> +#include <pau.h>
> 
>   static int64_t opal_npu_init_context(uint64_t phb_id, int pid __unused,
>   				     uint64_t msr, uint64_t bdf)
> @@ -195,6 +196,8 @@ static int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
> 
>   	if (phb->phb_type == phb_type_npu_v2_opencapi)
>   		rc = npu2_opencapi_spa_setup(phb, bdfn, addr, PE_mask);
> +	else if (phb->phb_type == phb_type_pau_opencapi)
> +		rc = pau_opencapi_spa_setup(phb, bdfn, addr, PE_mask);
>   	else
>   		return OPAL_PARAMETER;
> 
> @@ -216,6 +219,8 @@ static int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
> 
>   	if (phb->phb_type == phb_type_npu_v2_opencapi)
>   		rc = npu2_opencapi_spa_clear_cache(phb, bdfn, PE_handle);
> +	else if (phb->phb_type == phb_type_pau_opencapi)
> +		rc = pau_opencapi_spa_clear_cache(phb, bdfn, PE_handle);
>   	else
>   		return OPAL_PARAMETER;
> 
> @@ -235,6 +240,9 @@ static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn,
>   	if (phb->phb_type == phb_type_npu_v2_opencapi)
>   		rc = npu2_opencapi_tl_set(phb, bdfn, capabilities,
>   					  rate_phys, rate_sz);
> +	else if (phb->phb_type == phb_type_pau_opencapi)
> +		rc = pau_opencapi_tl_set(phb, bdfn, capabilities,
> +					 rate_phys, rate_sz);
>   	else
>   		return OPAL_PARAMETER;
> 
> diff --git a/hw/pau.c b/hw/pau.c
> index 63655118..33d33c65 100644
> --- a/hw/pau.c
> +++ b/hw/pau.c
> @@ -15,6 +15,9 @@
>   #define PAU_MAX_PE_NUM		16
>   #define PAU_RESERVED_PE_NUM	15
> 
> +#define PAU_TL_MAX_TEMPLATE	63
> +#define PAU_TL_RATE_BUF_SIZE	32
> +
>   #define PAU_SLOT_NORMAL			PCI_SLOT_STATE_NORMAL
>   #define PAU_SLOT_LINK			PCI_SLOT_STATE_LINK
>   #define   PAU_SLOT_LINK_START			(PAU_SLOT_LINK + 1)
> @@ -271,6 +274,162 @@ static void pau_device_detect_fixup(struct pau_dev *dev)
>   	dt_add_property_strings(dn, "ibm,pau-link-type", "unknown");
>   }
> 
> +int64_t pau_opencapi_spa_setup(struct phb *phb, uint32_t __unused bdfn,
> +			       uint64_t addr, uint64_t PE_mask)
> +{
> +	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
> +	struct pau *pau = dev->pau;
> +	uint64_t reg, val;
> +	int64_t rc;
> +
> +	lock(&pau->lock);
> +
> +	reg = PAU_XSL_OSL_SPAP_AN(dev->index);
> +	val = pau_read(pau, reg);
> +	if ((addr && (val & PAU_XSL_OSL_SPAP_AN_EN)) ||
> +		(!addr && !(val & PAU_XSL_OSL_SPAP_AN_EN))) {
> +		rc = OPAL_BUSY;
> +		goto out;
> +	}
> +
> +	/* SPA is disabled by passing a NULL address */
> +	val = addr;
> +	if (addr)
> +		val = addr | PAU_XSL_OSL_SPAP_AN_EN;
> +	pau_write(pau, reg, val);
> +
> +	/*
> +	 * set the PE mask that the OS uses for PASID -> PE handle
> +	 * conversion
> +	 */
> +	reg = PAU_OTL_MISC_CFG0(dev->index);
> +	val = pau_read(pau, reg);
> +	val = SETFIELD(PAU_OTL_MISC_CFG0_PE_MASK, val, PE_mask);
> +	pau_write(pau, reg, val);
> +	rc = OPAL_SUCCESS;
> +out:
> +	unlock(&pau->lock);
> +	return rc;
> +}
> +
> +int64_t pau_opencapi_spa_clear_cache(struct phb *phb,
> +				     uint32_t __unused bdfn,
> +				     uint64_t PE_handle)
> +{
> +	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
> +	struct pau *pau = dev->pau;
> +	uint64_t reg, val;
> +	int64_t rc, retries = 5;
> +
> +	lock(&pau->lock);
> +
> +	reg = PAU_XSL_OSL_CCINV;
> +	val = pau_read(pau, reg);
> +	if (val & PAU_XSL_OSL_CCINV_PENDING) {
> +		rc = OPAL_BUSY;
> +		goto out;
> +	}
> +
> +	val = PAU_XSL_OSL_CCINV_REMOVE;
> +	val |= SETFIELD(PAU_XSL_OSL_CCINV_PE_HANDLE, val, PE_handle);
> +	if (dev->index)
> +		val |= PAU_XSL_OSL_CCINV_BRICK;
> +	pau_write(pau, reg, val);
> +
> +	rc = OPAL_HARDWARE;
> +	while (retries--) {
> +		val = pau_read(pau, reg);
> +		if (!(val & PAU_XSL_OSL_CCINV_PENDING)) {
> +			rc = OPAL_SUCCESS;
> +			break;
> +		}
> +		/* the bit expected to flip in less than 200us */
> +		time_wait_us(200);
> +	}
> +out:
> +	unlock(&pau->lock);
> +	return rc;
> +}
> +
> +static int pau_opencapi_get_templ_rate(unsigned int templ,
> +				       char *rate_buf)
> +{
> +	int shift, idx, val;
> +
> +	/*
> +	 * Each rate is encoded over 4 bits (0->15), with 15 being the
> +	 * slowest. The buffer is a succession of rates for all the
> +	 * templates. The first 4 bits are for template 63, followed
> +	 * by 4 bits for template 62, ... etc. So the rate for
> +	 * template 0 is at the very end of the buffer.
> +	 */
> +	idx = (PAU_TL_MAX_TEMPLATE - templ) / 2;
> +	shift = 4 * (1 - ((PAU_TL_MAX_TEMPLATE - templ) % 2));
> +	val = rate_buf[idx] >> shift;
> +	return val;
> +}
> +
> +static bool pau_opencapi_is_templ_supported(unsigned int templ,
> +					    long capabilities)
> +{
> +	return !!(capabilities & (1ull << templ));
> +}
> +
> +int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
> +			    long capabilities, uint64_t rate_phys,
> +			    int rate_sz)
> +{
> +	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
> +	struct pau *pau;
> +	char *rate = (char *) rate_phys;
> +	uint64_t reg, val, templ_rate;
> +	int i, rate_pos;
> +
> +	if (!dev)
> +		return OPAL_PARAMETER;
> +	pau = dev->pau;
> +
> +	if (!opal_addr_valid(rate) || rate_sz != PAU_TL_RATE_BUF_SIZE)
> +		return OPAL_PARAMETER;
> +
> +	/* The 'capabilities' argument defines what TL template the
> +	 * device can receive. OpenCAPI 5.0 defines 64 templates, so
> +	 * that's one bit per template.
> +	 *
> +	 * For each template, the device processing time may vary, so
> +	 * the device advertises at what rate a message of a given
> +	 * template can be sent. That's encoded in the 'rate' buffer.
> +	 *
> +	 * On P10, PAU only knows about TL templates 0 -> 3.
> +	 * Per the spec, template 0 must be supported.
> +	 */
> +	if (!pau_opencapi_is_templ_supported(0, capabilities))
> +		return OPAL_PARAMETER;
> +
> +	reg = PAU_OTL_MISC_CFG_TX(dev->index);
> +	val = pau_read(pau, reg);
> +	val &= ~PAU_OTL_MISC_CFG_TX_TEMP1_EN;
> +	val &= ~PAU_OTL_MISC_CFG_TX_TEMP2_EN;
> +	val &= ~PAU_OTL_MISC_CFG_TX_TEMP3_EN;
> +
> +	for (i = 0; i < 4; i++) {
> +		/* Skip template 0 as it is implicitly enabled.
> +		 * Enable other template If supported by AFU
> +		 */
> +		if (i && pau_opencapi_is_templ_supported(i, capabilities))
> +			val |= PAU_OTL_MISC_CFG_TX_TEMP_EN(i);
> +		/* The tx rate should still be set for template 0 */
> +		templ_rate = pau_opencapi_get_templ_rate(i, rate);
> +		rate_pos = 8 + i * 4;
> +		val = SETFIELD(PAU_OTL_MISC_CFG_TX_TEMP_RATE(rate_pos, rate_pos + 3),
> +			       val, templ_rate);
> +	}
> +	pau_write(pau, reg, val);
> +	PAUDEVDBG(dev, "OTL configuration register set to %llx\n", val);
> +
> +	return OPAL_SUCCESS;
> +}
> +
>   #define CQ_CTL_STATUS_TIMEOUT  10 /* milliseconds */
> 
>   static int pau_opencapi_set_fence_control(struct pau_dev *dev,
> diff --git a/include/pau-regs.h b/include/pau-regs.h
> index 7a5aaa5f..57c2d723 100644
> --- a/include/pau-regs.h
> +++ b/include/pau-regs.h
> @@ -118,6 +118,7 @@
>   #define   PAU_OTL_MISC_CFG0_EN			PPC_BIT(0)
>   #define   PAU_OTL_MISC_CFG0_BLOCK_PE_HANDLE	PPC_BIT(1)
>   #define   PAU_OTL_MISC_CFG0_BRICKID		PPC_BITMASK(2, 3)
> +#define   PAU_OTL_MISC_CFG0_PE_MASK		PPC_BITMASK(4, 7)
>   #define   PAU_OTL_MISC_CFG0_ENABLE_4_0		PPC_BIT(51)
>   #define   PAU_OTL_MISC_CFG0_XLATE_RELEASE	PPC_BIT(62)
>   #define   PAU_OTL_MISC_CFG0_ENABLE_5_0		PPC_BIT(63)
> @@ -132,11 +133,16 @@
>   #define   PAU_OTL_MISC_CFG_TLX_CREDITS_DCP2	PPC_BITMASK(48, 55)
>   #define   PAU_OTL_MISC_CFG_TLX_CREDITS_DCP3	PPC_BITMASK(56, 63)
>   #define PAU_OTL_MISC_CFG_TX(brk)		(PAU_BLOCK_OTL(brk) + 0x058)
> +#define   PAU_OTL_MISC_CFG_TX_TEMP1_EN		PPC_BIT(1)
> +#define   PAU_OTL_MISC_CFG_TX_TEMP2_EN		PPC_BIT(2)
> +#define   PAU_OTL_MISC_CFG_TX_TEMP3_EN		PPC_BIT(3)
> +#define   PAU_OTL_MISC_CFG_TX_TEMP_EN(n)	PPC_BIT(n)
>   #define   PAU_OTL_MISC_CFG_TX_DRDY_WAIT		PPC_BITMASK(5, 7)
>   #define   PAU_OTL_MISC_CFG_TX_TEMP0_RATE	PPC_BITMASK(8, 11)
>   #define   PAU_OTL_MISC_CFG_TX_TEMP1_RATE	PPC_BITMASK(12, 15)
>   #define   PAU_OTL_MISC_CFG_TX_TEMP2_RATE	PPC_BITMASK(16, 19)
>   #define   PAU_OTL_MISC_CFG_TX_TEMP3_RATE	PPC_BITMASK(20, 23)
> +#define   PAU_OTL_MISC_CFG_TX_TEMP_RATE(nib0, nib1)	PPC_BITMASK(nib0, nib1)
>   #define   PAU_OTL_MISC_CFG_TX_CRET_FREQ		PPC_BITMASK(32, 34)
>   #define PAU_OTL_MISC_OTL_REM0(brk)		(PAU_BLOCK_OTL(brk) + 0x068)
>   #define PAU_OTL_MISC_ERROR_SIG_RXI(brk)		(PAU_BLOCK_OTL(brk) + 0x070)
> @@ -150,11 +156,18 @@
>   #define PAU_OTL_MISC_PSL_PEHANDLE_AN(brk)	(PAU_BLOCK_OTL_PSL(brk) + 0x018)
> 
>   /* XSL block registers */
> +#define PAU_XSL_OSL_SPAP_AN(brk)		(PAU_BLOCK_XSL + 0x000 + (brk) * 8)
> +#define   PAU_XSL_OSL_SPAP_AN_EN		PPC_BIT(63)
>   #define PAU_XSL_WRAP_CFG			(PAU_BLOCK_XSL + 0x100)
>   #define   PAU_XSL_WRAP_CFG_CLOCK_ENABLE		PPC_BIT(0)
>   #define PAU_XSL_OSL_XLATE_CFG(brk)		(PAU_BLOCK_XSL + 0x040 + (brk) * 8)
>   #define   PAU_XSL_OSL_XLATE_CFG_AFU_DIAL	PPC_BIT(0)
>   #define   PAU_XSL_OSL_XLATE_CFG_OPENCAPI3	PPC_BIT(32)
> +#define PAU_XSL_OSL_CCINV			(PAU_BLOCK_XSL + 0x070)
> +#define   PAU_XSL_OSL_CCINV_REMOVE		PPC_BIT(15)
> +#define   PAU_XSL_OSL_CCINV_PENDING		PPC_BIT(16)
> +#define   PAU_XSL_OSL_CCINV_BRICK		PPC_BIT(47)
> +#define   PAU_XSL_OSL_CCINV_PE_HANDLE		PPC_BITMASK(48, 62)
> 
>   /* XTS block registers */
>   #define PAU_XTS_CFG				(PAU_BLOCK_PAU_XTS + 0x020)
> diff --git a/include/pau.h b/include/pau.h
> index 8b978bd6..61b17925 100644
> --- a/include/pau.h
> +++ b/include/pau.h
> @@ -200,6 +200,15 @@ static inline uint64_t pau_read(struct pau *pau, uint64_t reg)
>   }
> 
>   void pau_opencapi_dump_scoms(struct phb *phb);
> +int64_t pau_opencapi_spa_setup(struct phb *phb, uint32_t __unused bdfn,
> +			       uint64_t addr, uint64_t PE_mask);
> +int64_t pau_opencapi_spa_clear_cache(struct phb *phb,
> +				     uint32_t __unused bdfn,
> +				     uint64_t PE_handle);
> +int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
> +			    long capabilities, uint64_t rate_phys,
> +			    int rate_sz);
> +
> 
>   /* PHY */
>   int pau_dev_phy_reset(struct pau_dev *dev);
>
diff mbox series

Patch

diff --git a/hw/npu-opal.c b/hw/npu-opal.c
index 64e36852..4fc4c662 100644
--- a/hw/npu-opal.c
+++ b/hw/npu-opal.c
@@ -8,6 +8,7 @@ 
 #include <phb4.h>
 #include <npu2.h>
 #include <npu3.h>
+#include <pau.h>
 
 static int64_t opal_npu_init_context(uint64_t phb_id, int pid __unused,
 				     uint64_t msr, uint64_t bdf)
@@ -195,6 +196,8 @@  static int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
 
 	if (phb->phb_type == phb_type_npu_v2_opencapi)
 		rc = npu2_opencapi_spa_setup(phb, bdfn, addr, PE_mask);
+	else if (phb->phb_type == phb_type_pau_opencapi)
+		rc = pau_opencapi_spa_setup(phb, bdfn, addr, PE_mask);
 	else
 		return OPAL_PARAMETER;
 
@@ -216,6 +219,8 @@  static int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
 
 	if (phb->phb_type == phb_type_npu_v2_opencapi)
 		rc = npu2_opencapi_spa_clear_cache(phb, bdfn, PE_handle);
+	else if (phb->phb_type == phb_type_pau_opencapi)
+		rc = pau_opencapi_spa_clear_cache(phb, bdfn, PE_handle);
 	else
 		return OPAL_PARAMETER;
 
@@ -235,6 +240,9 @@  static int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn,
 	if (phb->phb_type == phb_type_npu_v2_opencapi)
 		rc = npu2_opencapi_tl_set(phb, bdfn, capabilities,
 					  rate_phys, rate_sz);
+	else if (phb->phb_type == phb_type_pau_opencapi)
+		rc = pau_opencapi_tl_set(phb, bdfn, capabilities,
+					 rate_phys, rate_sz);
 	else
 		return OPAL_PARAMETER;
 
diff --git a/hw/pau.c b/hw/pau.c
index 63655118..33d33c65 100644
--- a/hw/pau.c
+++ b/hw/pau.c
@@ -15,6 +15,9 @@ 
 #define PAU_MAX_PE_NUM		16
 #define PAU_RESERVED_PE_NUM	15
 
+#define PAU_TL_MAX_TEMPLATE	63
+#define PAU_TL_RATE_BUF_SIZE	32
+
 #define PAU_SLOT_NORMAL			PCI_SLOT_STATE_NORMAL
 #define PAU_SLOT_LINK			PCI_SLOT_STATE_LINK
 #define   PAU_SLOT_LINK_START			(PAU_SLOT_LINK + 1)
@@ -271,6 +274,162 @@  static void pau_device_detect_fixup(struct pau_dev *dev)
 	dt_add_property_strings(dn, "ibm,pau-link-type", "unknown");
 }
 
+/*
+ * Enable the Shared Process Area (SPA) of a link when 'addr' is non-zero,
+ * or disable it when 'addr' is zero, then program 'PE_mask' into the OTL
+ * MISC_CFG0 register. 'bdfn' is unused.
+ * Returns OPAL_BUSY if the SPA is already in the requested state (nothing
+ * is written), OPAL_SUCCESS otherwise.
+ */
+int64_t pau_opencapi_spa_setup(struct phb *phb, uint32_t __unused bdfn,
+			       uint64_t addr, uint64_t PE_mask)
+{
+	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
+	struct pau *pau = dev->pau;
+	uint64_t spap_reg = PAU_XSL_OSL_SPAP_AN(dev->index);
+	uint64_t cfg0_reg = PAU_OTL_MISC_CFG0(dev->index);
+	uint64_t cfg0;
+	int64_t rc = OPAL_BUSY;
+	bool spa_enabled;
+
+	lock(&pau->lock);
+
+	/* Enabling an enabled SPA, or disabling a disabled one, is refused */
+	spa_enabled = !!(pau_read(pau, spap_reg) & PAU_XSL_OSL_SPAP_AN_EN);
+	if (spa_enabled == !!addr)
+		goto out;
+
+	/* A NULL address disables the SPA: the register is simply zeroed */
+	pau_write(pau, spap_reg, addr ? (addr | PAU_XSL_OSL_SPAP_AN_EN) : 0);
+
+	/* PE mask used by the OS for the PASID -> PE handle conversion */
+	cfg0 = pau_read(pau, cfg0_reg);
+	cfg0 = SETFIELD(PAU_OTL_MISC_CFG0_PE_MASK, cfg0, PE_mask);
+	pau_write(pau, cfg0_reg, cfg0);
+	rc = OPAL_SUCCESS;
+out:
+	unlock(&pau->lock);
+	return rc;
+}
+
+/*
+ * Drop 'PE_handle' from the PAU's cache of memory contexts and poll for
+ * completion. Returns OPAL_BUSY if an invalidation is already pending,
+ * OPAL_HARDWARE if ours is still pending after 5 polls, else OPAL_SUCCESS.
+ */
+int64_t pau_opencapi_spa_clear_cache(struct phb *phb,
+				     uint32_t __unused bdfn,
+				     uint64_t PE_handle)
+{
+	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
+	struct pau *pau = dev->pau;
+	uint64_t ccinv = PAU_XSL_OSL_CCINV, cmd;
+	int64_t rc = OPAL_BUSY, attempts;
+
+	lock(&pau->lock);
+
+	if (pau_read(pau, ccinv) & PAU_XSL_OSL_CCINV_PENDING)
+		goto out;
+
+	cmd = PAU_XSL_OSL_CCINV_REMOVE;
+	cmd = SETFIELD(PAU_XSL_OSL_CCINV_PE_HANDLE, cmd, PE_handle);
+	if (dev->index)
+		cmd |= PAU_XSL_OSL_CCINV_BRICK;
+	pau_write(pau, ccinv, cmd);
+
+	rc = OPAL_HARDWARE;
+	for (attempts = 5; attempts > 0; attempts--) {
+		if (!(pau_read(pau, ccinv) & PAU_XSL_OSL_CCINV_PENDING)) {
+			rc = OPAL_SUCCESS;
+			break;
+		}
+		/* the bit is expected to flip in less than 200us */
+		time_wait_us(200);
+	}
+out:
+	unlock(&pau->lock);
+	return rc;
+}
+
+static int pau_opencapi_get_templ_rate(unsigned int templ,
+				       char *rate_buf)
+{
+	int shift, idx;
+
+	/*
+	 * Each rate is encoded over 4 bits (0->15), with 15 being the
+	 * slowest. The buffer is a succession of rates for all the
+	 * templates. The first 4 bits are for template 63, followed
+	 * by 4 bits for template 62, ... etc. So the rate for
+	 * template 0 is at the very end of the buffer.
+	 */
+	idx = (PAU_TL_MAX_TEMPLATE - templ) / 2;
+	shift = 4 * (1 - ((PAU_TL_MAX_TEMPLATE - templ) % 2));
+	/* Keep only this template's nibble: drop the neighbouring rate */
+	return (rate_buf[idx] >> shift) & 0xf;
+}
+
+/* Tell whether bit 'templ' is set in the 'capabilities' template bitmap */
+static bool pau_opencapi_is_templ_supported(unsigned int templ, long capabilities)
+{
+	return (((unsigned long long) capabilities >> templ) & 1ull) != 0;
+}
+
+/*
+ * Configure which TL templates the link may transmit, and at what rate,
+ * from what the device advertises. Returns OPAL_PARAMETER on a missing
+ * device, an invalid rate buffer or size, or if template 0 is not
+ * supported; OPAL_SUCCESS otherwise. 'bdfn' is unused.
+ */
+int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
+			    long capabilities, uint64_t rate_phys,
+			    int rate_sz)
+{
+	struct pau_dev *dev = pau_phb_to_opencapi_dev(phb);
+	char *rate_buf = (char *) rate_phys;
+	uint64_t tx_reg, tx_cfg, tx_rate;
+	struct pau *pau;
+	int templ, bit;
+
+	if (!dev)
+		return OPAL_PARAMETER;
+	if (!opal_addr_valid(rate_buf) || rate_sz != PAU_TL_RATE_BUF_SIZE)
+		return OPAL_PARAMETER;
+
+	/*
+	 * 'capabilities' has one bit per TL template the device can
+	 * receive (OpenCAPI 5.0 defines 64). The rate buffer tells, for
+	 * each template, at what rate its messages can be sent.
+	 * On P10, PAU only knows about TL templates 0 -> 3.
+	 * Per the spec, template 0 must be supported.
+	 */
+	if (!pau_opencapi_is_templ_supported(0, capabilities))
+		return OPAL_PARAMETER;
+
+	pau = dev->pau;
+	tx_reg = PAU_OTL_MISC_CFG_TX(dev->index);
+	tx_cfg = pau_read(pau, tx_reg);
+	tx_cfg &= ~(PAU_OTL_MISC_CFG_TX_TEMP1_EN |
+		    PAU_OTL_MISC_CFG_TX_TEMP2_EN |
+		    PAU_OTL_MISC_CFG_TX_TEMP3_EN);
+
+	for (templ = 0; templ <= 3; templ++) {
+		/* The tx rate is set for every template, 0 included */
+		bit = 8 + templ * 4;
+		tx_rate = pau_opencapi_get_templ_rate(templ, rate_buf);
+		tx_cfg = SETFIELD(PAU_OTL_MISC_CFG_TX_TEMP_RATE(bit, bit + 3),
+				  tx_cfg, tx_rate);
+		/* Template 0 is implicitly enabled: skip it */
+		if (templ > 0 &&
+		    pau_opencapi_is_templ_supported(templ, capabilities))
+			tx_cfg |= PAU_OTL_MISC_CFG_TX_TEMP_EN(templ);
+	}
+	pau_write(pau, tx_reg, tx_cfg);
+	PAUDEVDBG(dev, "OTL configuration register set to %llx\n", tx_cfg);
+
+	return OPAL_SUCCESS;
+}
+
 #define CQ_CTL_STATUS_TIMEOUT  10 /* milliseconds */
 
 static int pau_opencapi_set_fence_control(struct pau_dev *dev,
diff --git a/include/pau-regs.h b/include/pau-regs.h
index 7a5aaa5f..57c2d723 100644
--- a/include/pau-regs.h
+++ b/include/pau-regs.h
@@ -118,6 +118,7 @@ 
 #define   PAU_OTL_MISC_CFG0_EN			PPC_BIT(0)
 #define   PAU_OTL_MISC_CFG0_BLOCK_PE_HANDLE	PPC_BIT(1)
 #define   PAU_OTL_MISC_CFG0_BRICKID		PPC_BITMASK(2, 3)
+#define   PAU_OTL_MISC_CFG0_PE_MASK		PPC_BITMASK(4, 7)
 #define   PAU_OTL_MISC_CFG0_ENABLE_4_0		PPC_BIT(51)
 #define   PAU_OTL_MISC_CFG0_XLATE_RELEASE	PPC_BIT(62)
 #define   PAU_OTL_MISC_CFG0_ENABLE_5_0		PPC_BIT(63)
@@ -132,11 +133,16 @@ 
 #define   PAU_OTL_MISC_CFG_TLX_CREDITS_DCP2	PPC_BITMASK(48, 55)
 #define   PAU_OTL_MISC_CFG_TLX_CREDITS_DCP3	PPC_BITMASK(56, 63)
 #define PAU_OTL_MISC_CFG_TX(brk)		(PAU_BLOCK_OTL(brk) + 0x058)
+#define   PAU_OTL_MISC_CFG_TX_TEMP1_EN		PPC_BIT(1)
+#define   PAU_OTL_MISC_CFG_TX_TEMP2_EN		PPC_BIT(2)
+#define   PAU_OTL_MISC_CFG_TX_TEMP3_EN		PPC_BIT(3)
+#define   PAU_OTL_MISC_CFG_TX_TEMP_EN(n)	PPC_BIT(n)
 #define   PAU_OTL_MISC_CFG_TX_DRDY_WAIT		PPC_BITMASK(5, 7)
 #define   PAU_OTL_MISC_CFG_TX_TEMP0_RATE	PPC_BITMASK(8, 11)
 #define   PAU_OTL_MISC_CFG_TX_TEMP1_RATE	PPC_BITMASK(12, 15)
 #define   PAU_OTL_MISC_CFG_TX_TEMP2_RATE	PPC_BITMASK(16, 19)
 #define   PAU_OTL_MISC_CFG_TX_TEMP3_RATE	PPC_BITMASK(20, 23)
+#define   PAU_OTL_MISC_CFG_TX_TEMP_RATE(nib0, nib1)	PPC_BITMASK(nib0, nib1)
 #define   PAU_OTL_MISC_CFG_TX_CRET_FREQ		PPC_BITMASK(32, 34)
 #define PAU_OTL_MISC_OTL_REM0(brk)		(PAU_BLOCK_OTL(brk) + 0x068)
 #define PAU_OTL_MISC_ERROR_SIG_RXI(brk)		(PAU_BLOCK_OTL(brk) + 0x070)
@@ -150,11 +156,18 @@ 
 #define PAU_OTL_MISC_PSL_PEHANDLE_AN(brk)	(PAU_BLOCK_OTL_PSL(brk) + 0x018)
 
 /* XSL block registers */
+#define PAU_XSL_OSL_SPAP_AN(brk)		(PAU_BLOCK_XSL + 0x000 + (brk) * 8)
+#define   PAU_XSL_OSL_SPAP_AN_EN		PPC_BIT(63)
 #define PAU_XSL_WRAP_CFG			(PAU_BLOCK_XSL + 0x100)
 #define   PAU_XSL_WRAP_CFG_CLOCK_ENABLE		PPC_BIT(0)
 #define PAU_XSL_OSL_XLATE_CFG(brk)		(PAU_BLOCK_XSL + 0x040 + (brk) * 8)
 #define   PAU_XSL_OSL_XLATE_CFG_AFU_DIAL	PPC_BIT(0)
 #define   PAU_XSL_OSL_XLATE_CFG_OPENCAPI3	PPC_BIT(32)
+#define PAU_XSL_OSL_CCINV			(PAU_BLOCK_XSL + 0x070)
+#define   PAU_XSL_OSL_CCINV_REMOVE		PPC_BIT(15)
+#define   PAU_XSL_OSL_CCINV_PENDING		PPC_BIT(16)
+#define   PAU_XSL_OSL_CCINV_BRICK		PPC_BIT(47)
+#define   PAU_XSL_OSL_CCINV_PE_HANDLE		PPC_BITMASK(48, 62)
 
 /* XTS block registers */
 #define PAU_XTS_CFG				(PAU_BLOCK_PAU_XTS + 0x020)
diff --git a/include/pau.h b/include/pau.h
index 8b978bd6..61b17925 100644
--- a/include/pau.h
+++ b/include/pau.h
@@ -200,6 +200,15 @@  static inline uint64_t pau_read(struct pau *pau, uint64_t reg)
 }
 
 void pau_opencapi_dump_scoms(struct phb *phb);
+int64_t pau_opencapi_spa_setup(struct phb *phb, uint32_t __unused bdfn,
+			       uint64_t addr, uint64_t PE_mask);
+int64_t pau_opencapi_spa_clear_cache(struct phb *phb,
+				     uint32_t __unused bdfn,
+				     uint64_t PE_handle);
+int64_t pau_opencapi_tl_set(struct phb *phb, uint32_t __unused bdfn,
+			    long capabilities, uint64_t rate_phys,
+			    int rate_sz);
+
 
 /* PHY */
 int pau_dev_phy_reset(struct pau_dev *dev);