[3/4] capi: Enable capi mode for phb4

Submitted by Christophe Lombard on Feb. 10, 2017, 9:04 a.m.

Details

Message ID 1486717462-5016-4-git-send-email-clombard@linux.vnet.ibm.com
State New
Headers show

Commit Message

Christophe Lombard Feb. 10, 2017, 9:04 a.m.
Enable the Coherently attached processor interface. The PHB is used as
a CAPI interface.
CAPI adapters can be connected to either PEC0 or PEC2. A single-port
CAPI adapter can be connected to either PEC0 or PEC2, but a dual-port
adapter can only be connected to PEC2:
   CAPP0 attached to PHB0(PEC0 - single port)
   CAPP1 attached to PHB3(PEC2 - single or dual port)
As we did for PHB3, a new specific file 'phb4-capp.h' is created to
contain the CAPP register definitions.

Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
---
 hw/phb4.c           | 264 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 include/chip.h      |   1 +
 include/phb4-capp.h |  57 ++++++++++++
 include/phb4-regs.h |   8 +-
 4 files changed, 328 insertions(+), 2 deletions(-)
 create mode 100644 include/phb4-capp.h

Comments

Andrew Donnellan March 2, 2017, 5:46 a.m.
On 10/02/17 20:04, Christophe Lombard wrote:
> Enable the Coherently attached processor interface. The PHB is used as
> a CAPI interface.
> CAPI Adapters can be connected to whether PEC0 or PEC2. Single port

either

> CAPI adapter can be connected to either PEC0 or PEC2, but Dual-Port
> Adapter can be only connected to PEC2
>    CAPP0 attached to PHB0(PEC0 - single port)
>    CAPP1 attached to PHB3(PEC2 - single or dual port)
> As we did for PHB3, a new specific file 'phb4-capp.h' is created to
> contain the CAPP register definitions.
>
> Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>

This patch doesn't implement disable_capi_mode(). P9 doesn't have fast 
reboot yet but we'll eventually need to care about that, as well as kexec.

I assume do_capp_recovery_scoms() etc will be coming in a later patch?

This is a fairly tough patch to review; there are some parts I haven't 
reviewed in depth, in particular the TVT stuff.

> ---
>  hw/phb4.c           | 264 +++++++++++++++++++++++++++++++++++++++++++++++++++-
>  include/chip.h      |   1 +
>  include/phb4-capp.h |  57 ++++++++++++
>  include/phb4-regs.h |   8 +-
>  4 files changed, 328 insertions(+), 2 deletions(-)
>  create mode 100644 include/phb4-capp.h
>
> diff --git a/hw/phb4.c b/hw/phb4.c
> index 3cdacea..9858ad8 100644
> --- a/hw/phb4.c
> +++ b/hw/phb4.c
> @@ -48,7 +48,7 @@
>  #include <affinity.h>
>  #include <phb4.h>
>  #include <phb4-regs.h>
> -#include <capp.h>
> +#include <phb4-capp.h>
>  #include <fsp.h>
>  #include <chip.h>
>  #include <chiptod.h>
> @@ -1976,6 +1976,8 @@ static int64_t phb4_freset(struct pci_slot *slot)
>  	return OPAL_HARDWARE;
>  }
>
> +extern struct lock capi_lock;

Minor - should we move capi_lock to somewhere other than phb3.c? Perhaps 
capp.c or something?

> +
>  static int64_t phb4_creset(struct pci_slot *slot)
>  {
>  	struct phb4 *p = phb_to_phb4(slot->phb);
> @@ -2374,6 +2376,265 @@ static int64_t phb4_get_diag_data(struct phb *phb,
>  	return OPAL_SUCCESS;
>  }
>
> +static void phb4_init_capp_regs(struct phb4 *p)
> +{
> +	uint64_t reg;
> +	uint32_t offset;
> +
> +	offset = PHB4_CAPP_REG_OFFSET(p);
> +
> +	/* enable combined response examination (set by initfile) */
> +	xscom_read(p->chip_id, APC_MASTER_PB_CTRL + offset, &reg);
> +	reg |= PPC_BIT(0);
> +	xscom_write(p->chip_id, APC_MASTER_PB_CTRL + offset, reg);
> +
> +	/* Set PHB mode, HPC Dir State and P9 mode */
> +	xscom_write(p->chip_id, APC_MASTER_CAPI_CTRL + offset, 0x1072000000000000);
> +	PHBINF(p, "CAPP: port attached\n");
> +
> +	/* should be enabled on LCO shifts only */
> +	/* xscom_write(p->chip_id, LCO_MASTER_TARGET + offset, 0xFFF2000000000000); */
> +
> +	/* Set snoop ttype decoding , dir size to 256k */
> +	xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, 0xA000000000000000);
> +
> +	/* Use Read Epsilon Tier2 for all scopes, Address Pipeline Master
> +	 * Wait Count to highest(1023) and Number of rpt_hang.data to 3
> +	 */
> +	xscom_write(p->chip_id, SNOOP_CONTROL + offset, 0x8000000010072000);
> +
> +	/* TLBI Hang Divider = 1 (initfile).  LPC buffers=0. X16 PCIe(14 buffers) */
> +	xscom_write(p->chip_id, TRANSPORT_CONTROL + offset, 0x401404000400000B);
> +
> +	/* Enable epoch timer */
> +	xscom_write(p->chip_id, EPOCH_RECOVERY_TIMERS_CTRL + offset, 0xC0000000FFF0FFE0);
> +
> +	/* Deassert TLBI_FENCED and tlbi_psl_is_dead */
> +	xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, 0);
> +
> +	xscom_write(p->chip_id, FLUSH_SUE_STATE_MAP + offset,
> +		    0x1DCF5F6600000000);
> +	xscom_write(p->chip_id, FLUSH_SUE_UOP1 + offset,
> +		    0xE3105005C8000000);
> +	xscom_write(p->chip_id, APC_FSM_READ_MASK + offset,
> +		    0xFFFFFFFFFFFF0000);
> +	xscom_write(p->chip_id, XPT_FSM_RMM + offset,
> +		    0xFFFFFFFFFFFF0000);
> +}
> +
> +/* override some inits with CAPI defaults */
> +static void phb4_init_capp_errors(struct phb4 *p)
> +{
> +	out_be64(p->regs + 0x0d30,	0xdff7ff0bf7ddfff0ull);
> +	out_be64(p->regs + 0x0db0,	0xfbffd7bbff7fbfefull);
> +	out_be64(p->regs + 0x0e30,	0xfffffeffff7fff57ull);
> +	out_be64(p->regs + 0x0eb0,	0xfbaeffaf00000000ull);

Hmm, this doesn't seem to match Init_103 in my version of the PHB4 spec, 
but that might be outdated info?

> +	out_be64(p->regs + 0x0cb0,	0x35777073ff000000ull);
> +}

Would be nice to see details of which registers these are and 
explanation of how these values are different from the regular init 
sequence (yes I know the regular init sequence is completely uncommented 
as well...). I think it's justified to use macros for the register 
offsets. Init numbers might be somewhat helpful for the IBMers.

> +
> +/* Power Bus Common Queue Registers
> + * All PBCQ and PBAIB registers are accessed via SCOM
> + * NestBase = 4010C00 for PEC0
> + *            4011000 for PEC1
> + *            4011400 for PEC2
> + *
> + * Some registers are shared amongst all of the stacks and will only
> + * have 1 copy. Other registers are implemented one per stack.
> + * Registers that are duplicated will have an additional offset
> + * of “StackBase” so that they have a unique address.
> + * Stackoffset = 00000040 for Stack0
> + *             = 00000080 for Stack1
> + *             = 000000C0 for Stack2
> + */
> +static int64_t enable_capi_mode(struct phb4 *p, uint64_t pe_number)
> +{
> +	uint64_t reg;
> +	/*uint64_t mbt0, mbt1;*/

Drop this?

> +	uint32_t offset;
> +	int i;
> +
> +	xscom_read(p->chip_id, p->pe_xscom + 0x7, &reg);
> +	if (reg & PPC_BIT(0))
> +		PHBDBG(p, "Already in CAPP mode\n");
> +
> +	/* PEC Phase 3 (PBCQ) registers Init */
> +	/* poll cqstat */
> +	offset = 0x40;
> +	if (p->index > 0 && p->index < 3)
> +		offset = 0x80;
> +	else if (p->index > 2)
> +		offset = 0xC0;
> +
> +	for (i = 0; i < 500000; i++) {
> +		xscom_read(p->chip_id, p->pe_xscom + offset + 0xC, &reg);
> +		if (!(reg & 0xC000000000000000))
> +			break;
> +		time_wait_us(10);
> +	}
> +	if (reg & 0xC000000000000000) {
> +		PHBERR(p, "CAPP: Timeout waiting for pending transaction\n");
> +		return OPAL_HARDWARE;
> +	}
> +
> +	/* Enable CAPP Mode , Set 14 CI Store buffers for CAPP,
> +	 * Set 48 Read machines for CAPP)

I don't think you need the parenthesis here

> +	 */
> +	reg = 0x8000DFFFFFFFFFFFUll;
> +	xscom_write(p->chip_id, p->pe_xscom + 0x7, reg);
> +
> +	/* PEC Phase 4 (PHB) registers adjustement

adjustment

> +	 * Bit [0:7] XSL_DSNCTL[capiind]
> +	 * Init_25 - CAPI Compare/Mask
> +	 */
> +	out_be64(p->regs + PHB_CAPI_CMPM,
> +		 0x0200FE0000000000Ull | PHB_CAPI_CMPM_ENABLE);
> +
> +	if (!(p->rev == PHB4_REV_NIMBUS_DD10)) {
> +		/* Init_123 :  NBW Compare/Mask Register */
> +		out_be64(p->regs + PHB_PBL_NBW_CMPM,
> +			 0x0300FF0000000000Ull);
> +
> +		/* Init_24 - ASN Compare/Mask */
> +		out_be64(p->regs + PHB_PBL_ASN_CMPM,
> +			 0x0400FF0000000000Ull);
> +	}
> +
> +	/* non-translate/50-bit mode */
> +	out_be64(p->regs + PHB_XLATE_PREFIX, 0x0000000000000000Ull);
> +
> +	/* set tve no translate mode allow mmio window */
> +	memset(p->tve_cache, 0x0, sizeof(p->tve_cache));
> +
> +	/*
> +	 * In 50-bit non-translate mode, the fields of the TVE are
> +	 * used to perform an address range check. In this mode TCE
> +	 * Table Size(0) must be a '1' (TVE[51] = 1)
> +	 *      PCI Addr(49:24) >= TVE[52:53]+TVE[0:23] and
> +	 *      PCI Addr(49:24) < TVE[54:55]+TVE[24:47]
> +	 *
> +	 * TVE[51] = 1
> +	 * TVE[56] = 1: 50-bit Non-Translate Mode Enable
> +	 * TVE[0:23] = 0x000000
> +	 * TVE[24:47] = 0xFFFFFF
> +	 *
> +	 * capi dma mode: CAPP DMA mode needs access to all of memory
> +	 * capi mode: Allow address range (bit 14 = 1)
> +	 *            0x0002000000000000: 0x0002FFFFFFFFFFFF
> +	 *            TVE[52:53] = '10' and TVE[54:55] = '10'
> +	 *
> +	 * --> we use capi dma mode by default
> +	 */
> +	p->tve_cache[pe_number * 2]  = PPC_BIT(51);
> +	p->tve_cache[pe_number * 2] |= IODA3_TVT_NON_TRANSLATE_50;
> +	p->tve_cache[pe_number * 2] |= (0xfffffful << 16);
> +
> +	phb4_ioda_sel(p, IODA3_TBL_TVT, 0, true);
> +	for (i = 0; i < p->tvt_size; i++)
> +		out_be64(p->regs + PHB_IODA_DATA0, p->tve_cache[i]);
> +
> +	/* set mbt bar to pass capi mmio window. First applied cleared
> +	 * values to HW
> +	 */
> +	for (i = 0; i < p->mbt_size; i++) {
> +		p->mbt_cache[i][0] = 0;
> +		p->mbt_cache[i][1] = 0;
> +	}
> +	phb4_ioda_sel(p, IODA3_TBL_MBT, 0, true);
> +	for (i = 0; i < p->mbt_size; i++) {
> +		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][0]);
> +		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][1]);
> +	}
> +
> +	p->mbt_cache[0][0] = IODA3_MBT0_ENABLE |
> +			     IODA3_MBT0_TYPE_M64 |
> +		SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_SINGLE_PE) |
> +		SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0) |
> +		(p->mm0_base & IODA3_MBT0_BASE_ADDR);
> +	p->mbt_cache[0][1] = IODA3_MBT1_ENABLE |
> +		((~(p->mm0_size - 1)) & IODA3_MBT1_MASK) |
> +		SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, 0ull, pe_number);
> +
> +	p->mbt_cache[1][0] = IODA3_MBT0_ENABLE |
> +			     IODA3_MBT0_TYPE_M64 |
> +		SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_SINGLE_PE) |
> +		SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0) |
> +		(0x0002000000000000ULL & IODA3_MBT0_BASE_ADDR);
> +	p->mbt_cache[1][1] = IODA3_MBT1_ENABLE |
> +		(0x00ff000000000000ULL & IODA3_MBT1_MASK) |
> +		SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, 0ull, pe_number);
> +
> +	phb4_ioda_sel(p, IODA3_TBL_MBT, 0, true);
> +	for (i = 0; i < p->mbt_size; i++) {
> +		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][0]);
> +		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][1]);
> +	}
> +
> +	phb4_init_capp_errors(p);
> +
> +	phb4_init_capp_regs(p);
> +
> +	if (!(p->rev == PHB4_REV_NIMBUS_DD10)) {
> +		if (!chiptod_capp_timebase_sync(p->chip_id, CAPP_TFMR,
> +						CAPP_TB,
> +						PHB4_CAPP_REG_OFFSET(p))) {
> +			PHBERR(p, "CAPP: Failed to sync timebase\n");
> +			return OPAL_HARDWARE;
> +		}
> +	}
> +	return OPAL_SUCCESS;
> +}
> +
> +static int64_t phb4_set_capi_mode(struct phb *phb, uint64_t mode,
> +				  uint64_t pe_number)
> +{
> +	struct phb4 *p = phb_to_phb4(phb);
> +	struct proc_chip *chip = get_chip(p->chip_id);
> +	uint64_t reg;
> +	uint32_t offset;
> +
> +	lock(&capi_lock);
> +	/* Only PHB0 and PHB3 have the PHB/CAPP I/F so CAPI Adapters can

I assume I/F means "interface" here, in which case it's better to spell 
it out in full.

> +	 * be connected to whether PEC0 or PEC2. Single port CAPI adapter

either

> +	 * can be connected to either PEC0 or PEC2, but Dual-Port Adapter
> +	 * can be only connected to PEC2
> +	 */

I'm not sure I quite see the relevance of the comment to this particular 
block of code?

> +	chip->capp_phb4_attached_mask |= 1 << p->index;
> +	unlock(&capi_lock);
> +
> +	offset = PHB4_CAPP_REG_OFFSET(p);
> +	xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
> +	if ((reg & PPC_BIT(5))) {
> +		PHBERR(p, "CAPP: recovery failed (%016llx)\n", reg);
> +		return OPAL_HARDWARE;
> +	} else if ((reg & PPC_BIT(0)) && (!(reg & PPC_BIT(1)))) {
> +		PHBDBG(p, "CAPP: recovery in progress\n");
> +		return OPAL_BUSY;
> +	}
> +
> +	switch (mode) {
> +	case OPAL_PHB_CAPI_MODE_PCIE:
> +		return OPAL_UNSUPPORTED;
> +
> +	case OPAL_PHB_CAPI_MODE_CAPI:
> +		return enable_capi_mode(p, pe_number);
> +
> +	case OPAL_PHB_CAPI_MODE_SNOOP_OFF:
> +		xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset,
> +			    0x0000000000000000);
> +		return OPAL_SUCCESS;
> +
> +	case OPAL_PHB_CAPI_MODE_SNOOP_ON:
> +		xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset,
> +			    0x0000000000000000);
> +		reg = 0xA1F0000000000000;
> +		xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, reg);
> +
> +		return OPAL_SUCCESS;
> +	}
> +
> +	return OPAL_UNSUPPORTED;
> +}
> +

LGTM

>  static const struct phb_ops phb4_ops = {
>  	.cfg_read8		= phb4_pcicfg_read8,
>  	.cfg_read16		= phb4_pcicfg_read16,
> @@ -2405,6 +2666,7 @@ static const struct phb_ops phb4_ops = {
>  	.get_diag_data		= NULL,
>  	.get_diag_data2		= phb4_get_diag_data,
>  	.tce_kill		= phb4_tce_kill,
> +	.set_capi_mode		= phb4_set_capi_mode,
>  };
>

LGTM

>  static void phb4_init_ioda3(struct phb4 *p)
> diff --git a/include/chip.h b/include/chip.h
> index 588db9f..a4a1e1e 100644
> --- a/include/chip.h
> +++ b/include/chip.h
> @@ -190,6 +190,7 @@ struct proc_chip {
>
>  	/* Must hold capi_lock to change */
>  	uint8_t			capp_phb3_attached_mask;
> +	uint8_t			capp_phb4_attached_mask;
>  	uint8_t			capp_ucode_loaded;
>

LGTM

>  	/* Used by hw/centaur.c */
> diff --git a/include/phb4-capp.h b/include/phb4-capp.h
> new file mode 100644
> index 0000000..a42383e
> --- /dev/null
> +++ b/include/phb4-capp.h
> @@ -0,0 +1,57 @@
> +/* Copyright 2013-2017 IBM Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + *	http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> + * implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#ifndef __PHB4_CAPP_H
> +#define __PHB4_CAPP_H
> +
> +#define CAPP_SNP_ARRAY_WRITE_REG		0x2010841  /* S2 */

I assume "S2" here means satellite S2 per the comment below?

> +#define CAPP_SNP_ARRAY_ADDR_REG 		0x2010828
> +#define CAPP_APC_MASTER_ARRAY_ADDR_REG		0x201082A
> +#define CAPP_APC_MASTER_ARRAY_WRITE_REG 	0x2010842  /* S2 */
> +
> +#define APC_MASTER_PB_CTRL			0x2010818
> +#define APC_MASTER_CAPI_CTRL			0x2010819

Annoyingly the (IBM internal) CAPP workbook calls this the "APC Master 
CAPI Control" register, but some other (also IBM internal) documentation 
refers to this as the "APC Master Config" register... same goes for a 
bunch of other registers here. If only we had canonical naming schemes :(

> +#define LCO_MASTER_TARGET			0x2010821
> +#define EPOCH_RECOVERY_TIMERS_CTRL		0x201082C
> +#define SNOOP_CAPI_CONFIG			0x201081A
> +#define SNOOP_CONTROL				0x201081B
> +#define TRANSPORT_CONTROL			0x201081C
> +#define CAPP_TB 				0x2010826
> +#define CAPP_TFMR				0x2010827
> +#define CAPP_ERR_STATUS_CTRL			0x201080E
> +#define FLUSH_SUE_STATE_MAP			0x201080F
> +#define FLUSH_CPIG_STATE_MAP			0x2010820  /* TBD */
> +#define FLUSH_SUE_UOP1				0x2010843  /* S2 */
> +#define APC_FSM_READ_MASK			0x2010823
> +#define XPT_FSM_RMM				0x2010831

SCOM addresses all look correct.

> +
> +/* CAPP0 attached to PHB0(PEC0 - single port)
> + * CAPP1 attached to PHB3(PEC2 - single or dual port)
> + *
> + * SCOM address Base (Ring = ‘0010’b)
> + * CAPP Unit Satellite           SCOM address Base
> + * CAPP 0    S1 (sat = ‘0000’b)  x02010800
> + * CAPP 0    S2 (sat = ‘0001’b)  x02010840
> + * CAPP 1    S1 (sat = ‘0000’b)  x04010800
> + * CAPP 1    S2 (sat = ‘0001’b)  x04010840
> + */

Very useful comment - I always forget where to find the SCOM address 
base when I'm reading the workbooks...

> +#define CAPP1_REG_OFFSET 0x2000000
> +
> +#define PHB4_CAPP_MAX_PHB_INDEX 3
> +
> +#define PHB4_CAPP_REG_OFFSET(p) ((p)->index == 0 ? 0x0 : CAPP1_REG_OFFSET)
> +
> +#endif /* __PHB4_CAPP_H */
> diff --git a/include/phb4-regs.h b/include/phb4-regs.h
> index 48953e4..4cb0b26 100644
> --- a/include/phb4-regs.h
> +++ b/include/phb4-regs.h
> @@ -71,8 +71,12 @@
>  #define PHB_PEST_BAR			0x1a8
>  #define   PHB_PEST_BAR_ENABLE		PPC_BIT(0)
>  #define   PHB_PEST_BASE_ADDRESS		PPC_BITMASK(8,51)
> +#define PHB_PBL_ASN_CMPM		0x1C0
> +#define   PHB_CAPI_ASN_ENABLE		PPC_BIT(63)
> +#define PHB_CAPI_CMPM			0x1C8
> +#define   PHB_CAPI_CMPM_ENABLE		PPC_BIT(63)
>  #define PHB_M64_UPPER_BITS		0x1f0
> -#define PHB_INTREP_TIMER		0x1f8
> +#define PHB_XLATE_PREFIX		0x1f8
>  #define PHB_DMARD_SYNC			0x200
>  #define   PHB_DMARD_SYNC_START		PPC_BIT(0)
>  #define   PHB_DMARD_SYNC_COMPLETE	PPC_BIT(1)
> @@ -157,6 +161,8 @@
>  #define PHB_RC_CONFIG_BASE		0x1000
>
>  #define PHB_PBL_TIMEOUT_CTRL		0x1810
> +#define PHB_PBL_NBW_CMPM		0x1830
> +#define   PHB_CAPI_NBW_ENABLE		PPC_BIT(63)

All of these offsets/bits look correct.
Christophe Lombard March 24, 2017, 4 p.m.
Le 02/03/2017 à 06:46, Andrew Donnellan a écrit :
> On 10/02/17 20:04, Christophe Lombard wrote:
>> Enable the Coherently attached processor interface. The PHB is used as
>> a CAPI interface.
>> CAPI Adapters can be connected to whether PEC0 or PEC2. Single port
>
> either
>
>> CAPI adapter can be connected to either PEC0 or PEC2, but Dual-Port
>> Adapter can be only connected to PEC2
>>    CAPP0 attached to PHB0(PEC0 - single port)
>>    CAPP1 attached to PHB3(PEC2 - single or dual port)
>> As we did for PHB3, a new specific file 'phb4-capp.h' is created to
>> contain the CAPP register definitions.
>>
>> Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
>
> This patch doesn't implement disable_capi_mode(). P9 doesn't have fast 
> reboot yet but we'll eventually need to care about that, as well as 
> kexec.
>

I will look into disable_capi_mode().

> I assume do_capp_recovery_scoms() etc will be coming in a later patch?
>

It will be in patch V2.


> This is a fairly tough patch to review, there's some parts I haven't 
> reviewed in depth, in particular the TVT stuff.
>
>> ---
>>  hw/phb4.c           | 264 
>> +++++++++++++++++++++++++++++++++++++++++++++++++++-
>>  include/chip.h      |   1 +
>>  include/phb4-capp.h |  57 ++++++++++++
>>  include/phb4-regs.h |   8 +-
>>  4 files changed, 328 insertions(+), 2 deletions(-)
>>  create mode 100644 include/phb4-capp.h
>>
>> diff --git a/hw/phb4.c b/hw/phb4.c
>> index 3cdacea..9858ad8 100644
>> --- a/hw/phb4.c
>> +++ b/hw/phb4.c
>> @@ -48,7 +48,7 @@
>>  #include <affinity.h>
>>  #include <phb4.h>
>>  #include <phb4-regs.h>
>> -#include <capp.h>
>> +#include <phb4-capp.h>
>>  #include <fsp.h>
>>  #include <chip.h>
>>  #include <chiptod.h>
>> @@ -1976,6 +1976,8 @@ static int64_t phb4_freset(struct pci_slot *slot)
>>      return OPAL_HARDWARE;
>>  }
>>
>> +extern struct lock capi_lock;
>
> Minor - should we move capi_lock to somewhere other than phb3.c? 
> Perhaps capp.c or something?
>

Hmm, capp.c will be created in the next patch. I'll think about it.

>> +
>>  static int64_t phb4_creset(struct pci_slot *slot)
>>  {
>>      struct phb4 *p = phb_to_phb4(slot->phb);
>> @@ -2374,6 +2376,265 @@ static int64_t phb4_get_diag_data(struct phb 
>> *phb,
>>      return OPAL_SUCCESS;
>>  }
>>
>> +static void phb4_init_capp_regs(struct phb4 *p)
>> +{
>> +    uint64_t reg;
>> +    uint32_t offset;
>> +
>> +    offset = PHB4_CAPP_REG_OFFSET(p);
>> +
>> +    /* enable combined response examination (set by initfile) */
>> +    xscom_read(p->chip_id, APC_MASTER_PB_CTRL + offset, &reg);
>> +    reg |= PPC_BIT(0);
>> +    xscom_write(p->chip_id, APC_MASTER_PB_CTRL + offset, reg);
>> +
>> +    /* Set PHB mode, HPC Dir State and P9 mode */
>> +    xscom_write(p->chip_id, APC_MASTER_CAPI_CTRL + offset, 
>> 0x1072000000000000);
>> +    PHBINF(p, "CAPP: port attached\n");
>> +
>> +    /* should be enabled on LCO shifts only */
>> +    /* xscom_write(p->chip_id, LCO_MASTER_TARGET + offset, 
>> 0xFFF2000000000000); */
>> +
>> +    /* Set snoop ttype decoding , dir size to 256k */
>> +    xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, 
>> 0xA000000000000000);
>> +
>> +    /* Use Read Epsilon Tier2 for all scopes, Address Pipeline Master
>> +     * Wait Count to highest(1023) and Number of rpt_hang.data to 3
>> +     */
>> +    xscom_write(p->chip_id, SNOOP_CONTROL + offset, 
>> 0x8000000010072000);
>> +
>> +    /* TLBI Hang Divider = 1 (initfile).  LPC buffers=0. X16 PCIe(14 
>> buffers) */
>> +    xscom_write(p->chip_id, TRANSPORT_CONTROL + offset, 
>> 0x401404000400000B);
>> +
>> +    /* Enable epoch timer */
>> +    xscom_write(p->chip_id, EPOCH_RECOVERY_TIMERS_CTRL + offset, 
>> 0xC0000000FFF0FFE0);
>> +
>> +    /* Deassert TLBI_FENCED and tlbi_psl_is_dead */
>> +    xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, 0);
>> +
>> +    xscom_write(p->chip_id, FLUSH_SUE_STATE_MAP + offset,
>> +            0x1DCF5F6600000000);
>> +    xscom_write(p->chip_id, FLUSH_SUE_UOP1 + offset,
>> +            0xE3105005C8000000);
>> +    xscom_write(p->chip_id, APC_FSM_READ_MASK + offset,
>> +            0xFFFFFFFFFFFF0000);
>> +    xscom_write(p->chip_id, XPT_FSM_RMM + offset,
>> +            0xFFFFFFFFFFFF0000);
>> +}
>> +
>> +/* override some inits with CAPI defaults */
>> +static void phb4_init_capp_errors(struct phb4 *p)
>> +{
>> +    out_be64(p->regs + 0x0d30,    0xdff7ff0bf7ddfff0ull);
>> +    out_be64(p->regs + 0x0db0,    0xfbffd7bbff7fbfefull);
>> +    out_be64(p->regs + 0x0e30,    0xfffffeffff7fff57ull);
>> +    out_be64(p->regs + 0x0eb0,    0xfbaeffaf00000000ull);
>
> Hmm, this doesn't seem to match Init_103 in my version of the PHB4 
> spec, but that might be outdated info?
>

The latest version I have is phb4_spec_053.pdf.
Are you talking about this register? If so, you are right:
     Init_104: Wr x0EB0 0x9FAEFFAF_3FFFFFFF
                        0xFFAEFFAF_FFFFFFFF (CAPI)
               RXE_TCE Error AIB Fence Enable Register

>> +    out_be64(p->regs + 0x0cb0, 0x35777073ff000000ull);
>> +}
>
> Would be nice to see details of which registers these are and 
> explanation of how these values are different from the regular init 
> sequence (yes I know the regular init sequence is completely 
> uncommented as well...). I think it's justified to use macros for the 
> register offsets. Init numbers might be somewhat helpful for the IBMers.
>

ok

>> +
>> +/* Power Bus Common Queue Registers
>> + * All PBCQ and PBAIB registers are accessed via SCOM
>> + * NestBase = 4010C00 for PEC0
>> + *            4011000 for PEC1
>> + *            4011400 for PEC2
>> + *
>> + * Some registers are shared amongst all of the stacks and will only
>> + * have 1 copy. Other registers are implemented one per stack.
>> + * Registers that are duplicated will have an additional offset
>> + * of “StackBase” so that they have a unique address.
>> + * Stackoffset = 00000040 for Stack0
>> + *             = 00000080 for Stack1
>> + *             = 000000C0 for Stack2
>> + */
>> +static int64_t enable_capi_mode(struct phb4 *p, uint64_t pe_number)
>> +{
>> +    uint64_t reg;
>> +    /*uint64_t mbt0, mbt1;*/
>
> Drop this?
>
>> +    uint32_t offset;
>> +    int i;
>> +
>> +    xscom_read(p->chip_id, p->pe_xscom + 0x7, &reg);
>> +    if (reg & PPC_BIT(0))
>> +        PHBDBG(p, "Already in CAPP mode\n");
>> +
>> +    /* PEC Phase 3 (PBCQ) registers Init */
>> +    /* poll cqstat */
>> +    offset = 0x40;
>> +    if (p->index > 0 && p->index < 3)
>> +        offset = 0x80;
>> +    else if (p->index > 2)
>> +        offset = 0xC0;
>> +
>> +    for (i = 0; i < 500000; i++) {
>> +        xscom_read(p->chip_id, p->pe_xscom + offset + 0xC, &reg);
>> +        if (!(reg & 0xC000000000000000))
>> +            break;
>> +        time_wait_us(10);
>> +    }
>> +    if (reg & 0xC000000000000000) {
>> +        PHBERR(p, "CAPP: Timeout waiting for pending transaction\n");
>> +        return OPAL_HARDWARE;
>> +    }
>> +
>> +    /* Enable CAPP Mode , Set 14 CI Store buffers for CAPP,
>> +     * Set 48 Read machines for CAPP)
>
> I don't think you need the parenthesis here
>
>> +     */
>> +    reg = 0x8000DFFFFFFFFFFFUll;
>> +    xscom_write(p->chip_id, p->pe_xscom + 0x7, reg);
>> +
>> +    /* PEC Phase 4 (PHB) registers adjustement
>
> adjustment
>
>> +     * Bit [0:7] XSL_DSNCTL[capiind]
>> +     * Init_25 - CAPI Compare/Mask
>> +     */
>> +    out_be64(p->regs + PHB_CAPI_CMPM,
>> +         0x0200FE0000000000Ull | PHB_CAPI_CMPM_ENABLE);
>> +
>> +    if (!(p->rev == PHB4_REV_NIMBUS_DD10)) {
>> +        /* Init_123 :  NBW Compare/Mask Register */
>> +        out_be64(p->regs + PHB_PBL_NBW_CMPM,
>> +             0x0300FF0000000000Ull);
>> +
>> +        /* Init_24 - ASN Compare/Mask */
>> +        out_be64(p->regs + PHB_PBL_ASN_CMPM,
>> +             0x0400FF0000000000Ull);
>> +    }
>> +
>> +    /* non-translate/50-bit mode */
>> +    out_be64(p->regs + PHB_XLATE_PREFIX, 0x0000000000000000Ull);
>> +
>> +    /* set tve no translate mode allow mmio window */
>> +    memset(p->tve_cache, 0x0, sizeof(p->tve_cache));
>> +
>> +    /*
>> +     * In 50-bit non-translate mode, the fields of the TVE are
>> +     * used to perform an address range check. In this mode TCE
>> +     * Table Size(0) must be a '1' (TVE[51] = 1)
>> +     *      PCI Addr(49:24) >= TVE[52:53]+TVE[0:23] and
>> +     *      PCI Addr(49:24) < TVE[54:55]+TVE[24:47]
>> +     *
>> +     * TVE[51] = 1
>> +     * TVE[56] = 1: 50-bit Non-Translate Mode Enable
>> +     * TVE[0:23] = 0x000000
>> +     * TVE[24:47] = 0xFFFFFF
>> +     *
>> +     * capi dma mode: CAPP DMA mode needs access to all of memory
>> +     * capi mode: Allow address range (bit 14 = 1)
>> +     *            0x0002000000000000: 0x0002FFFFFFFFFFFF
>> +     *            TVE[52:53] = '10' and TVE[54:55] = '10'
>> +     *
>> +     * --> we use capi dma mode by default
>> +     */
>> +    p->tve_cache[pe_number * 2]  = PPC_BIT(51);
>> +    p->tve_cache[pe_number * 2] |= IODA3_TVT_NON_TRANSLATE_50;
>> +    p->tve_cache[pe_number * 2] |= (0xfffffful << 16);
>> +
>> +    phb4_ioda_sel(p, IODA3_TBL_TVT, 0, true);
>> +    for (i = 0; i < p->tvt_size; i++)
>> +        out_be64(p->regs + PHB_IODA_DATA0, p->tve_cache[i]);
>> +
>> +    /* set mbt bar to pass capi mmio window. First applied cleared
>> +     * values to HW
>> +     */
>> +    for (i = 0; i < p->mbt_size; i++) {
>> +        p->mbt_cache[i][0] = 0;
>> +        p->mbt_cache[i][1] = 0;
>> +    }
>> +    phb4_ioda_sel(p, IODA3_TBL_MBT, 0, true);
>> +    for (i = 0; i < p->mbt_size; i++) {
>> +        out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][0]);
>> +        out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][1]);
>> +    }
>> +
>> +    p->mbt_cache[0][0] = IODA3_MBT0_ENABLE |
>> +                 IODA3_MBT0_TYPE_M64 |
>> +        SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_SINGLE_PE) |
>> +        SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0) |
>> +        (p->mm0_base & IODA3_MBT0_BASE_ADDR);
>> +    p->mbt_cache[0][1] = IODA3_MBT1_ENABLE |
>> +        ((~(p->mm0_size - 1)) & IODA3_MBT1_MASK) |
>> +        SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, 0ull, pe_number);
>> +
>> +    p->mbt_cache[1][0] = IODA3_MBT0_ENABLE |
>> +                 IODA3_MBT0_TYPE_M64 |
>> +        SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_SINGLE_PE) |
>> +        SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0) |
>> +        (0x0002000000000000ULL & IODA3_MBT0_BASE_ADDR);
>> +    p->mbt_cache[1][1] = IODA3_MBT1_ENABLE |
>> +        (0x00ff000000000000ULL & IODA3_MBT1_MASK) |
>> +        SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, 0ull, pe_number);
>> +
>> +    phb4_ioda_sel(p, IODA3_TBL_MBT, 0, true);
>> +    for (i = 0; i < p->mbt_size; i++) {
>> +        out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][0]);
>> +        out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][1]);
>> +    }
>> +
>> +    phb4_init_capp_errors(p);
>> +
>> +    phb4_init_capp_regs(p);
>> +
>> +    if (!(p->rev == PHB4_REV_NIMBUS_DD10)) {
>> +        if (!chiptod_capp_timebase_sync(p->chip_id, CAPP_TFMR,
>> +                        CAPP_TB,
>> +                        PHB4_CAPP_REG_OFFSET(p))) {
>> +            PHBERR(p, "CAPP: Failed to sync timebase\n");
>> +            return OPAL_HARDWARE;
>> +        }
>> +    }
>> +    return OPAL_SUCCESS;
>> +}
>> +
>> +static int64_t phb4_set_capi_mode(struct phb *phb, uint64_t mode,
>> +                  uint64_t pe_number)
>> +{
>> +    struct phb4 *p = phb_to_phb4(phb);
>> +    struct proc_chip *chip = get_chip(p->chip_id);
>> +    uint64_t reg;
>> +    uint32_t offset;
>> +
>> +    lock(&capi_lock);
>> +    /* Only PHB0 and PHB3 have the PHB/CAPP I/F so CAPI Adapters can
>
> I assume I/F means "interface" here, in which case it's better to 
> spell it out in full.
>

ok

>> +     * be connected to whether PEC0 or PEC2. Single port CAPI adapter
>
> either
>
>> +     * can be connected to either PEC0 or PEC2, but Dual-Port Adapter
>> +     * can be only connected to PEC2
>> +     */
>
> I'm not sure I quite see the relevance of the comment to this 
> particular block of code?
>
>> +    chip->capp_phb4_attached_mask |= 1 << p->index;
>> +    unlock(&capi_lock);
>> +
>> +    offset = PHB4_CAPP_REG_OFFSET(p);
>> +    xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
>> +    if ((reg & PPC_BIT(5))) {
>> +        PHBERR(p, "CAPP: recovery failed (%016llx)\n", reg);
>> +        return OPAL_HARDWARE;
>> +    } else if ((reg & PPC_BIT(0)) && (!(reg & PPC_BIT(1)))) {
>> +        PHBDBG(p, "CAPP: recovery in progress\n");
>> +        return OPAL_BUSY;
>> +    }
>> +
>> +    switch (mode) {
>> +    case OPAL_PHB_CAPI_MODE_PCIE:
>> +        return OPAL_UNSUPPORTED;
>> +
>> +    case OPAL_PHB_CAPI_MODE_CAPI:
>> +        return enable_capi_mode(p, pe_number);
>> +
>> +    case OPAL_PHB_CAPI_MODE_SNOOP_OFF:
>> +        xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset,
>> +                0x0000000000000000);
>> +        return OPAL_SUCCESS;
>> +
>> +    case OPAL_PHB_CAPI_MODE_SNOOP_ON:
>> +        xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset,
>> +                0x0000000000000000);
>> +        reg = 0xA1F0000000000000;
>> +        xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, reg);
>> +
>> +        return OPAL_SUCCESS;
>> +    }
>> +
>> +    return OPAL_UNSUPPORTED;
>> +}
>> +
>
> LGTM
>
>>  static const struct phb_ops phb4_ops = {
>>      .cfg_read8        = phb4_pcicfg_read8,
>>      .cfg_read16        = phb4_pcicfg_read16,
>> @@ -2405,6 +2666,7 @@ static const struct phb_ops phb4_ops = {
>>      .get_diag_data        = NULL,
>>      .get_diag_data2        = phb4_get_diag_data,
>>      .tce_kill        = phb4_tce_kill,
>> +    .set_capi_mode        = phb4_set_capi_mode,
>>  };
>>
>
> LGTM
>
>>  static void phb4_init_ioda3(struct phb4 *p)
>> diff --git a/include/chip.h b/include/chip.h
>> index 588db9f..a4a1e1e 100644
>> --- a/include/chip.h
>> +++ b/include/chip.h
>> @@ -190,6 +190,7 @@ struct proc_chip {
>>
>>      /* Must hold capi_lock to change */
>>      uint8_t            capp_phb3_attached_mask;
>> +    uint8_t            capp_phb4_attached_mask;
>>      uint8_t            capp_ucode_loaded;
>>
>
> LGTM
>
>>      /* Used by hw/centaur.c */
>> diff --git a/include/phb4-capp.h b/include/phb4-capp.h
>> new file mode 100644
>> index 0000000..a42383e
>> --- /dev/null
>> +++ b/include/phb4-capp.h
>> @@ -0,0 +1,57 @@
>> +/* Copyright 2013-2017 IBM Corp.
>> + *
>> + * Licensed under the Apache License, Version 2.0 (the "License");
>> + * you may not use this file except in compliance with the License.
>> + * You may obtain a copy of the License at
>> + *
>> + *    http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + * Unless required by applicable law or agreed to in writing, software
>> + * distributed under the License is distributed on an "AS IS" BASIS,
>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> + * implied.
>> + * See the License for the specific language governing permissions and
>> + * limitations under the License.
>> + */
>> +
>> +#ifndef __PHB4_CAPP_H
>> +#define __PHB4_CAPP_H
>> +
>> +#define CAPP_SNP_ARRAY_WRITE_REG        0x2010841  /* S2 */
>
> I assume "S2" here means satellite S2 per the comment below?
>

right.

>> +#define CAPP_SNP_ARRAY_ADDR_REG 0x2010828
>> +#define CAPP_APC_MASTER_ARRAY_ADDR_REG        0x201082A
>> +#define CAPP_APC_MASTER_ARRAY_WRITE_REG     0x2010842  /* S2 */
>> +
>> +#define APC_MASTER_PB_CTRL            0x2010818
>> +#define APC_MASTER_CAPI_CTRL            0x2010819
>
> Annoyingly the (IBM internal) CAPP workbook calls this the "APC Master 
> CAPI Control" register, but some other (also IBM internal) 
> documentation refers to this as the "APC Master Config" register... 
> same goes for a bunch of other registers here. If only we had 
> canonical naming schemes :(
>
>> +#define LCO_MASTER_TARGET 0x2010821
>> +#define EPOCH_RECOVERY_TIMERS_CTRL        0x201082C
>> +#define SNOOP_CAPI_CONFIG            0x201081A
>> +#define SNOOP_CONTROL                0x201081B
>> +#define TRANSPORT_CONTROL            0x201081C
>> +#define CAPP_TB                 0x2010826
>> +#define CAPP_TFMR                0x2010827
>> +#define CAPP_ERR_STATUS_CTRL            0x201080E
>> +#define FLUSH_SUE_STATE_MAP            0x201080F
>> +#define FLUSH_CPIG_STATE_MAP            0x2010820  /* TBD */
>> +#define FLUSH_SUE_UOP1                0x2010843  /* S2 */
>> +#define APC_FSM_READ_MASK            0x2010823
>> +#define XPT_FSM_RMM                0x2010831
>
> SCOM addresses all look correct.
>
>> +
>> +/* CAPP0 attached to PHB0(PEC0 - single port)
>> + * CAPP1 attached to PHB3(PEC2 - single or dual port)
>> + *
>> + * SCOM address Base (Ring = ‘0010’b)
>> + * CAPP Unit Satellite           SCOM address Base
>> + * CAPP 0    S1 (sat = ‘0000’b)  x02010800
>> + * CAPP 0    S2 (sat = ‘0001’b)  x02010840
>> + * CAPP 1    S1 (sat = ‘0000’b)  x04010800
>> + * CAPP 1    S2 (sat = ‘0001’b)  x04010840
>> + */
>
> Very useful comment - I always forget where to find the SCOM address 
> base when I'm reading the workbooks...
>
>> +#define CAPP1_REG_OFFSET 0x2000000
>> +
>> +#define PHB4_CAPP_MAX_PHB_INDEX 3
>> +
>> +#define PHB4_CAPP_REG_OFFSET(p) ((p)->index == 0 ? 0x0 : 
>> CAPP1_REG_OFFSET)
>> +
>> +#endif /* __PHB4_CAPP_H */
>> diff --git a/include/phb4-regs.h b/include/phb4-regs.h
>> index 48953e4..4cb0b26 100644
>> --- a/include/phb4-regs.h
>> +++ b/include/phb4-regs.h
>> @@ -71,8 +71,12 @@
>>  #define PHB_PEST_BAR            0x1a8
>>  #define   PHB_PEST_BAR_ENABLE        PPC_BIT(0)
>>  #define   PHB_PEST_BASE_ADDRESS        PPC_BITMASK(8,51)
>> +#define PHB_PBL_ASN_CMPM        0x1C0
>> +#define   PHB_CAPI_ASN_ENABLE        PPC_BIT(63)
>> +#define PHB_CAPI_CMPM            0x1C8
>> +#define   PHB_CAPI_CMPM_ENABLE        PPC_BIT(63)
>>  #define PHB_M64_UPPER_BITS        0x1f0
>> -#define PHB_INTREP_TIMER        0x1f8
>> +#define PHB_XLATE_PREFIX        0x1f8
>>  #define PHB_DMARD_SYNC            0x200
>>  #define   PHB_DMARD_SYNC_START        PPC_BIT(0)
>>  #define   PHB_DMARD_SYNC_COMPLETE    PPC_BIT(1)
>> @@ -157,6 +161,8 @@
>>  #define PHB_RC_CONFIG_BASE        0x1000
>>
>>  #define PHB_PBL_TIMEOUT_CTRL        0x1810
>> +#define PHB_PBL_NBW_CMPM        0x1830
>> +#define   PHB_CAPI_NBW_ENABLE        PPC_BIT(63)
>
> All of these offsets/bits look correct.
>

Patch hide | download patch | download mbox

diff --git a/hw/phb4.c b/hw/phb4.c
index 3cdacea..9858ad8 100644
--- a/hw/phb4.c
+++ b/hw/phb4.c
@@ -48,7 +48,7 @@ 
 #include <affinity.h>
 #include <phb4.h>
 #include <phb4-regs.h>
-#include <capp.h>
+#include <phb4-capp.h>
 #include <fsp.h>
 #include <chip.h>
 #include <chiptod.h>
@@ -1976,6 +1976,8 @@  static int64_t phb4_freset(struct pci_slot *slot)
 	return OPAL_HARDWARE;
 }
 
+extern struct lock capi_lock;
+
 static int64_t phb4_creset(struct pci_slot *slot)
 {
 	struct phb4 *p = phb_to_phb4(slot->phb);
@@ -2374,6 +2376,265 @@  static int64_t phb4_get_diag_data(struct phb *phb,
 	return OPAL_SUCCESS;
 }
 
+static void phb4_init_capp_regs(struct phb4 *p)
+{
+	uint64_t reg;
+	uint32_t offset;
+
+	offset = PHB4_CAPP_REG_OFFSET(p);
+
+	/* enable combined response examination (set by initfile) */
+	xscom_read(p->chip_id, APC_MASTER_PB_CTRL + offset, &reg);
+	reg |= PPC_BIT(0);
+	xscom_write(p->chip_id, APC_MASTER_PB_CTRL + offset, reg);
+
+	/* Set PHB mode, HPC Dir State and P9 mode */
+	xscom_write(p->chip_id, APC_MASTER_CAPI_CTRL + offset, 0x1072000000000000);
+	PHBINF(p, "CAPP: port attached\n");
+
+	/* should be enabled on LCO shifts only */
+	/* xscom_write(p->chip_id, LCO_MASTER_TARGET + offset, 0xFFF2000000000000); */
+
+	/* Set snoop ttype decoding, dir size to 256k */
+	xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, 0xA000000000000000);
+
+	/* Use Read Epsilon Tier2 for all scopes, Address Pipeline Master
+	 * Wait Count to highest(1023) and Number of rpt_hang.data to 3
+	 */
+	xscom_write(p->chip_id, SNOOP_CONTROL + offset, 0x8000000010072000);
+
+	/* TLBI Hang Divider = 1 (initfile).  LPC buffers=0. X16 PCIe(14 buffers) */
+	xscom_write(p->chip_id, TRANSPORT_CONTROL + offset, 0x401404000400000B);
+
+	/* Enable epoch timer */
+	xscom_write(p->chip_id, EPOCH_RECOVERY_TIMERS_CTRL + offset, 0xC0000000FFF0FFE0);
+
+	/* Deassert TLBI_FENCED and tlbi_psl_is_dead */
+	xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, 0);
+
+	xscom_write(p->chip_id, FLUSH_SUE_STATE_MAP + offset,
+		    0x1DCF5F6600000000);
+	xscom_write(p->chip_id, FLUSH_SUE_UOP1 + offset,
+		    0xE3105005C8000000);
+	xscom_write(p->chip_id, APC_FSM_READ_MASK + offset,
+		    0xFFFFFFFFFFFF0000);
+	xscom_write(p->chip_id, XPT_FSM_RMM + offset,
+		    0xFFFFFFFFFFFF0000);
+}
+
+/* override some inits with CAPI defaults */
+static void phb4_init_capp_errors(struct phb4 *p)
+{
+	out_be64(p->regs + 0x0d30,	0xdff7ff0bf7ddfff0ull);
+	out_be64(p->regs + 0x0db0,	0xfbffd7bbff7fbfefull);
+	out_be64(p->regs + 0x0e30,	0xfffffeffff7fff57ull);
+	out_be64(p->regs + 0x0eb0,	0xfbaeffaf00000000ull);
+	out_be64(p->regs + 0x0cb0,	0x35777073ff000000ull);
+}
+
+/* Power Bus Common Queue Registers
+ * All PBCQ and PBAIB registers are accessed via SCOM
+ * NestBase = 4010C00 for PEC0
+ *            4011000 for PEC1
+ *            4011400 for PEC2
+ *
+ * Some registers are shared amongst all of the stacks and will only
+ * have 1 copy. Other registers are implemented one per stack.
+ * Registers that are duplicated will have an additional offset
+ * of “StackBase” so that they have a unique address.
+ * Stackoffset = 00000040 for Stack0
+ *             = 00000080 for Stack1
+ *             = 000000C0 for Stack2
+ */
+static int64_t enable_capi_mode(struct phb4 *p, uint64_t pe_number)
+{
+	uint64_t reg;
+	/*uint64_t mbt0, mbt1;*/
+	uint32_t offset;
+	int i;
+
+	xscom_read(p->chip_id, p->pe_xscom + 0x7, &reg);
+	if (reg & PPC_BIT(0))
+		PHBDBG(p, "Already in CAPP mode\n");
+
+	/* PEC Phase 3 (PBCQ) registers Init */
+	/* poll cqstat */
+	offset = 0x40;
+	if (p->index > 0 && p->index < 3)
+		offset = 0x80;
+	else if (p->index > 2)
+		offset = 0xC0;
+
+	for (i = 0; i < 500000; i++) {
+		xscom_read(p->chip_id, p->pe_xscom + offset + 0xC, &reg);
+		if (!(reg & 0xC000000000000000))
+			break;
+		time_wait_us(10);
+	}
+	if (reg & 0xC000000000000000) {
+		PHBERR(p, "CAPP: Timeout waiting for pending transaction\n");
+		return OPAL_HARDWARE;
+	}
+
+	/* Enable CAPP mode, set 14 CI Store buffers for CAPP,
+	 * set 48 Read machines for CAPP
+	 */
+	reg = 0x8000DFFFFFFFFFFFUll;
+	xscom_write(p->chip_id, p->pe_xscom + 0x7, reg);
+
+	/* PEC Phase 4 (PHB) registers adjustment
+	 * Bit [0:7] XSL_DSNCTL[capiind]
+	 * Init_25 - CAPI Compare/Mask
+	 */
+	out_be64(p->regs + PHB_CAPI_CMPM,
+		 0x0200FE0000000000Ull | PHB_CAPI_CMPM_ENABLE);
+
+	if (!(p->rev == PHB4_REV_NIMBUS_DD10)) {
+		/* Init_123 :  NBW Compare/Mask Register */
+		out_be64(p->regs + PHB_PBL_NBW_CMPM,
+			 0x0300FF0000000000Ull);
+
+		/* Init_24 - ASN Compare/Mask */
+		out_be64(p->regs + PHB_PBL_ASN_CMPM,
+			 0x0400FF0000000000Ull);
+	}
+
+	/* non-translate/50-bit mode */
+	out_be64(p->regs + PHB_XLATE_PREFIX, 0x0000000000000000Ull);
+
+	/* set tve no translate mode allow mmio window */
+	memset(p->tve_cache, 0x0, sizeof(p->tve_cache));
+
+	/*
+	 * In 50-bit non-translate mode, the fields of the TVE are
+	 * used to perform an address range check. In this mode TCE
+	 * Table Size(0) must be a '1' (TVE[51] = 1)
+	 *      PCI Addr(49:24) >= TVE[52:53]+TVE[0:23] and
+	 *      PCI Addr(49:24) < TVE[54:55]+TVE[24:47]
+	 *
+	 * TVE[51] = 1
+	 * TVE[56] = 1: 50-bit Non-Translate Mode Enable
+	 * TVE[0:23] = 0x000000
+	 * TVE[24:47] = 0xFFFFFF
+	 *
+	 * capi dma mode: CAPP DMA mode needs access to all of memory
+	 * capi mode: Allow address range (bit 14 = 1)
+	 *            0x0002000000000000: 0x0002FFFFFFFFFFFF
+	 *            TVE[52:53] = '10' and TVE[54:55] = '10'
+	 *
+	 * --> we use capi dma mode by default
+	 */
+	p->tve_cache[pe_number * 2]  = PPC_BIT(51);
+	p->tve_cache[pe_number * 2] |= IODA3_TVT_NON_TRANSLATE_50;
+	p->tve_cache[pe_number * 2] |= (0xfffffful << 16);
+
+	phb4_ioda_sel(p, IODA3_TBL_TVT, 0, true);
+	for (i = 0; i < p->tvt_size; i++)
+		out_be64(p->regs + PHB_IODA_DATA0, p->tve_cache[i]);
+
+	/* Set MBT BAR to pass the CAPI MMIO window. First apply cleared
+	 * values to HW
+	 */
+	for (i = 0; i < p->mbt_size; i++) {
+		p->mbt_cache[i][0] = 0;
+		p->mbt_cache[i][1] = 0;
+	}
+	phb4_ioda_sel(p, IODA3_TBL_MBT, 0, true);
+	for (i = 0; i < p->mbt_size; i++) {
+		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][0]);
+		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][1]);
+	}
+
+	p->mbt_cache[0][0] = IODA3_MBT0_ENABLE |
+			     IODA3_MBT0_TYPE_M64 |
+		SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_SINGLE_PE) |
+		SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0) |
+		(p->mm0_base & IODA3_MBT0_BASE_ADDR);
+	p->mbt_cache[0][1] = IODA3_MBT1_ENABLE |
+		((~(p->mm0_size - 1)) & IODA3_MBT1_MASK) |
+		SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, 0ull, pe_number);
+
+	p->mbt_cache[1][0] = IODA3_MBT0_ENABLE |
+			     IODA3_MBT0_TYPE_M64 |
+		SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_SINGLE_PE) |
+		SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0) |
+		(0x0002000000000000ULL & IODA3_MBT0_BASE_ADDR);
+	p->mbt_cache[1][1] = IODA3_MBT1_ENABLE |
+		(0x00ff000000000000ULL & IODA3_MBT1_MASK) |
+		SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, 0ull, pe_number);
+
+	phb4_ioda_sel(p, IODA3_TBL_MBT, 0, true);
+	for (i = 0; i < p->mbt_size; i++) {
+		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][0]);
+		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][1]);
+	}
+
+	phb4_init_capp_errors(p);
+
+	phb4_init_capp_regs(p);
+
+	if (!(p->rev == PHB4_REV_NIMBUS_DD10)) {
+		if (!chiptod_capp_timebase_sync(p->chip_id, CAPP_TFMR,
+						CAPP_TB,
+						PHB4_CAPP_REG_OFFSET(p))) {
+			PHBERR(p, "CAPP: Failed to sync timebase\n");
+			return OPAL_HARDWARE;
+		}
+	}
+	return OPAL_SUCCESS;
+}
+
+static int64_t phb4_set_capi_mode(struct phb *phb, uint64_t mode,
+				  uint64_t pe_number)
+{
+	struct phb4 *p = phb_to_phb4(phb);
+	struct proc_chip *chip = get_chip(p->chip_id);
+	uint64_t reg;
+	uint32_t offset;
+
+	lock(&capi_lock);
+	/* Only PHB0 and PHB3 have the PHB/CAPP I/F so CAPI Adapters can
+	 * be connected to either PEC0 or PEC2. Single port CAPI adapter
+	 * can be connected to either PEC0 or PEC2, but Dual-Port Adapter
+	 * can only be connected to PEC2
+	 */
+	chip->capp_phb4_attached_mask |= 1 << p->index;
+	unlock(&capi_lock);
+
+	offset = PHB4_CAPP_REG_OFFSET(p);
+	xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
+	if ((reg & PPC_BIT(5))) {
+		PHBERR(p, "CAPP: recovery failed (%016llx)\n", reg);
+		return OPAL_HARDWARE;
+	} else if ((reg & PPC_BIT(0)) && (!(reg & PPC_BIT(1)))) {
+		PHBDBG(p, "CAPP: recovery in progress\n");
+		return OPAL_BUSY;
+	}
+
+	switch (mode) {
+	case OPAL_PHB_CAPI_MODE_PCIE:
+		return OPAL_UNSUPPORTED;
+
+	case OPAL_PHB_CAPI_MODE_CAPI:
+		return enable_capi_mode(p, pe_number);
+
+	case OPAL_PHB_CAPI_MODE_SNOOP_OFF:
+		xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset,
+			    0x0000000000000000);
+		return OPAL_SUCCESS;
+
+	case OPAL_PHB_CAPI_MODE_SNOOP_ON:
+		xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset,
+			    0x0000000000000000);
+		reg = 0xA1F0000000000000;
+		xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, reg);
+
+		return OPAL_SUCCESS;
+	}
+
+	return OPAL_UNSUPPORTED;
+}
+
 static const struct phb_ops phb4_ops = {
 	.cfg_read8		= phb4_pcicfg_read8,
 	.cfg_read16		= phb4_pcicfg_read16,
@@ -2405,6 +2666,7 @@  static const struct phb_ops phb4_ops = {
 	.get_diag_data		= NULL,
 	.get_diag_data2		= phb4_get_diag_data,
 	.tce_kill		= phb4_tce_kill,
+	.set_capi_mode		= phb4_set_capi_mode,
 };
 
 static void phb4_init_ioda3(struct phb4 *p)
diff --git a/include/chip.h b/include/chip.h
index 588db9f..a4a1e1e 100644
--- a/include/chip.h
+++ b/include/chip.h
@@ -190,6 +190,7 @@  struct proc_chip {
 
 	/* Must hold capi_lock to change */
 	uint8_t			capp_phb3_attached_mask;
+	uint8_t			capp_phb4_attached_mask;
 	uint8_t			capp_ucode_loaded;
 
 	/* Used by hw/centaur.c */
diff --git a/include/phb4-capp.h b/include/phb4-capp.h
new file mode 100644
index 0000000..a42383e
--- /dev/null
+++ b/include/phb4-capp.h
@@ -0,0 +1,57 @@ 
+/* Copyright 2013-2017 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PHB4_CAPP_H
+#define __PHB4_CAPP_H
+
+#define CAPP_SNP_ARRAY_WRITE_REG		0x2010841  /* S2 */
+#define CAPP_SNP_ARRAY_ADDR_REG 		0x2010828
+#define CAPP_APC_MASTER_ARRAY_ADDR_REG		0x201082A
+#define CAPP_APC_MASTER_ARRAY_WRITE_REG 	0x2010842  /* S2 */
+
+#define APC_MASTER_PB_CTRL			0x2010818
+#define APC_MASTER_CAPI_CTRL			0x2010819
+#define LCO_MASTER_TARGET			0x2010821
+#define EPOCH_RECOVERY_TIMERS_CTRL		0x201082C
+#define SNOOP_CAPI_CONFIG			0x201081A
+#define SNOOP_CONTROL				0x201081B
+#define TRANSPORT_CONTROL			0x201081C
+#define CAPP_TB 				0x2010826
+#define CAPP_TFMR				0x2010827
+#define CAPP_ERR_STATUS_CTRL			0x201080E
+#define FLUSH_SUE_STATE_MAP			0x201080F
+#define FLUSH_CPIG_STATE_MAP			0x2010820  /* TBD */
+#define FLUSH_SUE_UOP1				0x2010843  /* S2 */
+#define APC_FSM_READ_MASK			0x2010823
+#define XPT_FSM_RMM				0x2010831
+
+/* CAPP0 attached to PHB0(PEC0 - single port)
+ * CAPP1 attached to PHB3(PEC2 - single or dual port)
+ *
+ * SCOM address Base (Ring = ‘0010’b)
+ * CAPP Unit Satellite           SCOM address Base
+ * CAPP 0    S1 (sat = ‘0000’b)  x02010800
+ * CAPP 0    S2 (sat = ‘0001’b)  x02010840
+ * CAPP 1    S1 (sat = ‘0000’b)  x04010800
+ * CAPP 1    S2 (sat = ‘0001’b)  x04010840
+ */
+#define CAPP1_REG_OFFSET 0x2000000
+
+#define PHB4_CAPP_MAX_PHB_INDEX 3
+
+#define PHB4_CAPP_REG_OFFSET(p) ((p)->index == 0 ? 0x0 : CAPP1_REG_OFFSET)
+
+#endif /* __PHB4_CAPP_H */
diff --git a/include/phb4-regs.h b/include/phb4-regs.h
index 48953e4..4cb0b26 100644
--- a/include/phb4-regs.h
+++ b/include/phb4-regs.h
@@ -71,8 +71,12 @@ 
 #define PHB_PEST_BAR			0x1a8
 #define   PHB_PEST_BAR_ENABLE		PPC_BIT(0)
 #define   PHB_PEST_BASE_ADDRESS		PPC_BITMASK(8,51)
+#define PHB_PBL_ASN_CMPM		0x1C0
+#define   PHB_CAPI_ASN_ENABLE		PPC_BIT(63)
+#define PHB_CAPI_CMPM			0x1C8
+#define   PHB_CAPI_CMPM_ENABLE		PPC_BIT(63)
 #define PHB_M64_UPPER_BITS		0x1f0
-#define PHB_INTREP_TIMER		0x1f8
+#define PHB_XLATE_PREFIX		0x1f8
 #define PHB_DMARD_SYNC			0x200
 #define   PHB_DMARD_SYNC_START		PPC_BIT(0)
 #define   PHB_DMARD_SYNC_COMPLETE	PPC_BIT(1)
@@ -157,6 +161,8 @@ 
 #define PHB_RC_CONFIG_BASE		0x1000
 
 #define PHB_PBL_TIMEOUT_CTRL		0x1810
+#define PHB_PBL_NBW_CMPM		0x1830
+#define   PHB_CAPI_NBW_ENABLE		PPC_BIT(63)
 
 // FIXME add more here
 #define PHB_PCIE_SCR			0x1A00