[1/5] npu2-opencapi: Use presence detection

Message ID 20180412124526.12662-2-fbarrat@linux.ibm.com
State Changes Requested
Headers show
Series
  • npu2-opencapi: Use presence detection and reset
Related show

Commit Message

Frederic Barrat April 12, 2018, 12:45 p.m.
From: Frederic Barrat <fbarrat@linux.vnet.ibm.com>

Presence detection is not part of the opencapi specification. So each
platform may choose to implement it the way it wants.

All current platforms implement it through an i2c device where we can
query a pin to know if a device is connected or not. ZZ and Zaius have
a similar design and even use the same i2c information and pin
numbers.
However, presence detection on older ZZ planar (older than v4) doesn't
work, so we don't activate it for now, until our lab systems are
upgraded and it's better tested.

Presence detection on witherspoon is still being worked on, but is
very likely to follow something similar, i.e. an i2c pin to query per
device.

Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
---
 core/platform.c          |  14 +++++-
 hw/npu2-opencapi.c       | 121 ++++++++++++++++++++++++++++++++++++++---------
 include/platform.h       |   4 ++
 platforms/astbmc/zaius.c |   9 +++-
 platforms/ibm-fsp/zz.c   |  14 +++++-
 5 files changed, 137 insertions(+), 25 deletions(-)

Comments

Andrew Donnellan April 13, 2018, 7:24 a.m. | #1
On 12/04/18 22:45, Frederic Barrat wrote:
> From: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
> 
> Presence detection is not part of the opencapi specification. So each
> platform may choose to implement it the way it wants.
> 
> All current platforms implement it through an i2c device where we can
> query a pin to know if a device is connected or not. ZZ and Zaius have
> a similar design and even use the same i2c information and pin
> numbers.
> However, presence detection on older ZZ planar (older than v4) doesn't
> work, so we don't activate it for now, until our lab systems are
> upgraded and it's better tested.
> 
> Presence detection on witherspoon is still being worked on, but is
> very likely to follow something similar, i.e. an i2c pin to query per
> device.
> 
> Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>

A few comments below. Smoketested this on one of our zaiuses, which 
let's be honest was probably one of the same machines you tested on...

Apart from comments below:

Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>

> ---
>   core/platform.c          |  14 +++++-
>   hw/npu2-opencapi.c       | 121 ++++++++++++++++++++++++++++++++++++++---------
>   include/platform.h       |   4 ++
>   platforms/astbmc/zaius.c |   9 +++-
>   platforms/ibm-fsp/zz.c   |  14 +++++-
>   5 files changed, 137 insertions(+), 25 deletions(-)
> 
> diff --git a/core/platform.c b/core/platform.c
> index f09ea3c1..174235d9 100644
> --- a/core/platform.c
> +++ b/core/platform.c
> @@ -34,6 +34,10 @@ DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, OPAL_PLATFORM_ERR_EVT, OPAL_CEC,
>   		 OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT,
>   		 OPAL_ABNORMAL_POWER_OFF);
>   
> +#define OCAPI_I2C_PRESENCE_ADDR    0x20
> +#define OCAPI_I2C_PRESENCE_BOTTOM  (1 << 2)
> +#define OCAPI_I2C_PRESENCE_TOP     (1 << 7)
> +

I'm not convinced there's an advantage to having these as #defines 
rather than literals in platform_ocapi like all the other stuff already 
there.

In any case, it should be noted that these values, like everything else 
in generic_ocapi, are for a ZZ.

>   /*
>    * Various wrappers for platform functions
>    */
> @@ -175,7 +179,15 @@ const struct platform_ocapi generic_ocapi = {
>   	.i2c_offset	= { 0x3, 0x1, 0x1 },
>   	.i2c_odl0_data	= { 0xFD, 0xFD, 0xFF },
>   	.i2c_odl1_data	= { 0xBF, 0xBF, 0xFF },
> -	.odl_phy_swap	= true,
> +	.i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
> +	.i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
> +	.i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
> +	/*
> +	 * i2c presence detection is broken on ZZ planar < v4 so we
> +	 * force the presence until all our systems are upgraded
> +	 */
> +	.force_presence    = true,
> +	.odl_phy_swap   = true,
>   };
>   
>   static struct bmc_platform generic_bmc = {
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 2cc776c7..b2bc0628 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -829,6 +829,44 @@ static void reset_ocapi_device(struct npu2_dev *dev)
>   	}
>   }
>   
> +static bool i2c_presence_detect(struct npu2_dev *dev)
> +{
> +	uint8_t state, data;
> +	int rc;
> +
> +	/*
> +	 * Opencapi presence detection is done through i2c
> +	 *
> +	 * Lagrange platforms (ZZ, Zaius) use the same default mechanism.
> +	 * Witherspoon will need a specific implementation, TBD.
> +	 */
> +	rc = i2c_request_send(dev->i2c_port_id_ocapi,
> +			platform.ocapi->i2c_presence_addr,
> +			SMBUS_READ, 0, 1,
> +			&state, 1, 120);
> +	if (rc) {
> +		prlog(PR_ERR, "OCAPI: error detecting link presence: %d\n",
> +			rc);
> +		return true; /* assume link exists */
> +	}
> +
> +	prlog(PR_DEBUG, "OCAPI: I2C presence detect: 0x%x\n", state);
> +
> +	switch (dev->index) {
> +	case 2:
> +		data = platform.ocapi->i2c_presence_odl0;
> +		break;
> +	case 3:
> +		data = platform.ocapi->i2c_presence_odl1;
> +		break;
> +	default:
> +		prlog(PR_ERR, "OCAPI: presence detection on invalid link\n");
> +		return true;
> +	}
> +	/* Presence detect bits are active low */
> +	return !(state & data);
> +}
> +
>   static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev)
>   {
>   	uint64_t reg, config_xscom;
> @@ -896,6 +934,20 @@ static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev)
>   	return OPAL_HARDWARE;
>   }
>   
> +static int64_t npu2_opencapi_get_presence_state(struct pci_slot *slot,
> +						uint8_t *val)
> +{
> +	bool present;
> +	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
> +
> +	if (platform.ocapi->force_presence)
> +		present = true;
> +	else
> +		present = i2c_presence_detect(dev);
> +	*val = present;
> +	return OPAL_SUCCESS;
> +}
> +
>   static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val)
>   {
>   	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
> @@ -926,9 +978,9 @@ static struct pci_slot *npu2_opencapi_slot_create(struct phb *phb)
>   		return slot;
>   
>   	/* TODO: Figure out other slot functions */
> -	slot->ops.get_presence_state = NULL;
> -	slot->ops.get_link_state = npu2_opencapi_get_link_state;
> -	slot->ops.get_power_state = NULL;
> +	slot->ops.get_presence_state  = npu2_opencapi_get_presence_state;
> +	slot->ops.get_link_state      = npu2_opencapi_get_link_state;
> +	slot->ops.get_power_state     = NULL;
>   	slot->ops.get_attention_state = NULL;
>   	slot->ops.get_latch_state     = NULL;
>   	slot->ops.set_power_state     = NULL;
> @@ -1264,6 +1316,48 @@ static int setup_irq(struct npu2 *p)
>   
>   #define LINK_TRAINING_RETRIES	5
>   
> +static int train_link(int chip_id, struct npu2_dev *dev)
> +{
> +	bool train;
> +	int rc;
> +	int retries = LINK_TRAINING_RETRIES;
> +
> +	if (platform.ocapi->force_presence)
> +		train = true;
> +	else
> +		train = i2c_presence_detect(dev);
> +	if (!train) {
> +		/*
> +		 * FIXME: if there's no card on the link, we should consider
> +		 * powering off the unused lanes to save energy
> +		 */
> +		prlog(PR_INFO, "OCAPI: no card detected on link %d, chip %d\n",
> +			dev->index, chip_id);
> +		return -1;
> +	}
> +
> +	do {
> +		rc = odl_train(chip_id, dev->index, dev);
> +	} while (rc != OPAL_SUCCESS && --retries);
> +
> +	if (rc != OPAL_SUCCESS && retries == 0) {
> +		/**
> +		 * @fwts-label OCAPILinkTrainingFailed
> +		 * @fwts-advice The OpenCAPI link training procedure failed.
> +		 * This indicates a hardware or firmware bug. OpenCAPI
> +		 * functionality will not be available on this link.
> +		 */
> +		prlog(PR_ERR,
> +			"OCAPI: Link %d on chip %u failed to train\n",
> +			dev->index, chip_id);
> +		prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
> +			get_odl_status(chip_id, dev->index));
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
>   static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
>   				       struct npu2_dev *dev)
>   {
> @@ -1272,7 +1366,6 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
>   	struct pci_slot *slot;
>   	char port_name[17];
>   	uint64_t mm_win[2];
> -	int retries = LINK_TRAINING_RETRIES;
>   	int rc;
>   
>   	dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
> @@ -1367,27 +1460,11 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
>   		break;
>   
>   	case NPU2_TRAIN_DEFAULT:
> -		do {
> -			rc = odl_train(n->chip_id, dev->index, dev);
> -		} while (rc != OPAL_SUCCESS && --retries);
> -
> -		if (rc != OPAL_SUCCESS && retries == 0) {
> -			/**
> -			 * @fwts-label OCAPILinkTrainingFailed
> -			 * @fwts-advice The OpenCAPI link training procedure failed.
> -			 * This indicates a hardware or firmware bug. OpenCAPI
> -			 * functionality will not be available on this link.
> -			 */
> -			prlog(PR_ERR,
> -				"OCAPI: Link %d on chip %u failed to train\n",
> -				dev->index, n->chip_id);
> -			prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
> -				get_odl_status(n->chip_id, dev->index));
> +		rc = train_link(n->chip_id, dev);
> +		if (rc)
>   			goto failed;
> -		}
>   
>   		otl_enabletx(n->chip_id, n->xscom_base, dev->index);
> -
>   		slot = npu2_opencapi_slot_create(&dev->phb_ocapi);
>   		if (!slot) {
>   			/**
> diff --git a/include/platform.h b/include/platform.h
> index a7776446..9a04ab37 100644
> --- a/include/platform.h
> +++ b/include/platform.h
> @@ -51,6 +51,10 @@ struct platform_ocapi {
>   	uint32_t i2c_offset[3];		/* Offsets on I2C device */
>   	uint8_t i2c_odl0_data[3];	/* Data to reset ODL0 */
>   	uint8_t i2c_odl1_data[3];	/* Data to reset ODL1 */
> +	uint8_t i2c_presence_addr;	/* I2C address for presence detection */
> +	uint8_t i2c_presence_odl0;	/* I2C pin to read to detect ODL0 */
> +	uint8_t i2c_presence_odl1;	/* I2C pin to read to detect ODL1 */

We use these as a mask rather than a pin index, maybe note that explicitly

> +	bool force_presence;            /* don't use i2c detection */
>   	bool odl_phy_swap;		/* Swap ODL1 to use brick 2 rather than
>   					 * brick 1 lanes */
>   };
> diff --git a/platforms/astbmc/zaius.c b/platforms/astbmc/zaius.c
> index 7678ad12..279fbf8b 100644
> --- a/platforms/astbmc/zaius.c
> +++ b/platforms/astbmc/zaius.c
> @@ -24,13 +24,20 @@
>   
>   #include "astbmc.h"
>   
> +#define OCAPI_I2C_PRESENCE_ADDR    0x20
> +#define OCAPI_I2C_PRESENCE_BOTTOM  (1 << 2)
> +#define OCAPI_I2C_PRESENCE_TOP     (1 << 7)
> +
>   const struct platform_ocapi zaius_ocapi = {
>   	.i2c_engine	= 1,
>   	.i2c_port	= 4,
>   	.i2c_offset	= { 0x3, 0x1, 0x1 },
>   	.i2c_odl0_data	= { 0xFD, 0xFD, 0xFF },
>   	.i2c_odl1_data	= { 0xBF, 0xBF, 0xFF },
> -	.odl_phy_swap	= true,
> +	.i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
> +	.i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
> +	.i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
> +	.odl_phy_swap   = true,
>   };
>   
>   #define NPU_BASE 0x5011000
> diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c
> index 9a849290..66f29d44 100644
> --- a/platforms/ibm-fsp/zz.c
> +++ b/platforms/ibm-fsp/zz.c
> @@ -27,6 +27,10 @@
>   #include "ibm-fsp.h"
>   #include "lxvpd.h"
>   
> +#define OCAPI_I2C_PRESENCE_ADDR    0x20
> +#define OCAPI_I2C_PRESENCE_BOTTOM  (1 << 2)
> +#define OCAPI_I2C_PRESENCE_TOP     (1 << 7)
> +
>   /* We don't yet create NPU device nodes on ZZ, but these values are correct */
>   const struct platform_ocapi zz_ocapi = {
>   	.i2c_engine	= 1,
> @@ -34,7 +38,15 @@ const struct platform_ocapi zz_ocapi = {
>   	.i2c_offset	= { 0x3, 0x1, 0x1 },
>   	.i2c_odl0_data	= { 0xFD, 0xFD, 0xFF },
>   	.i2c_odl1_data	= { 0xBF, 0xBF, 0xFF },
> -	.odl_phy_swap	= true,
> +	.i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
> +	.i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
> +	.i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
> +	/*
> +	 * i2c presence detection is broken on ZZ planar < v4 so we
> +	 * force the presence until all our systems are upgraded
> +	 */
> +	.force_presence    = true,
> +	.odl_phy_swap   = true,
>   };
>   
>   static bool zz_probe(void)
>
Frederic Barrat April 13, 2018, 2:10 p.m. | #2
Le 13/04/2018 à 09:24, Andrew Donnellan a écrit :
> On 12/04/18 22:45, Frederic Barrat wrote:
>> From: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
>>
>> Presence detection is not part of the opencapi specification. So each
>> platform may choose to implement it the way it wants.
>>
>> All current platforms implement it through an i2c device where we can
>> query a pin to know if a device is connected or not. ZZ and Zaius have
>> a similar design and even use the same i2c information and pin
>> numbers.
>> However, presence detection on older ZZ planar (older than v4) doesn't
>> work, so we don't activate it for now, until our lab systems are
>> upgraded and it's better tested.
>>
>> Presence detection on witherspoon is still being worked on, but is
>> very likely to follow something similar, i.e. an i2c pin to query per
>> device.
>>
>> Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
> 
> A few comments below. Smoketested this on one of our zaiuses, which 
> let's be honest was probably one of the same machines you tested on...

:-)
I had borrowed some other zaius to test several configurations. I've 
tested a cpu with only the "top" slot busy. Another cpu with only the 
"bottom" slot busy. "Our" zaius has a cpu with the 2 slots busy. It was 
all looking good, the i2c operation is giving the expected results. On 
zaius. On ZZ, it's a different story, as indicated in the patch.


> Apart from comments below:
> 
> Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
> 
>> ---
>>   core/platform.c          |  14 +++++-
>>   hw/npu2-opencapi.c       | 121 
>> ++++++++++++++++++++++++++++++++++++++---------
>>   include/platform.h       |   4 ++
>>   platforms/astbmc/zaius.c |   9 +++-
>>   platforms/ibm-fsp/zz.c   |  14 +++++-
>>   5 files changed, 137 insertions(+), 25 deletions(-)
>>
>> diff --git a/core/platform.c b/core/platform.c
>> index f09ea3c1..174235d9 100644
>> --- a/core/platform.c
>> +++ b/core/platform.c
>> @@ -34,6 +34,10 @@ DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, 
>> OPAL_PLATFORM_ERR_EVT, OPAL_CEC,
>>            OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT,
>>            OPAL_ABNORMAL_POWER_OFF);
>> +#define OCAPI_I2C_PRESENCE_ADDR    0x20
>> +#define OCAPI_I2C_PRESENCE_BOTTOM  (1 << 2)
>> +#define OCAPI_I2C_PRESENCE_TOP     (1 << 7)
>> +
> 
> I'm not convinced there's an advantage to having these as #defines 
> rather than literals in platform_ocapi like all the other stuff already 
> there.

So you're not going to like the next patch, where I align the reset 
parameters to look the same. I can get rid of the macros, I don't feel 
strongly about it.

> In any case, it should be noted that these values, like everything else 
> in generic_ocapi, are for a ZZ.

True.

I'll wait for the review of the other patches to send a respin.

   Fred



>>   /*
>>    * Various wrappers for platform functions
>>    */
>> @@ -175,7 +179,15 @@ const struct platform_ocapi generic_ocapi = {
>>       .i2c_offset    = { 0x3, 0x1, 0x1 },
>>       .i2c_odl0_data    = { 0xFD, 0xFD, 0xFF },
>>       .i2c_odl1_data    = { 0xBF, 0xBF, 0xFF },
>> -    .odl_phy_swap    = true,
>> +    .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
>> +    .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
>> +    .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
>> +    /*
>> +     * i2c presence detection is broken on ZZ planar < v4 so we
>> +     * force the presence until all our systems are upgraded
>> +     */
>> +    .force_presence    = true,
>> +    .odl_phy_swap   = true,
>>   };
>>   static struct bmc_platform generic_bmc = {
>> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
>> index 2cc776c7..b2bc0628 100644
>> --- a/hw/npu2-opencapi.c
>> +++ b/hw/npu2-opencapi.c
>> @@ -829,6 +829,44 @@ static void reset_ocapi_device(struct npu2_dev *dev)
>>       }
>>   }
>> +static bool i2c_presence_detect(struct npu2_dev *dev)
>> +{
>> +    uint8_t state, data;
>> +    int rc;
>> +
>> +    /*
>> +     * Opencapi presence detection is done through i2c
>> +     *
>> +     * Lagrange platforms (ZZ, Zaius) use the same default mechanism.
>> +     * Witherspoon will need a specific implementation, TBD.
>> +     */
>> +    rc = i2c_request_send(dev->i2c_port_id_ocapi,
>> +            platform.ocapi->i2c_presence_addr,
>> +            SMBUS_READ, 0, 1,
>> +            &state, 1, 120);
>> +    if (rc) {
>> +        prlog(PR_ERR, "OCAPI: error detecting link presence: %d\n",
>> +            rc);
>> +        return true; /* assume link exists */
>> +    }
>> +
>> +    prlog(PR_DEBUG, "OCAPI: I2C presence detect: 0x%x\n", state);
>> +
>> +    switch (dev->index) {
>> +    case 2:
>> +        data = platform.ocapi->i2c_presence_odl0;
>> +        break;
>> +    case 3:
>> +        data = platform.ocapi->i2c_presence_odl1;
>> +        break;
>> +    default:
>> +        prlog(PR_ERR, "OCAPI: presence detection on invalid link\n");
>> +        return true;
>> +    }
>> +    /* Presence detect bits are active low */
>> +    return !(state & data);
>> +}
>> +
>>   static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev 
>> *dev)
>>   {
>>       uint64_t reg, config_xscom;
>> @@ -896,6 +934,20 @@ static int odl_train(uint32_t gcid, uint32_t 
>> index, struct npu2_dev *dev)
>>       return OPAL_HARDWARE;
>>   }
>> +static int64_t npu2_opencapi_get_presence_state(struct pci_slot *slot,
>> +                        uint8_t *val)
>> +{
>> +    bool present;
>> +    struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
>> +
>> +    if (platform.ocapi->force_presence)
>> +        present = true;
>> +    else
>> +        present = i2c_presence_detect(dev);
>> +    *val = present;
>> +    return OPAL_SUCCESS;
>> +}
>> +
>>   static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, 
>> uint8_t *val)
>>   {
>>       struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
>> @@ -926,9 +978,9 @@ static struct pci_slot 
>> *npu2_opencapi_slot_create(struct phb *phb)
>>           return slot;
>>       /* TODO: Figure out other slot functions */
>> -    slot->ops.get_presence_state = NULL;
>> -    slot->ops.get_link_state = npu2_opencapi_get_link_state;
>> -    slot->ops.get_power_state = NULL;
>> +    slot->ops.get_presence_state  = npu2_opencapi_get_presence_state;
>> +    slot->ops.get_link_state      = npu2_opencapi_get_link_state;
>> +    slot->ops.get_power_state     = NULL;
>>       slot->ops.get_attention_state = NULL;
>>       slot->ops.get_latch_state     = NULL;
>>       slot->ops.set_power_state     = NULL;
>> @@ -1264,6 +1316,48 @@ static int setup_irq(struct npu2 *p)
>>   #define LINK_TRAINING_RETRIES    5
>> +static int train_link(int chip_id, struct npu2_dev *dev)
>> +{
>> +    bool train;
>> +    int rc;
>> +    int retries = LINK_TRAINING_RETRIES;
>> +
>> +    if (platform.ocapi->force_presence)
>> +        train = true;
>> +    else
>> +        train = i2c_presence_detect(dev);
>> +    if (!train) {
>> +        /*
>> +         * FIXME: if there's no card on the link, we should consider
>> +         * powering off the unused lanes to save energy
>> +         */
>> +        prlog(PR_INFO, "OCAPI: no card detected on link %d, chip %d\n",
>> +            dev->index, chip_id);
>> +        return -1;
>> +    }
>> +
>> +    do {
>> +        rc = odl_train(chip_id, dev->index, dev);
>> +    } while (rc != OPAL_SUCCESS && --retries);
>> +
>> +    if (rc != OPAL_SUCCESS && retries == 0) {
>> +        /**
>> +         * @fwts-label OCAPILinkTrainingFailed
>> +         * @fwts-advice The OpenCAPI link training procedure failed.
>> +         * This indicates a hardware or firmware bug. OpenCAPI
>> +         * functionality will not be available on this link.
>> +         */
>> +        prlog(PR_ERR,
>> +            "OCAPI: Link %d on chip %u failed to train\n",
>> +            dev->index, chip_id);
>> +        prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
>> +            get_odl_status(chip_id, dev->index));
>> +        return -1;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>   static void npu2_opencapi_setup_device(struct dt_node *dn_link, 
>> struct npu2 *n,
>>                          struct npu2_dev *dev)
>>   {
>> @@ -1272,7 +1366,6 @@ static void npu2_opencapi_setup_device(struct 
>> dt_node *dn_link, struct npu2 *n,
>>       struct pci_slot *slot;
>>       char port_name[17];
>>       uint64_t mm_win[2];
>> -    int retries = LINK_TRAINING_RETRIES;
>>       int rc;
>>       dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
>> @@ -1367,27 +1460,11 @@ static void npu2_opencapi_setup_device(struct 
>> dt_node *dn_link, struct npu2 *n,
>>           break;
>>       case NPU2_TRAIN_DEFAULT:
>> -        do {
>> -            rc = odl_train(n->chip_id, dev->index, dev);
>> -        } while (rc != OPAL_SUCCESS && --retries);
>> -
>> -        if (rc != OPAL_SUCCESS && retries == 0) {
>> -            /**
>> -             * @fwts-label OCAPILinkTrainingFailed
>> -             * @fwts-advice The OpenCAPI link training procedure failed.
>> -             * This indicates a hardware or firmware bug. OpenCAPI
>> -             * functionality will not be available on this link.
>> -             */
>> -            prlog(PR_ERR,
>> -                "OCAPI: Link %d on chip %u failed to train\n",
>> -                dev->index, n->chip_id);
>> -            prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
>> -                get_odl_status(n->chip_id, dev->index));
>> +        rc = train_link(n->chip_id, dev);
>> +        if (rc)
>>               goto failed;
>> -        }
>>           otl_enabletx(n->chip_id, n->xscom_base, dev->index);
>> -
>>           slot = npu2_opencapi_slot_create(&dev->phb_ocapi);
>>           if (!slot) {
>>               /**
>> diff --git a/include/platform.h b/include/platform.h
>> index a7776446..9a04ab37 100644
>> --- a/include/platform.h
>> +++ b/include/platform.h
>> @@ -51,6 +51,10 @@ struct platform_ocapi {
>>       uint32_t i2c_offset[3];        /* Offsets on I2C device */
>>       uint8_t i2c_odl0_data[3];    /* Data to reset ODL0 */
>>       uint8_t i2c_odl1_data[3];    /* Data to reset ODL1 */
>> +    uint8_t i2c_presence_addr;    /* I2C address for presence 
>> detection */
>> +    uint8_t i2c_presence_odl0;    /* I2C pin to read to detect ODL0 */
>> +    uint8_t i2c_presence_odl1;    /* I2C pin to read to detect ODL1 */
> 
> We use these as a mask rather than a pin index, maybe note that explicitly
> 
>> +    bool force_presence;            /* don't use i2c detection */
>>       bool odl_phy_swap;        /* Swap ODL1 to use brick 2 rather than
>>                        * brick 1 lanes */
>>   };
>> diff --git a/platforms/astbmc/zaius.c b/platforms/astbmc/zaius.c
>> index 7678ad12..279fbf8b 100644
>> --- a/platforms/astbmc/zaius.c
>> +++ b/platforms/astbmc/zaius.c
>> @@ -24,13 +24,20 @@
>>   #include "astbmc.h"
>> +#define OCAPI_I2C_PRESENCE_ADDR    0x20
>> +#define OCAPI_I2C_PRESENCE_BOTTOM  (1 << 2)
>> +#define OCAPI_I2C_PRESENCE_TOP     (1 << 7)
>> +
>>   const struct platform_ocapi zaius_ocapi = {
>>       .i2c_engine    = 1,
>>       .i2c_port    = 4,
>>       .i2c_offset    = { 0x3, 0x1, 0x1 },
>>       .i2c_odl0_data    = { 0xFD, 0xFD, 0xFF },
>>       .i2c_odl1_data    = { 0xBF, 0xBF, 0xFF },
>> -    .odl_phy_swap    = true,
>> +    .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
>> +    .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
>> +    .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
>> +    .odl_phy_swap   = true,
>>   };
>>   #define NPU_BASE 0x5011000
>> diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c
>> index 9a849290..66f29d44 100644
>> --- a/platforms/ibm-fsp/zz.c
>> +++ b/platforms/ibm-fsp/zz.c
>> @@ -27,6 +27,10 @@
>>   #include "ibm-fsp.h"
>>   #include "lxvpd.h"
>> +#define OCAPI_I2C_PRESENCE_ADDR    0x20
>> +#define OCAPI_I2C_PRESENCE_BOTTOM  (1 << 2)
>> +#define OCAPI_I2C_PRESENCE_TOP     (1 << 7)
>> +
>>   /* We don't yet create NPU device nodes on ZZ, but these values are 
>> correct */
>>   const struct platform_ocapi zz_ocapi = {
>>       .i2c_engine    = 1,
>> @@ -34,7 +38,15 @@ const struct platform_ocapi zz_ocapi = {
>>       .i2c_offset    = { 0x3, 0x1, 0x1 },
>>       .i2c_odl0_data    = { 0xFD, 0xFD, 0xFF },
>>       .i2c_odl1_data    = { 0xBF, 0xBF, 0xFF },
>> -    .odl_phy_swap    = true,
>> +    .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
>> +    .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
>> +    .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
>> +    /*
>> +     * i2c presence detection is broken on ZZ planar < v4 so we
>> +     * force the presence until all our systems are upgraded
>> +     */
>> +    .force_presence    = true,
>> +    .odl_phy_swap   = true,
>>   };
>>   static bool zz_probe(void)
>>
>
Stewart Smith April 23, 2018, 4:59 a.m. | #3
Frederic Barrat <fbarrat@linux.ibm.com> writes:
> I'll wait for the review of the other patches to send a respin.

I'll wait for it before merging then :)

Patch

diff --git a/core/platform.c b/core/platform.c
index f09ea3c1..174235d9 100644
--- a/core/platform.c
+++ b/core/platform.c
@@ -34,6 +34,10 @@  DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, OPAL_PLATFORM_ERR_EVT, OPAL_CEC,
 		 OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT,
 		 OPAL_ABNORMAL_POWER_OFF);
 
+#define OCAPI_I2C_PRESENCE_ADDR    0x20
+#define OCAPI_I2C_PRESENCE_BOTTOM  (1 << 2)
+#define OCAPI_I2C_PRESENCE_TOP     (1 << 7)
+
 /*
  * Various wrappers for platform functions
  */
@@ -175,7 +179,15 @@  const struct platform_ocapi generic_ocapi = {
 	.i2c_offset	= { 0x3, 0x1, 0x1 },
 	.i2c_odl0_data	= { 0xFD, 0xFD, 0xFF },
 	.i2c_odl1_data	= { 0xBF, 0xBF, 0xFF },
-	.odl_phy_swap	= true,
+	.i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
+	.i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
+	.i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
+	/*
+	 * i2c presence detection is broken on ZZ planar < v4 so we
+	 * force the presence until all our systems are upgraded
+	 */
+	.force_presence    = true,
+	.odl_phy_swap   = true,
 };
 
 static struct bmc_platform generic_bmc = {
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 2cc776c7..b2bc0628 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -829,6 +829,44 @@  static void reset_ocapi_device(struct npu2_dev *dev)
 	}
 }
 
+static bool i2c_presence_detect(struct npu2_dev *dev)
+{
+	uint8_t state, data;
+	int rc;
+
+	/*
+	 * Opencapi presence detection is done through i2c
+	 *
+	 * Lagrange platforms (ZZ, Zaius) use the same default mechanism.
+	 * Witherspoon will need a specific implementation, TBD.
+	 */
+	rc = i2c_request_send(dev->i2c_port_id_ocapi,
+			platform.ocapi->i2c_presence_addr,
+			SMBUS_READ, 0, 1,
+			&state, 1, 120);
+	if (rc) {
+		prlog(PR_ERR, "OCAPI: error detecting link presence: %d\n",
+			rc);
+		return true; /* assume link exists */
+	}
+
+	prlog(PR_DEBUG, "OCAPI: I2C presence detect: 0x%x\n", state);
+
+	switch (dev->index) {
+	case 2:
+		data = platform.ocapi->i2c_presence_odl0;
+		break;
+	case 3:
+		data = platform.ocapi->i2c_presence_odl1;
+		break;
+	default:
+		prlog(PR_ERR, "OCAPI: presence detection on invalid link\n");
+		return true;
+	}
+	/* Presence detect bits are active low */
+	return !(state & data);
+}
+
 static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev)
 {
 	uint64_t reg, config_xscom;
@@ -896,6 +934,20 @@  static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev)
 	return OPAL_HARDWARE;
 }
 
+static int64_t npu2_opencapi_get_presence_state(struct pci_slot *slot,
+						uint8_t *val)
+{
+	bool present;
+	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
+
+	if (platform.ocapi->force_presence)
+		present = true;
+	else
+		present = i2c_presence_detect(dev);
+	*val = present;
+	return OPAL_SUCCESS;
+}
+
 static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val)
 {
 	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
@@ -926,9 +978,9 @@  static struct pci_slot *npu2_opencapi_slot_create(struct phb *phb)
 		return slot;
 
 	/* TODO: Figure out other slot functions */
-	slot->ops.get_presence_state = NULL;
-	slot->ops.get_link_state = npu2_opencapi_get_link_state;
-	slot->ops.get_power_state = NULL;
+	slot->ops.get_presence_state  = npu2_opencapi_get_presence_state;
+	slot->ops.get_link_state      = npu2_opencapi_get_link_state;
+	slot->ops.get_power_state     = NULL;
 	slot->ops.get_attention_state = NULL;
 	slot->ops.get_latch_state     = NULL;
 	slot->ops.set_power_state     = NULL;
@@ -1264,6 +1316,48 @@  static int setup_irq(struct npu2 *p)
 
 #define LINK_TRAINING_RETRIES	5
 
+static int train_link(int chip_id, struct npu2_dev *dev)
+{
+	bool train;
+	int rc;
+	int retries = LINK_TRAINING_RETRIES;
+
+	if (platform.ocapi->force_presence)
+		train = true;
+	else
+		train = i2c_presence_detect(dev);
+	if (!train) {
+		/*
+		 * FIXME: if there's no card on the link, we should consider
+		 * powering off the unused lanes to save energy
+		 */
+		prlog(PR_INFO, "OCAPI: no card detected on link %d, chip %d\n",
+			dev->index, chip_id);
+		return -1;
+	}
+
+	do {
+		rc = odl_train(chip_id, dev->index, dev);
+	} while (rc != OPAL_SUCCESS && --retries);
+
+	if (rc != OPAL_SUCCESS && retries == 0) {
+		/**
+		 * @fwts-label OCAPILinkTrainingFailed
+		 * @fwts-advice The OpenCAPI link training procedure failed.
+		 * This indicates a hardware or firmware bug. OpenCAPI
+		 * functionality will not be available on this link.
+		 */
+		prlog(PR_ERR,
+			"OCAPI: Link %d on chip %u failed to train\n",
+			dev->index, chip_id);
+		prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
+			get_odl_status(chip_id, dev->index));
+		return -1;
+	}
+
+	return 0;
+}
+
 static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
 				       struct npu2_dev *dev)
 {
@@ -1272,7 +1366,6 @@  static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
 	struct pci_slot *slot;
 	char port_name[17];
 	uint64_t mm_win[2];
-	int retries = LINK_TRAINING_RETRIES;
 	int rc;
 
 	dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
@@ -1367,27 +1460,11 @@  static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
 		break;
 
 	case NPU2_TRAIN_DEFAULT:
-		do {
-			rc = odl_train(n->chip_id, dev->index, dev);
-		} while (rc != OPAL_SUCCESS && --retries);
-
-		if (rc != OPAL_SUCCESS && retries == 0) {
-			/**
-			 * @fwts-label OCAPILinkTrainingFailed
-			 * @fwts-advice The OpenCAPI link training procedure failed.
-			 * This indicates a hardware or firmware bug. OpenCAPI
-			 * functionality will not be available on this link.
-			 */
-			prlog(PR_ERR,
-				"OCAPI: Link %d on chip %u failed to train\n",
-				dev->index, n->chip_id);
-			prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
-				get_odl_status(n->chip_id, dev->index));
+		rc = train_link(n->chip_id, dev);
+		if (rc)
 			goto failed;
-		}
 
 		otl_enabletx(n->chip_id, n->xscom_base, dev->index);
-
 		slot = npu2_opencapi_slot_create(&dev->phb_ocapi);
 		if (!slot) {
 			/**
diff --git a/include/platform.h b/include/platform.h
index a7776446..9a04ab37 100644
--- a/include/platform.h
+++ b/include/platform.h
@@ -51,6 +51,10 @@  struct platform_ocapi {
 	uint32_t i2c_offset[3];		/* Offsets on I2C device */
 	uint8_t i2c_odl0_data[3];	/* Data to reset ODL0 */
 	uint8_t i2c_odl1_data[3];	/* Data to reset ODL1 */
+	uint8_t i2c_presence_addr;	/* I2C address for presence detection */
+	uint8_t i2c_presence_odl0;	/* I2C pin to read to detect ODL0 */
+	uint8_t i2c_presence_odl1;	/* I2C pin to read to detect ODL1 */
+	bool force_presence;            /* don't use i2c detection */
 	bool odl_phy_swap;		/* Swap ODL1 to use brick 2 rather than
 					 * brick 1 lanes */
 };
diff --git a/platforms/astbmc/zaius.c b/platforms/astbmc/zaius.c
index 7678ad12..279fbf8b 100644
--- a/platforms/astbmc/zaius.c
+++ b/platforms/astbmc/zaius.c
@@ -24,13 +24,20 @@ 
 
 #include "astbmc.h"
 
+#define OCAPI_I2C_PRESENCE_ADDR    0x20
+#define OCAPI_I2C_PRESENCE_BOTTOM  (1 << 2)
+#define OCAPI_I2C_PRESENCE_TOP     (1 << 7)
+
 const struct platform_ocapi zaius_ocapi = {
 	.i2c_engine	= 1,
 	.i2c_port	= 4,
 	.i2c_offset	= { 0x3, 0x1, 0x1 },
 	.i2c_odl0_data	= { 0xFD, 0xFD, 0xFF },
 	.i2c_odl1_data	= { 0xBF, 0xBF, 0xFF },
-	.odl_phy_swap	= true,
+	.i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
+	.i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
+	.i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
+	.odl_phy_swap   = true,
 };
 
 #define NPU_BASE 0x5011000
diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c
index 9a849290..66f29d44 100644
--- a/platforms/ibm-fsp/zz.c
+++ b/platforms/ibm-fsp/zz.c
@@ -27,6 +27,10 @@ 
 #include "ibm-fsp.h"
 #include "lxvpd.h"
 
+#define OCAPI_I2C_PRESENCE_ADDR    0x20
+#define OCAPI_I2C_PRESENCE_BOTTOM  (1 << 2)
+#define OCAPI_I2C_PRESENCE_TOP     (1 << 7)
+
 /* We don't yet create NPU device nodes on ZZ, but these values are correct */
 const struct platform_ocapi zz_ocapi = {
 	.i2c_engine	= 1,
@@ -34,7 +38,15 @@  const struct platform_ocapi zz_ocapi = {
 	.i2c_offset	= { 0x3, 0x1, 0x1 },
 	.i2c_odl0_data	= { 0xFD, 0xFD, 0xFF },
 	.i2c_odl1_data	= { 0xBF, 0xBF, 0xFF },
-	.odl_phy_swap	= true,
+	.i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR,
+	.i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM,
+	.i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP,
+	/*
+	 * i2c presence detection is broken on ZZ planar < v4 so we
+	 * force the presence until all our systems are upgraded
+	 */
+	.force_presence    = true,
+	.odl_phy_swap   = true,
 };
 
 static bool zz_probe(void)