Message ID | 20180412124526.12662-2-fbarrat@linux.ibm.com |
---|---|
State | Changes Requested |
Headers | show |
Series | npu2-opencapi: Use presence detection and reset | expand |
On 12/04/18 22:45, Frederic Barrat wrote: > From: Frederic Barrat <fbarrat@linux.vnet.ibm.com> > > Presence detection is not part of the opencapi specification. So each > platform may choose to implement it the way it wants. > > All current platforms implement it through an i2c device where we can > query a pin to know if a device is connected or not. ZZ and Zaius have > a similar design and even use the same i2c information and pin > numbers. > However, presence detection on older ZZ planar (older than v4) doesn't > work, so we don't activate it for now, until our lab systems are > upgraded and it's better tested. > > Presence detection on witherspoon is still being worked on, but is > very likely to follow something similar, i.e. an i2c pin to query per > device. > > Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com> A few comments below. Smoketested this on one of our zaiuses, which let's be honest was probably one of the same machines you tested on... Apart from comments below: Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com> > --- > core/platform.c | 14 +++++- > hw/npu2-opencapi.c | 121 ++++++++++++++++++++++++++++++++++++++--------- > include/platform.h | 4 ++ > platforms/astbmc/zaius.c | 9 +++- > platforms/ibm-fsp/zz.c | 14 +++++- > 5 files changed, 137 insertions(+), 25 deletions(-) > > diff --git a/core/platform.c b/core/platform.c > index f09ea3c1..174235d9 100644 > --- a/core/platform.c > +++ b/core/platform.c > @@ -34,6 +34,10 @@ DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, OPAL_PLATFORM_ERR_EVT, OPAL_CEC, > OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT, > OPAL_ABNORMAL_POWER_OFF); > > +#define OCAPI_I2C_PRESENCE_ADDR 0x20 > +#define OCAPI_I2C_PRESENCE_BOTTOM (1 << 2) > +#define OCAPI_I2C_PRESENCE_TOP (1 << 7) > + I'm not convinced there's an advantage to having these as #defines rather than literals in platform_ocapi like all the other stuff already there. In any case, it should be noted that these values, like everything else in generic_ocapi, are for a ZZ. > /* > * Various wrappers for platform functions > */ > @@ -175,7 +179,15 @@ const struct platform_ocapi generic_ocapi = { > .i2c_offset = { 0x3, 0x1, 0x1 }, > .i2c_odl0_data = { 0xFD, 0xFD, 0xFF }, > .i2c_odl1_data = { 0xBF, 0xBF, 0xFF }, > - .odl_phy_swap = true, > + .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR, > + .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM, > + .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP, > + /* > + * i2c presence detection is broken on ZZ planar < v4 so we > + * force the presence until all our systems are upgraded > + */ > + .force_presence = true, > + .odl_phy_swap = true, > }; > > static struct bmc_platform generic_bmc = { > diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c > index 2cc776c7..b2bc0628 100644 > --- a/hw/npu2-opencapi.c > +++ b/hw/npu2-opencapi.c > @@ -829,6 +829,44 @@ static void reset_ocapi_device(struct npu2_dev *dev) > } > } > > +static bool i2c_presence_detect(struct npu2_dev *dev) > +{ > + uint8_t state, data; > + int rc; > + > + /* > + * Opencapi presence detection is done through i2c > + * > + * Lagrange platforms (ZZ, Zaius) use the same default mechanism. > + * Witherspoon will need a specific implementation, TBD. > + */ > + rc = i2c_request_send(dev->i2c_port_id_ocapi, > + platform.ocapi->i2c_presence_addr, > + SMBUS_READ, 0, 1, > + &state, 1, 120); > + if (rc) { > + prlog(PR_ERR, "OCAPI: error detecting link presence: %d\n", > + rc); > + return true; /* assume link exists */ > + } > + > + prlog(PR_DEBUG, "OCAPI: I2C presence detect: 0x%x\n", state); > + > + switch (dev->index) { > + case 2: > + data = platform.ocapi->i2c_presence_odl0; > + break; > + case 3: > + data = platform.ocapi->i2c_presence_odl1; > + break; > + default: > + prlog(PR_ERR, "OCAPI: presence detection on invalid link\n"); > + return true; > + } > + /* Presence detect bits are active low */ > + return !(state & data); > +} > + > static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev) > { > uint64_t reg, config_xscom; > @@ -896,6 +934,20 @@ static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev) > return OPAL_HARDWARE; > } > > +static int64_t npu2_opencapi_get_presence_state(struct pci_slot *slot, > + uint8_t *val) > +{ > + bool present; > + struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb); > + > + if (platform.ocapi->force_presence) > + present = true; > + else > + present = i2c_presence_detect(dev); > + *val = present; > + return OPAL_SUCCESS; > +} > + > static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val) > { > struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb); > @@ -926,9 +978,9 @@ static struct pci_slot *npu2_opencapi_slot_create(struct phb *phb) > return slot; > > /* TODO: Figure out other slot functions */ > - slot->ops.get_presence_state = NULL; > - slot->ops.get_link_state = npu2_opencapi_get_link_state; > - slot->ops.get_power_state = NULL; > + slot->ops.get_presence_state = npu2_opencapi_get_presence_state; > + slot->ops.get_link_state = npu2_opencapi_get_link_state; > + slot->ops.get_power_state = NULL; > slot->ops.get_attention_state = NULL; > slot->ops.get_latch_state = NULL; > slot->ops.set_power_state = NULL; > @@ -1264,6 +1316,48 @@ static int setup_irq(struct npu2 *p) > > #define LINK_TRAINING_RETRIES 5 > > +static int train_link(int chip_id, struct npu2_dev *dev) > +{ > + bool train; > + int rc; > + int retries = LINK_TRAINING_RETRIES; > + > + if (platform.ocapi->force_presence) > + train = true; > + else > + train = i2c_presence_detect(dev); > + if (!train) { > + /* > + * FIXME: if there's no card on the link, we should consider > + * powering off the unused lanes to save energy > + */ > + prlog(PR_INFO, "OCAPI: no card detected on link %d, chip %d\n", > + dev->index, chip_id); > + return -1; > + } > + > + do { > + rc = odl_train(chip_id, dev->index, dev); > + } while (rc != OPAL_SUCCESS && --retries); > + > + if (rc != OPAL_SUCCESS && retries == 0) { > + /** > + * @fwts-label OCAPILinkTrainingFailed > + * @fwts-advice The OpenCAPI link training procedure failed. > + * This indicates a hardware or firmware bug. OpenCAPI > + * functionality will not be available on this link. > + */ > + prlog(PR_ERR, > + "OCAPI: Link %d on chip %u failed to train\n", > + dev->index, chip_id); > + prlog(PR_ERR, "OCAPI: Final link status: %016llx\n", > + get_odl_status(chip_id, dev->index)); > + return -1; > + } > + > + return 0; > +} > + > static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n, > struct npu2_dev *dev) > { > @@ -1272,7 +1366,6 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n, > struct pci_slot *slot; > char port_name[17]; > uint64_t mm_win[2]; > - int retries = LINK_TRAINING_RETRIES; > int rc; > > dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index"); > @@ -1367,27 +1460,11 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n, > break; > > case NPU2_TRAIN_DEFAULT: > - do { > - rc = odl_train(n->chip_id, dev->index, dev); > - } while (rc != OPAL_SUCCESS && --retries); > - > - if (rc != OPAL_SUCCESS && retries == 0) { > - /** > - * @fwts-label OCAPILinkTrainingFailed > - * @fwts-advice The OpenCAPI link training procedure failed. > - * This indicates a hardware or firmware bug. OpenCAPI > - * functionality will not be available on this link. > - */ > - prlog(PR_ERR, > - "OCAPI: Link %d on chip %u failed to train\n", > - dev->index, n->chip_id); > - prlog(PR_ERR, "OCAPI: Final link status: %016llx\n", > - get_odl_status(n->chip_id, dev->index)); > + rc = train_link(n->chip_id, dev); > + if (rc) > goto failed; > - } > > otl_enabletx(n->chip_id, n->xscom_base, dev->index); > - > slot = npu2_opencapi_slot_create(&dev->phb_ocapi); > if (!slot) { > /** > diff --git a/include/platform.h b/include/platform.h > index a7776446..9a04ab37 100644 > --- a/include/platform.h > +++ b/include/platform.h > @@ -51,6 +51,10 @@ struct platform_ocapi { > uint32_t i2c_offset[3]; /* Offsets on I2C device */ > uint8_t i2c_odl0_data[3]; /* Data to reset ODL0 */ > uint8_t i2c_odl1_data[3]; /* Data to reset ODL1 */ > + uint8_t i2c_presence_addr; /* I2C address for presence detection */ > + uint8_t i2c_presence_odl0; /* I2C pin to read to detect ODL0 */ > + uint8_t i2c_presence_odl1; /* I2C pin to read to detect ODL1 */ We use these as a mask rather than a pin index, maybe note that explicitly > + bool force_presence; /* don't use i2c detection */ > bool odl_phy_swap; /* Swap ODL1 to use brick 2 rather than > * brick 1 lanes */ > }; > diff --git a/platforms/astbmc/zaius.c b/platforms/astbmc/zaius.c > index 7678ad12..279fbf8b 100644 > --- a/platforms/astbmc/zaius.c > +++ b/platforms/astbmc/zaius.c > @@ -24,13 +24,20 @@ > > #include "astbmc.h" > > +#define OCAPI_I2C_PRESENCE_ADDR 0x20 > +#define OCAPI_I2C_PRESENCE_BOTTOM (1 << 2) > +#define OCAPI_I2C_PRESENCE_TOP (1 << 7) > + > const struct platform_ocapi zaius_ocapi = { > .i2c_engine = 1, > .i2c_port = 4, > .i2c_offset = { 0x3, 0x1, 0x1 }, > .i2c_odl0_data = { 0xFD, 0xFD, 0xFF }, > .i2c_odl1_data = { 0xBF, 0xBF, 0xFF }, > - .odl_phy_swap = true, > + .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR, > + .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM, > + .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP, > + .odl_phy_swap = true, > }; > > #define NPU_BASE 0x5011000 > diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c > index 9a849290..66f29d44 100644 > --- a/platforms/ibm-fsp/zz.c > +++ b/platforms/ibm-fsp/zz.c > @@ -27,6 +27,10 @@ > #include "ibm-fsp.h" > #include "lxvpd.h" > > +#define OCAPI_I2C_PRESENCE_ADDR 0x20 > +#define OCAPI_I2C_PRESENCE_BOTTOM (1 << 2) > +#define OCAPI_I2C_PRESENCE_TOP (1 << 7) > + > /* We don't yet create NPU device nodes on ZZ, but these values are correct */ > const struct platform_ocapi zz_ocapi = { > .i2c_engine = 1, > @@ -34,7 +38,15 @@ const struct platform_ocapi zz_ocapi = { > .i2c_offset = { 0x3, 0x1, 0x1 }, > .i2c_odl0_data = { 0xFD, 0xFD, 0xFF }, > .i2c_odl1_data = { 0xBF, 0xBF, 0xFF }, > - .odl_phy_swap = true, > + .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR, > + .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM, > + .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP, > + /* > + * i2c presence detection is broken on ZZ planar < v4 so we > + * force the presence until all our systems are upgraded > + */ > + .force_presence = true, > + .odl_phy_swap = true, > }; > > static bool zz_probe(void) >
Le 13/04/2018 à 09:24, Andrew Donnellan a écrit : > On 12/04/18 22:45, Frederic Barrat wrote: >> From: Frederic Barrat <fbarrat@linux.vnet.ibm.com> >> >> Presence detection is not part of the opencapi specification. So each >> platform may choose to implement it the way it wants. >> >> All current platforms implement it through an i2c device where we can >> query a pin to know if a device is connected or not. ZZ and Zaius have >> a similar design and even use the same i2c information and pin >> numbers. >> However, presence detection on older ZZ planar (older than v4) doesn't >> work, so we don't activate it for now, until our lab systems are >> upgraded and it's better tested. >> >> Presence detection on witherspoon is still being worked on, but is >> very likely to follow something similar, i.e. an i2c pin to query per >> device. >> >> Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com> > > A few comments below. Smoketested this on one of our zaiuses, which > let's be honest was probably one of the same machines you tested on... :-) I had borrowed some other zaius to test several configurations. I've tested a cpu with only the "top" slot busy. Another cpu with only the "bottom" slot busy. "Our" zaius has a cpu with the 2 slots busy. It was all looking good, the i2c operation is giving the expected results. On zaius. On ZZ, it's a different story, as indicated in the patch. > Apart from comments below: > > Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com> > >> --- >> core/platform.c | 14 +++++- >> hw/npu2-opencapi.c | 121 >> ++++++++++++++++++++++++++++++++++++++--------- >> include/platform.h | 4 ++ >> platforms/astbmc/zaius.c | 9 +++- >> platforms/ibm-fsp/zz.c | 14 +++++- >> 5 files changed, 137 insertions(+), 25 deletions(-) >> >> diff --git a/core/platform.c b/core/platform.c >> index f09ea3c1..174235d9 100644 >> --- a/core/platform.c >> +++ b/core/platform.c >> @@ -34,6 +34,10 @@ DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, >> OPAL_PLATFORM_ERR_EVT, OPAL_CEC, >> OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT, >> OPAL_ABNORMAL_POWER_OFF); >> +#define OCAPI_I2C_PRESENCE_ADDR 0x20 >> +#define OCAPI_I2C_PRESENCE_BOTTOM (1 << 2) >> +#define OCAPI_I2C_PRESENCE_TOP (1 << 7) >> + > > I'm not convinced there's an advantage to having these as #defines > rather than literals in platform_ocapi like all the other stuff already > there. So you're not going to like the next patch, where I align the reset parameters to look the same. I can get rid of the macros, I don't feel strongly about it. > In any case, it should be noted that these values, like everything else > in generic_ocapi, are for a ZZ. True. I'll wait for the review of the other patches to send a respin. Fred >> /* >> * Various wrappers for platform functions >> */ >> @@ -175,7 +179,15 @@ const struct platform_ocapi generic_ocapi = { >> .i2c_offset = { 0x3, 0x1, 0x1 }, >> .i2c_odl0_data = { 0xFD, 0xFD, 0xFF }, >> .i2c_odl1_data = { 0xBF, 0xBF, 0xFF }, >> - .odl_phy_swap = true, >> + .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR, >> + .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM, >> + .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP, >> + /* >> + * i2c presence detection is broken on ZZ planar < v4 so we >> + * force the presence until all our systems are upgraded >> + */ >> + .force_presence = true, >> + .odl_phy_swap = true, >> }; >> static struct bmc_platform generic_bmc = { >> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c >> index 2cc776c7..b2bc0628 100644 >> --- a/hw/npu2-opencapi.c >> +++ b/hw/npu2-opencapi.c >> @@ -829,6 +829,44 @@ static void reset_ocapi_device(struct npu2_dev *dev) >> } >> } >> +static bool i2c_presence_detect(struct npu2_dev *dev) >> +{ >> + uint8_t state, data; >> + int rc; >> + >> + /* >> + * Opencapi presence detection is done through i2c >> + * >> + * Lagrange platforms (ZZ, Zaius) use the same default mechanism. >> + * Witherspoon will need a specific implementation, TBD. >> + */ >> + rc = i2c_request_send(dev->i2c_port_id_ocapi, >> + platform.ocapi->i2c_presence_addr, >> + SMBUS_READ, 0, 1, >> + &state, 1, 120); >> + if (rc) { >> + prlog(PR_ERR, "OCAPI: error detecting link presence: %d\n", >> + rc); >> + return true; /* assume link exists */ >> + } >> + >> + prlog(PR_DEBUG, "OCAPI: I2C presence detect: 0x%x\n", state); >> + >> + switch (dev->index) { >> + case 2: >> + data = platform.ocapi->i2c_presence_odl0; >> + break; >> + case 3: >> + data = platform.ocapi->i2c_presence_odl1; >> + break; >> + default: >> + prlog(PR_ERR, "OCAPI: presence detection on invalid link\n"); >> + return true; >> + } >> + /* Presence detect bits are active low */ >> + return !(state & data); >> +} >> + >> static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev >> *dev) >> { >> uint64_t reg, config_xscom; >> @@ -896,6 +934,20 @@ static int odl_train(uint32_t gcid, uint32_t >> index, struct npu2_dev *dev) >> return OPAL_HARDWARE; >> } >> +static int64_t npu2_opencapi_get_presence_state(struct pci_slot *slot, >> + uint8_t *val) >> +{ >> + bool present; >> + struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb); >> + >> + if (platform.ocapi->force_presence) >> + present = true; >> + else >> + present = i2c_presence_detect(dev); >> + *val = present; >> + return OPAL_SUCCESS; >> +} >> + >> static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, >> uint8_t *val) >> { >> struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb); >> @@ -926,9 +978,9 @@ static struct pci_slot >> *npu2_opencapi_slot_create(struct phb *phb) >> return slot; >> /* TODO: Figure out other slot functions */ >> - slot->ops.get_presence_state = NULL; >> - slot->ops.get_link_state = npu2_opencapi_get_link_state; >> - slot->ops.get_power_state = NULL; >> + slot->ops.get_presence_state = npu2_opencapi_get_presence_state; >> + slot->ops.get_link_state = npu2_opencapi_get_link_state; >> + slot->ops.get_power_state = NULL; >> slot->ops.get_attention_state = NULL; >> slot->ops.get_latch_state = NULL; >> slot->ops.set_power_state = NULL; >> @@ -1264,6 +1316,48 @@ static int setup_irq(struct npu2 *p) >> #define LINK_TRAINING_RETRIES 5 >> +static int train_link(int chip_id, struct npu2_dev *dev) >> +{ >> + bool train; >> + int rc; >> + int retries = LINK_TRAINING_RETRIES; >> + >> + if (platform.ocapi->force_presence) >> + train = true; >> + else >> + train = i2c_presence_detect(dev); >> + if (!train) { >> + /* >> + * FIXME: if there's no card on the link, we should consider >> + * powering off the unused lanes to save energy >> + */ >> + prlog(PR_INFO, "OCAPI: no card detected on link %d, chip %d\n", >> + dev->index, chip_id); >> + return -1; >> + } >> + >> + do { >> + rc = odl_train(chip_id, dev->index, dev); >> + } while (rc != OPAL_SUCCESS && --retries); >> + >> + if (rc != OPAL_SUCCESS && retries == 0) { >> + /** >> + * @fwts-label OCAPILinkTrainingFailed >> + * @fwts-advice The OpenCAPI link training procedure failed. >> + * This indicates a hardware or firmware bug. OpenCAPI >> + * functionality will not be available on this link. >> + */ >> + prlog(PR_ERR, >> + "OCAPI: Link %d on chip %u failed to train\n", >> + dev->index, chip_id); >> + prlog(PR_ERR, "OCAPI: Final link status: %016llx\n", >> + get_odl_status(chip_id, dev->index)); >> + return -1; >> + } >> + >> + return 0; >> +} >> + >> static void npu2_opencapi_setup_device(struct dt_node *dn_link, >> struct npu2 *n, >> struct npu2_dev *dev) >> { >> @@ -1272,7 +1366,6 @@ static void npu2_opencapi_setup_device(struct >> dt_node *dn_link, struct npu2 *n, >> struct pci_slot *slot; >> char port_name[17]; >> uint64_t mm_win[2]; >> - int retries = LINK_TRAINING_RETRIES; >> int rc; >> dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index"); >> @@ -1367,27 +1460,11 @@ static void npu2_opencapi_setup_device(struct >> dt_node *dn_link, struct npu2 *n, >> break; >> case NPU2_TRAIN_DEFAULT: >> - do { >> - rc = odl_train(n->chip_id, dev->index, dev); >> - } while (rc != OPAL_SUCCESS && --retries); >> - >> - if (rc != OPAL_SUCCESS && retries == 0) { >> - /** >> - * @fwts-label OCAPILinkTrainingFailed >> - * @fwts-advice The OpenCAPI link training procedure failed. >> - * This indicates a hardware or firmware bug. OpenCAPI >> - * functionality will not be available on this link. >> - */ >> - prlog(PR_ERR, >> - "OCAPI: Link %d on chip %u failed to train\n", >> - dev->index, n->chip_id); >> - prlog(PR_ERR, "OCAPI: Final link status: %016llx\n", >> - get_odl_status(n->chip_id, dev->index)); >> + rc = train_link(n->chip_id, dev); >> + if (rc) >> goto failed; >> - } >> otl_enabletx(n->chip_id, n->xscom_base, dev->index); >> - >> slot = npu2_opencapi_slot_create(&dev->phb_ocapi); >> if (!slot) { >> /** >> diff --git a/include/platform.h b/include/platform.h >> index a7776446..9a04ab37 100644 >> --- a/include/platform.h >> +++ b/include/platform.h >> @@ -51,6 +51,10 @@ struct platform_ocapi { >> uint32_t i2c_offset[3]; /* Offsets on I2C device */ >> uint8_t i2c_odl0_data[3]; /* Data to reset ODL0 */ >> uint8_t i2c_odl1_data[3]; /* Data to reset ODL1 */ >> + uint8_t i2c_presence_addr; /* I2C address for presence >> detection */ >> + uint8_t i2c_presence_odl0; /* I2C pin to read to detect ODL0 */ >> + uint8_t i2c_presence_odl1; /* I2C pin to read to detect ODL1 */ > > We use these as a mask rather than a pin index, maybe note that explicitly > >> + bool force_presence; /* don't use i2c detection */ >> bool odl_phy_swap; /* Swap ODL1 to use brick 2 rather than >> * brick 1 lanes */ >> }; >> diff --git a/platforms/astbmc/zaius.c b/platforms/astbmc/zaius.c >> index 7678ad12..279fbf8b 100644 >> --- a/platforms/astbmc/zaius.c >> +++ b/platforms/astbmc/zaius.c >> @@ -24,13 +24,20 @@ >> #include "astbmc.h" >> +#define OCAPI_I2C_PRESENCE_ADDR 0x20 >> +#define OCAPI_I2C_PRESENCE_BOTTOM (1 << 2) >> +#define OCAPI_I2C_PRESENCE_TOP (1 << 7) >> + >> const struct platform_ocapi zaius_ocapi = { >> .i2c_engine = 1, >> .i2c_port = 4, >> .i2c_offset = { 0x3, 0x1, 0x1 }, >> .i2c_odl0_data = { 0xFD, 0xFD, 0xFF }, >> .i2c_odl1_data = { 0xBF, 0xBF, 0xFF }, >> - .odl_phy_swap = true, >> + .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR, >> + .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM, >> + .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP, >> + .odl_phy_swap = true, >> }; >> #define NPU_BASE 0x5011000 >> diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c >> index 9a849290..66f29d44 100644 >> --- a/platforms/ibm-fsp/zz.c >> +++ b/platforms/ibm-fsp/zz.c >> @@ -27,6 +27,10 @@ >> #include "ibm-fsp.h" >> #include "lxvpd.h" >> +#define OCAPI_I2C_PRESENCE_ADDR 0x20 >> +#define OCAPI_I2C_PRESENCE_BOTTOM (1 << 2) >> +#define OCAPI_I2C_PRESENCE_TOP (1 << 7) >> + >> /* We don't yet create NPU device nodes on ZZ, but these values are >> correct */ >> const struct platform_ocapi zz_ocapi = { >> .i2c_engine = 1, >> @@ -34,7 +38,15 @@ const struct platform_ocapi zz_ocapi = { >> .i2c_offset = { 0x3, 0x1, 0x1 }, >> .i2c_odl0_data = { 0xFD, 0xFD, 0xFF }, >> .i2c_odl1_data = { 0xBF, 0xBF, 0xFF }, >> - .odl_phy_swap = true, >> + .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR, >> + .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM, >> + .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP, >> + /* >> + * i2c presence detection is broken on ZZ planar < v4 so we >> + * force the presence until all our systems are upgraded >> + */ >> + .force_presence = true, >> + .odl_phy_swap = true, >> }; >> static bool zz_probe(void) >> >
Frederic Barrat <fbarrat@linux.ibm.com> writes:
> I'll wait for the review of the other patches to send a respin.
I'll wait for it before merging then :)
diff --git a/core/platform.c b/core/platform.c index f09ea3c1..174235d9 100644 --- a/core/platform.c +++ b/core/platform.c @@ -34,6 +34,10 @@ DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, OPAL_PLATFORM_ERR_EVT, OPAL_CEC, OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT, OPAL_ABNORMAL_POWER_OFF); +#define OCAPI_I2C_PRESENCE_ADDR 0x20 +#define OCAPI_I2C_PRESENCE_BOTTOM (1 << 2) +#define OCAPI_I2C_PRESENCE_TOP (1 << 7) + /* * Various wrappers for platform functions */ @@ -175,7 +179,15 @@ const struct platform_ocapi generic_ocapi = { .i2c_offset = { 0x3, 0x1, 0x1 }, .i2c_odl0_data = { 0xFD, 0xFD, 0xFF }, .i2c_odl1_data = { 0xBF, 0xBF, 0xFF }, - .odl_phy_swap = true, + .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR, + .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM, + .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP, + /* + * i2c presence detection is broken on ZZ planar < v4 so we + * force the presence until all our systems are upgraded + */ + .force_presence = true, + .odl_phy_swap = true, }; static struct bmc_platform generic_bmc = { diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c index 2cc776c7..b2bc0628 100644 --- a/hw/npu2-opencapi.c +++ b/hw/npu2-opencapi.c @@ -829,6 +829,44 @@ static void reset_ocapi_device(struct npu2_dev *dev) } } +static bool i2c_presence_detect(struct npu2_dev *dev) +{ + uint8_t state, data; + int rc; + + /* + * Opencapi presence detection is done through i2c + * + * Lagrange platforms (ZZ, Zaius) use the same default mechanism. + * Witherspoon will need a specific implementation, TBD. + */ + rc = i2c_request_send(dev->i2c_port_id_ocapi, + platform.ocapi->i2c_presence_addr, + SMBUS_READ, 0, 1, + &state, 1, 120); + if (rc) { + prlog(PR_ERR, "OCAPI: error detecting link presence: %d\n", + rc); + return true; /* assume link exists */ + } + + prlog(PR_DEBUG, "OCAPI: I2C presence detect: 0x%x\n", state); + + switch (dev->index) { + case 2: + data = platform.ocapi->i2c_presence_odl0; + break; + case 3: + data = platform.ocapi->i2c_presence_odl1; + break; + default: + prlog(PR_ERR, "OCAPI: presence detection on invalid link\n"); + return true; + } + /* Presence detect bits are active low */ + return !(state & data); +} + static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev) { uint64_t reg, config_xscom; @@ -896,6 +934,20 @@ static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev) return OPAL_HARDWARE; } +static int64_t npu2_opencapi_get_presence_state(struct pci_slot *slot, + uint8_t *val) +{ + bool present; + struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb); + + if (platform.ocapi->force_presence) + present = true; + else + present = i2c_presence_detect(dev); + *val = present; + return OPAL_SUCCESS; +} + static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val) { struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb); @@ -926,9 +978,9 @@ static struct pci_slot *npu2_opencapi_slot_create(struct phb *phb) return slot; /* TODO: Figure out other slot functions */ - slot->ops.get_presence_state = NULL; - slot->ops.get_link_state = npu2_opencapi_get_link_state; - slot->ops.get_power_state = NULL; + slot->ops.get_presence_state = npu2_opencapi_get_presence_state; + slot->ops.get_link_state = npu2_opencapi_get_link_state; + slot->ops.get_power_state = NULL; slot->ops.get_attention_state = NULL; slot->ops.get_latch_state = NULL; slot->ops.set_power_state = NULL; @@ -1264,6 +1316,48 @@ static int setup_irq(struct npu2 *p) #define LINK_TRAINING_RETRIES 5 +static int train_link(int chip_id, struct npu2_dev *dev) +{ + bool train; + int rc; + int retries = LINK_TRAINING_RETRIES; + + if (platform.ocapi->force_presence) + train = true; + else + train = i2c_presence_detect(dev); + if (!train) { + /* + * FIXME: if there's no card on the link, we should consider + * powering off the unused lanes to save energy + */ + prlog(PR_INFO, "OCAPI: no card detected on link %d, chip %d\n", + dev->index, chip_id); + return -1; + } + + do { + rc = odl_train(chip_id, dev->index, dev); + } while (rc != OPAL_SUCCESS && --retries); + + if (rc != OPAL_SUCCESS && retries == 0) { + /** + * @fwts-label OCAPILinkTrainingFailed + * @fwts-advice The OpenCAPI link training procedure failed. + * This indicates a hardware or firmware bug. OpenCAPI + * functionality will not be available on this link. + */ + prlog(PR_ERR, + "OCAPI: Link %d on chip %u failed to train\n", + dev->index, chip_id); + prlog(PR_ERR, "OCAPI: Final link status: %016llx\n", + get_odl_status(chip_id, dev->index)); + return -1; + } + + return 0; +} + static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n, struct npu2_dev *dev) { @@ -1272,7 +1366,6 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n, struct pci_slot *slot; char port_name[17]; uint64_t mm_win[2]; - int retries = LINK_TRAINING_RETRIES; int rc; dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index"); @@ -1367,27 +1460,11 @@ static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n, break; case NPU2_TRAIN_DEFAULT: - do { - rc = odl_train(n->chip_id, dev->index, dev); - } while (rc != OPAL_SUCCESS && --retries); - - if (rc != OPAL_SUCCESS && retries == 0) { - /** - * @fwts-label OCAPILinkTrainingFailed - * @fwts-advice The OpenCAPI link training procedure failed. - * This indicates a hardware or firmware bug. OpenCAPI - * functionality will not be available on this link. - */ - prlog(PR_ERR, - "OCAPI: Link %d on chip %u failed to train\n", - dev->index, n->chip_id); - prlog(PR_ERR, "OCAPI: Final link status: %016llx\n", - get_odl_status(n->chip_id, dev->index)); + rc = train_link(n->chip_id, dev); + if (rc) goto failed; - } otl_enabletx(n->chip_id, n->xscom_base, dev->index); - slot = npu2_opencapi_slot_create(&dev->phb_ocapi); if (!slot) { /** diff --git a/include/platform.h b/include/platform.h index a7776446..9a04ab37 100644 --- a/include/platform.h +++ b/include/platform.h @@ -51,6 +51,10 @@ struct platform_ocapi { uint32_t i2c_offset[3]; /* Offsets on I2C device */ uint8_t i2c_odl0_data[3]; /* Data to reset ODL0 */ uint8_t i2c_odl1_data[3]; /* Data to reset ODL1 */ + uint8_t i2c_presence_addr; /* I2C address for presence detection */ + uint8_t i2c_presence_odl0; /* I2C pin to read to detect ODL0 */ + uint8_t i2c_presence_odl1; /* I2C pin to read to detect ODL1 */ + bool force_presence; /* don't use i2c detection */ bool odl_phy_swap; /* Swap ODL1 to use brick 2 rather than * brick 1 lanes */ }; diff --git a/platforms/astbmc/zaius.c b/platforms/astbmc/zaius.c index 7678ad12..279fbf8b 100644 --- a/platforms/astbmc/zaius.c +++ b/platforms/astbmc/zaius.c @@ -24,13 +24,20 @@ #include "astbmc.h" +#define OCAPI_I2C_PRESENCE_ADDR 0x20 +#define OCAPI_I2C_PRESENCE_BOTTOM (1 << 2) +#define OCAPI_I2C_PRESENCE_TOP (1 << 7) + const struct platform_ocapi zaius_ocapi = { .i2c_engine = 1, .i2c_port = 4, .i2c_offset = { 0x3, 0x1, 0x1 }, .i2c_odl0_data = { 0xFD, 0xFD, 0xFF }, .i2c_odl1_data = { 0xBF, 0xBF, 0xFF }, - .odl_phy_swap = true, + .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR, + .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM, + .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP, + .odl_phy_swap = true, }; #define NPU_BASE 0x5011000 diff --git a/platforms/ibm-fsp/zz.c b/platforms/ibm-fsp/zz.c index 9a849290..66f29d44 100644 --- a/platforms/ibm-fsp/zz.c +++ b/platforms/ibm-fsp/zz.c @@ -27,6 +27,10 @@ #include "ibm-fsp.h" #include "lxvpd.h" +#define OCAPI_I2C_PRESENCE_ADDR 0x20 +#define OCAPI_I2C_PRESENCE_BOTTOM (1 << 2) +#define OCAPI_I2C_PRESENCE_TOP (1 << 7) + /* We don't yet create NPU device nodes on ZZ, but these values are correct */ const struct platform_ocapi zz_ocapi = { .i2c_engine = 1, @@ -34,7 +38,15 @@ const struct platform_ocapi zz_ocapi = { .i2c_offset = { 0x3, 0x1, 0x1 }, .i2c_odl0_data = { 0xFD, 0xFD, 0xFF }, .i2c_odl1_data = { 0xBF, 0xBF, 0xFF }, - .odl_phy_swap = true, + .i2c_presence_addr = OCAPI_I2C_PRESENCE_ADDR, + .i2c_presence_odl0 = OCAPI_I2C_PRESENCE_BOTTOM, + .i2c_presence_odl1 = OCAPI_I2C_PRESENCE_TOP, + /* + * i2c presence detection is broken on ZZ planar < v4 so we + * force the presence until all our systems are upgraded + */ + .force_presence = true, + .odl_phy_swap = true, }; static bool zz_probe(void)