Message ID | 20171215025942.3983-1-bsingharora@gmail.com |
---|---|
State | Accepted |
Headers | show |
Series | hw/npu2: support creset of npu2 devices | expand |
On Friday, 15 December 2017 1:59:42 PM AEDT Balbir Singh wrote: > creset calls in the hw procedure that resets the PHY, we don't > take them out of reset, just put them in reset. > > Signed-off-by: Balbir Singh <bsingharora@gmail.com> > --- > hw/npu2-hw-procedures.c | 2 +- > hw/npu2.c | 21 ++++++++++++++++++++- > include/npu2.h | 1 + > 3 files changed, 22 insertions(+), 2 deletions(-) > > diff --git a/hw/npu2-hw-procedures.c b/hw/npu2-hw-procedures.c > index 1318e867..dfff4a2a 100644 > --- a/hw/npu2-hw-procedures.c > +++ b/hw/npu2-hw-procedures.c > @@ -233,7 +233,7 @@ static bool poll_fence_status(struct npu2_dev *ndev, uint64_t val) > } > > /* Procedure 1.2.1 - Reset NPU/NDL */ > -static uint32_t reset_ntl(struct npu2_dev *ndev) > +uint32_t reset_ntl(struct npu2_dev *ndev) > { > uint64_t val; > > diff --git a/hw/npu2.c b/hw/npu2.c > index 7ffb0941..ee75b6fd 100644 > --- a/hw/npu2.c > +++ b/hw/npu2.c > @@ -1170,6 +1170,25 @@ static int64_t npu2_freset(struct pci_slot *slot __unused) > return OPAL_SUCCESS; > } > > +static int64_t npu2_creset(struct pci_slot *slot) > +{ > + struct npu2 *p; > + int i; > + struct npu2_dev *ndev; > + > + p = phb_to_npu2(slot->phb); > + NPU2INF(p, "Creset PHB state\n"); > + > + for (i = 0; i < p->total_devices; i++) { Wont this reset every link on the NPU rather than just the given pci_slot? We need to make sure this resets just the specified device. > + ndev = &p->devices[i]; > + if (ndev) { > + NPU2DEVINF(ndev, "Resetting device\n"); > + reset_ntl(ndev); What is the motivation for implementing this? I suspect we should also reset the DL and PHY. Regards, Alistair > + } > + } > + return OPAL_SUCCESS; > +} > + > static struct pci_slot *npu2_slot_create(struct phb *phb) > { > struct pci_slot *slot; > @@ -1191,7 +1210,7 @@ static struct pci_slot *npu2_slot_create(struct phb *phb) > slot->ops.poll_link = NULL; > slot->ops.hreset = npu2_hreset; > slot->ops.freset = npu2_freset; > - slot->ops.creset = NULL; > + slot->ops.creset = npu2_creset; > > return slot; > } > diff --git a/include/npu2.h b/include/npu2.h > index dae152a6..c1f39616 100644 > --- a/include/npu2.h > +++ b/include/npu2.h > @@ -162,5 +162,6 @@ int64_t npu2_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf, > void npu2_dev_procedure_reset(struct npu2_dev *dev); > void npu2_set_link_flag(struct npu2_dev *ndev, uint8_t flag); > void npu2_clear_link_flag(struct npu2_dev *ndev, uint8_t flag); > +uint32_t reset_ntl(struct npu2_dev *ndev); > extern int nv_zcal_nominal; > #endif /* __NPU2_H */ >
On Fri, Dec 15, 2017 at 2:38 PM, Alistair Popple <alistair@popple.id.au> wrote: > On Friday, 15 December 2017 1:59:42 PM AEDT Balbir Singh wrote: >> creset calls in the hw procedure that resets the PHY, we don't >> take them out of reset, just put them in reset. >> >> Signed-off-by: Balbir Singh <bsingharora@gmail.com> >> --- >> hw/npu2-hw-procedures.c | 2 +- >> hw/npu2.c | 21 ++++++++++++++++++++- >> include/npu2.h | 1 + >> 3 files changed, 22 insertions(+), 2 deletions(-) >> >> diff --git a/hw/npu2-hw-procedures.c b/hw/npu2-hw-procedures.c >> index 1318e867..dfff4a2a 100644 >> --- a/hw/npu2-hw-procedures.c >> +++ b/hw/npu2-hw-procedures.c >> @@ -233,7 +233,7 @@ static bool poll_fence_status(struct npu2_dev *ndev, uint64_t val) >> } >> >> /* Procedure 1.2.1 - Reset NPU/NDL */ >> -static uint32_t reset_ntl(struct npu2_dev *ndev) >> +uint32_t reset_ntl(struct npu2_dev *ndev) >> { >> uint64_t val; >> >> diff --git a/hw/npu2.c b/hw/npu2.c >> index 7ffb0941..ee75b6fd 100644 >> --- a/hw/npu2.c >> +++ b/hw/npu2.c >> @@ -1170,6 +1170,25 @@ static int64_t npu2_freset(struct pci_slot *slot __unused) >> return OPAL_SUCCESS; >> } >> >> +static int64_t npu2_creset(struct pci_slot *slot) >> +{ >> + struct npu2 *p; >> + int i; >> + struct npu2_dev *ndev; >> + >> + p = phb_to_npu2(slot->phb); >> + NPU2INF(p, "Creset PHB state\n"); >> + >> + for (i = 0; i < p->total_devices; i++) { > > Wont this reset every link on the NPU rather than just the given pci_slot? We > need to make sure this resets just the specified device. We have a slot for each of the struct npu2 and total_devices are the total number of devices on that phb. In the tests on a 4 GPU system, I did not find anything missing or extra. I'll double check > >> + ndev = &p->devices[i]; >> + if (ndev) { >> + NPU2DEVINF(ndev, "Resetting device\n"); >> + reset_ntl(ndev); > > What is the motivation for implementing this? I suspect we should also reset the > DL and PHY. > A creset should set the links to reset state, I'm just following the procedure I am aware of. I do not want to bring these links out of reset > Regards, > > Alistair > Thanks for the review! Balbir Singh. >> + } >> + } >> + return OPAL_SUCCESS; >> +} >> + >> static struct pci_slot *npu2_slot_create(struct phb *phb) >> { >> struct pci_slot *slot; >> @@ -1191,7 +1210,7 @@ static struct pci_slot *npu2_slot_create(struct phb *phb) >> slot->ops.poll_link = NULL; >> slot->ops.hreset = npu2_hreset; >> slot->ops.freset = npu2_freset; >> - slot->ops.creset = NULL; >> + slot->ops.creset = npu2_creset; >> >> return slot; >> } >> diff --git a/include/npu2.h b/include/npu2.h >> index dae152a6..c1f39616 100644 >> --- a/include/npu2.h >> +++ b/include/npu2.h >> @@ -162,5 +162,6 @@ int64_t npu2_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf, >> void npu2_dev_procedure_reset(struct npu2_dev *dev); >> void npu2_set_link_flag(struct npu2_dev *ndev, uint8_t flag); >> void npu2_clear_link_flag(struct npu2_dev *ndev, uint8_t flag); >> +uint32_t reset_ntl(struct npu2_dev *ndev); >> extern int nv_zcal_nominal; >> #endif /* __NPU2_H */ >> > >
On Sun, Dec 17, 2017 at 3:56 PM, Balbir Singh <bsingharora@gmail.com> wrote: > On Fri, Dec 15, 2017 at 2:38 PM, Alistair Popple <alistair@popple.id.au> wrote: >> On Friday, 15 December 2017 1:59:42 PM AEDT Balbir Singh wrote: >>> creset calls in the hw procedure that resets the PHY, we don't >>> take them out of reset, just put them in reset. >>> >>> Signed-off-by: Balbir Singh <bsingharora@gmail.com> >>> --- >>> hw/npu2-hw-procedures.c | 2 +- >>> hw/npu2.c | 21 ++++++++++++++++++++- >>> include/npu2.h | 1 + >>> 3 files changed, 22 insertions(+), 2 deletions(-) >>> >>> diff --git a/hw/npu2-hw-procedures.c b/hw/npu2-hw-procedures.c >>> index 1318e867..dfff4a2a 100644 >>> --- a/hw/npu2-hw-procedures.c >>> +++ b/hw/npu2-hw-procedures.c >>> @@ -233,7 +233,7 @@ static bool poll_fence_status(struct npu2_dev *ndev, uint64_t val) >>> } >>> >>> /* Procedure 1.2.1 - Reset NPU/NDL */ >>> -static uint32_t reset_ntl(struct npu2_dev *ndev) >>> +uint32_t reset_ntl(struct npu2_dev *ndev) >>> { >>> uint64_t val; >>> >>> diff --git a/hw/npu2.c b/hw/npu2.c >>> index 7ffb0941..ee75b6fd 100644 >>> --- a/hw/npu2.c >>> +++ b/hw/npu2.c >>> @@ -1170,6 +1170,25 @@ static int64_t npu2_freset(struct pci_slot *slot __unused) >>> return OPAL_SUCCESS; >>> } >>> >>> +static int64_t npu2_creset(struct pci_slot *slot) >>> +{ >>> + struct npu2 *p; >>> + int i; >>> + struct npu2_dev *ndev; >>> + >>> + p = phb_to_npu2(slot->phb); >>> + NPU2INF(p, "Creset PHB state\n"); >>> + >>> + for (i = 0; i < p->total_devices; i++) { >> >> Wont this reset every link on the NPU rather than just the given pci_slot? We >> need to make sure this resets just the specified device. > > We have a slot for each of the struct npu2 and total_devices are the > total number of devices on that phb. In the tests on a 4 GPU system, I > did not find anything missing or extra. I'll double check > >> >>> + ndev = &p->devices[i]; >>> + if (ndev) { >>> + NPU2DEVINF(ndev, "Resetting device\n"); >>> + reset_ntl(ndev); >> >> What is the motivation for implementing this? I suspect we should also reset the >> DL and PHY. >> > > A creset should set the links to reset state, I'm just following the > procedure I am aware of. I do not want to bring these links out of > reset > > >> Regards, >> >> Alistair We discussed this offline as well, did you want to see any more changes? Balbir
Acked-by: Alistair Popple <alistair@popple.id.au> On Wednesday, 17 January 2018 10:19:32 AM AEDT Balbir Singh wrote: > On Sun, Dec 17, 2017 at 3:56 PM, Balbir Singh <bsingharora@gmail.com> wrote: > > On Fri, Dec 15, 2017 at 2:38 PM, Alistair Popple <alistair@popple.id.au> wrote: > >> On Friday, 15 December 2017 1:59:42 PM AEDT Balbir Singh wrote: > >>> creset calls in the hw procedure that resets the PHY, we don't > >>> take them out of reset, just put them in reset. > >>> > >>> Signed-off-by: Balbir Singh <bsingharora@gmail.com> > >>> --- > >>> hw/npu2-hw-procedures.c | 2 +- > >>> hw/npu2.c | 21 ++++++++++++++++++++- > >>> include/npu2.h | 1 + > >>> 3 files changed, 22 insertions(+), 2 deletions(-) > >>> > >>> diff --git a/hw/npu2-hw-procedures.c b/hw/npu2-hw-procedures.c > >>> index 1318e867..dfff4a2a 100644 > >>> --- a/hw/npu2-hw-procedures.c > >>> +++ b/hw/npu2-hw-procedures.c > >>> @@ -233,7 +233,7 @@ static bool poll_fence_status(struct npu2_dev *ndev, uint64_t val) > >>> } > >>> > >>> /* Procedure 1.2.1 - Reset NPU/NDL */ > >>> -static uint32_t reset_ntl(struct npu2_dev *ndev) > >>> +uint32_t reset_ntl(struct npu2_dev *ndev) > >>> { > >>> uint64_t val; > >>> > >>> diff --git a/hw/npu2.c b/hw/npu2.c > >>> index 7ffb0941..ee75b6fd 100644 > >>> --- a/hw/npu2.c > >>> +++ b/hw/npu2.c > >>> @@ -1170,6 +1170,25 @@ static int64_t npu2_freset(struct pci_slot *slot __unused) > >>> return OPAL_SUCCESS; > >>> } > >>> > >>> +static int64_t npu2_creset(struct pci_slot *slot) > >>> +{ > >>> + struct npu2 *p; > >>> + int i; > >>> + struct npu2_dev *ndev; > >>> + > >>> + p = phb_to_npu2(slot->phb); > >>> + NPU2INF(p, "Creset PHB state\n"); > >>> + > >>> + for (i = 0; i < p->total_devices; i++) { > >> > >> Wont this reset every link on the NPU rather than just the given pci_slot? We > >> need to make sure this resets just the specified device. > > > > We have a slot for each of the struct npu2 and total_devices are the > > total number of devices on that phb. In the tests on a 4 GPU system, I > > did not find anything missing or extra. I'll double check > > > >> > >>> + ndev = &p->devices[i]; > >>> + if (ndev) { > >>> + NPU2DEVINF(ndev, "Resetting device\n"); > >>> + reset_ntl(ndev); > >> > >> What is the motivation for implementing this? I suspect we should also reset the > >> DL and PHY. > >> > > > > A creset should set the links to reset state, I'm just following the > > procedure I am aware of. I do not want to bring these links out of > > reset > > > > > >> Regards, > >> > >> Alistair > > We discussed this offline as well, did you want to see any more changes? > > Balbir >
Balbir Singh <bsingharora@gmail.com> writes: > creset calls in the hw procedure that resets the PHY, we don't > take them out of reset, just put them in reset. > > Signed-off-by: Balbir Singh <bsingharora@gmail.com> > --- > hw/npu2-hw-procedures.c | 2 +- > hw/npu2.c | 21 ++++++++++++++++++++- > include/npu2.h | 1 + > 3 files changed, 22 insertions(+), 2 deletions(-) Thanks! (and sorry for the delay merging). Merged to master as of 4133e973b4f2bf61d387ac4528bed3acd12b13d9
diff --git a/hw/npu2-hw-procedures.c b/hw/npu2-hw-procedures.c index 1318e867..dfff4a2a 100644 --- a/hw/npu2-hw-procedures.c +++ b/hw/npu2-hw-procedures.c @@ -233,7 +233,7 @@ static bool poll_fence_status(struct npu2_dev *ndev, uint64_t val) } /* Procedure 1.2.1 - Reset NPU/NDL */ -static uint32_t reset_ntl(struct npu2_dev *ndev) +uint32_t reset_ntl(struct npu2_dev *ndev) { uint64_t val; diff --git a/hw/npu2.c b/hw/npu2.c index 7ffb0941..ee75b6fd 100644 --- a/hw/npu2.c +++ b/hw/npu2.c @@ -1170,6 +1170,25 @@ static int64_t npu2_freset(struct pci_slot *slot __unused) return OPAL_SUCCESS; } +static int64_t npu2_creset(struct pci_slot *slot) +{ + struct npu2 *p; + int i; + struct npu2_dev *ndev; + + p = phb_to_npu2(slot->phb); + NPU2INF(p, "Creset PHB state\n"); + + for (i = 0; i < p->total_devices; i++) { + ndev = &p->devices[i]; + if (ndev) { + NPU2DEVINF(ndev, "Resetting device\n"); + reset_ntl(ndev); + } + } + return OPAL_SUCCESS; +} + static struct pci_slot *npu2_slot_create(struct phb *phb) { struct pci_slot *slot; @@ -1191,7 +1210,7 @@ static struct pci_slot *npu2_slot_create(struct phb *phb) slot->ops.poll_link = NULL; slot->ops.hreset = npu2_hreset; slot->ops.freset = npu2_freset; - slot->ops.creset = NULL; + slot->ops.creset = npu2_creset; return slot; } diff --git a/include/npu2.h b/include/npu2.h index dae152a6..c1f39616 100644 --- a/include/npu2.h +++ b/include/npu2.h @@ -162,5 +162,6 @@ int64_t npu2_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf, void npu2_dev_procedure_reset(struct npu2_dev *dev); void npu2_set_link_flag(struct npu2_dev *ndev, uint8_t flag); void npu2_clear_link_flag(struct npu2_dev *ndev, uint8_t flag); +uint32_t reset_ntl(struct npu2_dev *ndev); extern int nv_zcal_nominal; #endif /* __NPU2_H */
creset calls in the hw procedure that resets the PHY, we don't take them out of reset, just put them in reset. Signed-off-by: Balbir Singh <bsingharora@gmail.com> --- hw/npu2-hw-procedures.c | 2 +- hw/npu2.c | 21 ++++++++++++++++++++- include/npu2.h | 1 + 3 files changed, 22 insertions(+), 2 deletions(-)