Message ID | 20170501005331.20131-3-alastair@au1.ibm.com (mailing list archive) |
---|---|
State | Accepted |
Commit | a715626a8e904e7226915d1bc4885317ea9da141 |
Headers | show |
On 01/05/17 10:53, Alastair D'Silva wrote: > From: Alastair D'Silva <alastair@d-silva.org> > > In some situations, a faulty AFU slice may create an interrupt storm of > slice errors, rendering the machine unusable. Since these interrupts are > informational only, present the interrupt once, then mask it off to > prevent it from being retriggered until the AFU is reset. > > Signed-off-by: Alastair D'Silva <alastair@d-silva.org> Thanks for the fixups. Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com> > --- > Changelog: > v4: > Fix duplicate/missing entries in aggregate macros > Minor textual changes > v3 > Add CXL_PSL_SERR_An_IRQS, CXL_PSL_SERR_An_IRQ_MASKS macros > Explicitly reenable masked interrupts after reset > Issue an info line that subsequent interrupts will be masked > v2 > Rebase against linux-next > --- > drivers/misc/cxl/cxl.h | 18 ++++++++++++++++++ > drivers/misc/cxl/native.c | 19 +++++++++++++++++-- > 2 files changed, 35 insertions(+), 2 deletions(-) > > diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h > index 452e209..c8568ea 100644 > --- a/drivers/misc/cxl/cxl.h > +++ b/drivers/misc/cxl/cxl.h > @@ -228,6 +228,24 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; > #define CXL_PSL_SERR_An_llcmdto (1ull << (63-6)) > #define CXL_PSL_SERR_An_afupar (1ull << (63-7)) > #define CXL_PSL_SERR_An_afudup (1ull << (63-8)) > +#define CXL_PSL_SERR_An_IRQS ( \ > + CXL_PSL_SERR_An_afuto | CXL_PSL_SERR_An_afudis | CXL_PSL_SERR_An_afuov | \ > + CXL_PSL_SERR_An_badsrc | CXL_PSL_SERR_An_badctx | CXL_PSL_SERR_An_llcmdis | \ > + CXL_PSL_SERR_An_llcmdto | CXL_PSL_SERR_An_afupar | CXL_PSL_SERR_An_afudup) > +#define CXL_PSL_SERR_An_afuto_mask (1ull << (63-32)) > +#define CXL_PSL_SERR_An_afudis_mask (1ull << (63-33)) > +#define CXL_PSL_SERR_An_afuov_mask (1ull << (63-34)) > +#define CXL_PSL_SERR_An_badsrc_mask (1ull << (63-35)) > +#define CXL_PSL_SERR_An_badctx_mask (1ull << (63-36)) > +#define CXL_PSL_SERR_An_llcmdis_mask (1ull << (63-37)) > 
+#define CXL_PSL_SERR_An_llcmdto_mask (1ull << (63-38)) > +#define CXL_PSL_SERR_An_afupar_mask (1ull << (63-39)) > +#define CXL_PSL_SERR_An_afudup_mask (1ull << (63-40)) > +#define CXL_PSL_SERR_An_IRQ_MASKS ( \ > + CXL_PSL_SERR_An_afuto_mask | CXL_PSL_SERR_An_afudis_mask | CXL_PSL_SERR_An_afuov_mask | \ > + CXL_PSL_SERR_An_badsrc_mask | CXL_PSL_SERR_An_badctx_mask | CXL_PSL_SERR_An_llcmdis_mask | \ > + CXL_PSL_SERR_An_llcmdto_mask | CXL_PSL_SERR_An_afupar_mask | CXL_PSL_SERR_An_afudup_mask) > + > #define CXL_PSL_SERR_An_AE (1ull << (63-30)) > > /****** CXL_PSL_SCNTL_An ****************************************************/ > diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c > index 194c58e..290950b 100644 > --- a/drivers/misc/cxl/native.c > +++ b/drivers/misc/cxl/native.c > @@ -95,12 +95,23 @@ int cxl_afu_disable(struct cxl_afu *afu) > /* This will disable as well as reset */ > static int native_afu_reset(struct cxl_afu *afu) > { > + int rc; > + u64 serr; > + > pr_devel("AFU reset request\n"); > > - return afu_control(afu, CXL_AFU_Cntl_An_RA, 0, > + rc = afu_control(afu, CXL_AFU_Cntl_An_RA, 0, > CXL_AFU_Cntl_An_RS_Complete | CXL_AFU_Cntl_An_ES_Disabled, > CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK, > false); > + > + /* Re-enable any masked interrupts */ > + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); > + serr &= ~CXL_PSL_SERR_An_IRQ_MASKS; > + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); > + > + > + return rc; > } > > static int native_afu_check_and_enable(struct cxl_afu *afu) > @@ -1205,7 +1216,7 @@ static irqreturn_t native_slice_irq_err(int irq, void *data) > { > struct cxl_afu *afu = data; > u64 errstat, serr, afu_error, dsisr; > - u64 fir_slice, afu_debug; > + u64 fir_slice, afu_debug, irq_mask; > > /* > * slice err interrupt is only used with full PSL (no XSL) > @@ -1226,7 +1237,11 @@ static irqreturn_t native_slice_irq_err(int irq, void *data) > dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error); > dev_crit(&afu->dev, 
"PSL_DSISR_An: 0x%.16llx\n", dsisr); > > + /* mask off the IRQ so it won't retrigger until the AFU is reset */ > + irq_mask = (serr & CXL_PSL_SERR_An_IRQS) >> 32; > + serr |= irq_mask; > cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); > + dev_info(&afu->dev, "Further such interrupts will be masked until the AFU is reset\n"); > > return IRQ_HANDLED; > } >
Thanks for updating the patch with review comments, Alastair. Alastair D'Silva <alastair@au1.ibm.com> writes: > From: Alastair D'Silva <alastair@d-silva.org> > > In some situations, a faulty AFU slice may create an interrupt storm of > slice errors, rendering the machine unusable. Since these interrupts are > informational only, present the interrupt once, then mask it off to > prevent it from being retriggered until the AFU is reset. > > Signed-off-by: Alastair D'Silva <alastair@d-silva.org> > --- Reviewed-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Le 01/05/2017 à 02:53, Alastair D'Silva a écrit : > From: Alastair D'Silva <alastair@d-silva.org> > > In some situations, a faulty AFU slice may create an interrupt storm of > slice errors, rendering the machine unusable. Since these interrupts are > informational only, present the interrupt once, then mask it off to > prevent it from being retriggered until the AFU is reset. > > Signed-off-by: Alastair D'Silva <alastair@d-silva.org> > --- Thanks! Acked-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com> > Changelog: > v4: > Fix duplicate/missing entries in aggregate macros > Minor textual changes > v3 > Add CXL_PSL_SERR_An_IRQS, CXL_PSL_SERR_An_IRQ_MASKS macros > Explicitly reenable masked interrupts after reset > Issue an info line that subsequent interrupts will be masked > v2 > Rebase against linux-next > --- > drivers/misc/cxl/cxl.h | 18 ++++++++++++++++++ > drivers/misc/cxl/native.c | 19 +++++++++++++++++-- > 2 files changed, 35 insertions(+), 2 deletions(-) > > diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h > index 452e209..c8568ea 100644 > --- a/drivers/misc/cxl/cxl.h > +++ b/drivers/misc/cxl/cxl.h > @@ -228,6 +228,24 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; > #define CXL_PSL_SERR_An_llcmdto (1ull << (63-6)) > #define CXL_PSL_SERR_An_afupar (1ull << (63-7)) > #define CXL_PSL_SERR_An_afudup (1ull << (63-8)) > +#define CXL_PSL_SERR_An_IRQS ( \ > + CXL_PSL_SERR_An_afuto | CXL_PSL_SERR_An_afudis | CXL_PSL_SERR_An_afuov | \ > + CXL_PSL_SERR_An_badsrc | CXL_PSL_SERR_An_badctx | CXL_PSL_SERR_An_llcmdis | \ > + CXL_PSL_SERR_An_llcmdto | CXL_PSL_SERR_An_afupar | CXL_PSL_SERR_An_afudup) > +#define CXL_PSL_SERR_An_afuto_mask (1ull << (63-32)) > +#define CXL_PSL_SERR_An_afudis_mask (1ull << (63-33)) > +#define CXL_PSL_SERR_An_afuov_mask (1ull << (63-34)) > +#define CXL_PSL_SERR_An_badsrc_mask (1ull << (63-35)) > +#define CXL_PSL_SERR_An_badctx_mask (1ull << (63-36)) > +#define CXL_PSL_SERR_An_llcmdis_mask (1ull << (63-37)) > +#define 
CXL_PSL_SERR_An_llcmdto_mask (1ull << (63-38)) > +#define CXL_PSL_SERR_An_afupar_mask (1ull << (63-39)) > +#define CXL_PSL_SERR_An_afudup_mask (1ull << (63-40)) > +#define CXL_PSL_SERR_An_IRQ_MASKS ( \ > + CXL_PSL_SERR_An_afuto_mask | CXL_PSL_SERR_An_afudis_mask | CXL_PSL_SERR_An_afuov_mask | \ > + CXL_PSL_SERR_An_badsrc_mask | CXL_PSL_SERR_An_badctx_mask | CXL_PSL_SERR_An_llcmdis_mask | \ > + CXL_PSL_SERR_An_llcmdto_mask | CXL_PSL_SERR_An_afupar_mask | CXL_PSL_SERR_An_afudup_mask) > + > #define CXL_PSL_SERR_An_AE (1ull << (63-30)) > > /****** CXL_PSL_SCNTL_An ****************************************************/ > diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c > index 194c58e..290950b 100644 > --- a/drivers/misc/cxl/native.c > +++ b/drivers/misc/cxl/native.c > @@ -95,12 +95,23 @@ int cxl_afu_disable(struct cxl_afu *afu) > /* This will disable as well as reset */ > static int native_afu_reset(struct cxl_afu *afu) > { > + int rc; > + u64 serr; > + > pr_devel("AFU reset request\n"); > > - return afu_control(afu, CXL_AFU_Cntl_An_RA, 0, > + rc = afu_control(afu, CXL_AFU_Cntl_An_RA, 0, > CXL_AFU_Cntl_An_RS_Complete | CXL_AFU_Cntl_An_ES_Disabled, > CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK, > false); > + > + /* Re-enable any masked interrupts */ > + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); > + serr &= ~CXL_PSL_SERR_An_IRQ_MASKS; > + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); > + > + > + return rc; > } > > static int native_afu_check_and_enable(struct cxl_afu *afu) > @@ -1205,7 +1216,7 @@ static irqreturn_t native_slice_irq_err(int irq, void *data) > { > struct cxl_afu *afu = data; > u64 errstat, serr, afu_error, dsisr; > - u64 fir_slice, afu_debug; > + u64 fir_slice, afu_debug, irq_mask; > > /* > * slice err interrupt is only used with full PSL (no XSL) > @@ -1226,7 +1237,11 @@ static irqreturn_t native_slice_irq_err(int irq, void *data) > dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error); > dev_crit(&afu->dev, "PSL_DSISR_An: 
0x%.16llx\n", dsisr); > > + /* mask off the IRQ so it won't retrigger until the AFU is reset */ > + irq_mask = (serr & CXL_PSL_SERR_An_IRQS) >> 32; > + serr |= irq_mask; > cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); > + dev_info(&afu->dev, "Further such interrupts will be masked until the AFU is reset\n"); > > return IRQ_HANDLED; > } >
On Mon, 2017-05-01 at 00:53:31 UTC, Alastair D'Silva wrote: > From: Alastair D'Silva <alastair@d-silva.org> > > In some situations, a faulty AFU slice may create an interrupt storm of > slice errors, rendering the machine unusable. Since these interrupts are > informational only, present the interrupt once, then mask it off to > prevent it from being retriggered until the AFU is reset. > > Signed-off-by: Alastair D'Silva <alastair@d-silva.org> > Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com> > Reviewed-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com> Applied to powerpc next, thanks. https://git.kernel.org/powerpc/c/a715626a8e904e7226915d1bc48853 cheers
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 452e209..c8568ea 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -228,6 +228,24 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; #define CXL_PSL_SERR_An_llcmdto (1ull << (63-6)) #define CXL_PSL_SERR_An_afupar (1ull << (63-7)) #define CXL_PSL_SERR_An_afudup (1ull << (63-8)) +#define CXL_PSL_SERR_An_IRQS ( \ + CXL_PSL_SERR_An_afuto | CXL_PSL_SERR_An_afudis | CXL_PSL_SERR_An_afuov | \ + CXL_PSL_SERR_An_badsrc | CXL_PSL_SERR_An_badctx | CXL_PSL_SERR_An_llcmdis | \ + CXL_PSL_SERR_An_llcmdto | CXL_PSL_SERR_An_afupar | CXL_PSL_SERR_An_afudup) +#define CXL_PSL_SERR_An_afuto_mask (1ull << (63-32)) +#define CXL_PSL_SERR_An_afudis_mask (1ull << (63-33)) +#define CXL_PSL_SERR_An_afuov_mask (1ull << (63-34)) +#define CXL_PSL_SERR_An_badsrc_mask (1ull << (63-35)) +#define CXL_PSL_SERR_An_badctx_mask (1ull << (63-36)) +#define CXL_PSL_SERR_An_llcmdis_mask (1ull << (63-37)) +#define CXL_PSL_SERR_An_llcmdto_mask (1ull << (63-38)) +#define CXL_PSL_SERR_An_afupar_mask (1ull << (63-39)) +#define CXL_PSL_SERR_An_afudup_mask (1ull << (63-40)) +#define CXL_PSL_SERR_An_IRQ_MASKS ( \ + CXL_PSL_SERR_An_afuto_mask | CXL_PSL_SERR_An_afudis_mask | CXL_PSL_SERR_An_afuov_mask | \ + CXL_PSL_SERR_An_badsrc_mask | CXL_PSL_SERR_An_badctx_mask | CXL_PSL_SERR_An_llcmdis_mask | \ + CXL_PSL_SERR_An_llcmdto_mask | CXL_PSL_SERR_An_afupar_mask | CXL_PSL_SERR_An_afudup_mask) + #define CXL_PSL_SERR_An_AE (1ull << (63-30)) /****** CXL_PSL_SCNTL_An ****************************************************/ diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 194c58e..290950b 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -95,12 +95,23 @@ int cxl_afu_disable(struct cxl_afu *afu) /* This will disable as well as reset */ static int native_afu_reset(struct cxl_afu *afu) { + int rc; + u64 serr; + pr_devel("AFU reset request\n"); - return afu_control(afu, CXL_AFU_Cntl_An_RA, 0, + rc 
= afu_control(afu, CXL_AFU_Cntl_An_RA, 0, CXL_AFU_Cntl_An_RS_Complete | CXL_AFU_Cntl_An_ES_Disabled, CXL_AFU_Cntl_An_RS_MASK | CXL_AFU_Cntl_An_ES_MASK, false); + + /* Re-enable any masked interrupts */ + serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); + serr &= ~CXL_PSL_SERR_An_IRQ_MASKS; + cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); + + + return rc; } static int native_afu_check_and_enable(struct cxl_afu *afu) @@ -1205,7 +1216,7 @@ static irqreturn_t native_slice_irq_err(int irq, void *data) { struct cxl_afu *afu = data; u64 errstat, serr, afu_error, dsisr; - u64 fir_slice, afu_debug; + u64 fir_slice, afu_debug, irq_mask; /* * slice err interrupt is only used with full PSL (no XSL) @@ -1226,7 +1237,11 @@ static irqreturn_t native_slice_irq_err(int irq, void *data) dev_crit(&afu->dev, "AFU_ERR_An: 0x%.16llx\n", afu_error); dev_crit(&afu->dev, "PSL_DSISR_An: 0x%.16llx\n", dsisr); + /* mask off the IRQ so it won't retrigger until the AFU is reset */ + irq_mask = (serr & CXL_PSL_SERR_An_IRQS) >> 32; + serr |= irq_mask; cxl_p1n_write(afu, CXL_PSL_SERR_An, serr); + dev_info(&afu->dev, "Further such interrupts will be masked until the AFU is reset\n"); return IRQ_HANDLED; }