Message ID | 20210617121307.792386-1-mperttunen@nvidia.com |
---|---|
State | Accepted |
Headers | show |
Series | [v2] soc: tegra: Add Tegra186 ARI driver | expand |
On Thu, Jun 17, 2021 at 03:13:07PM +0300, Mikko Perttunen wrote: > Add a driver to hook into panic notifiers and print machine check > status for debugging. Status information is retrieved via SMC. This > is supported by upstream ARM Trusted Firmware. > > Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> > --- > v2: > * Changed to use panic notifier instead of serror hook > --- > drivers/soc/tegra/Makefile | 1 + > drivers/soc/tegra/ari-tegra186.c | 80 ++++++++++++++++++++++++++++++++ > 2 files changed, 81 insertions(+) > create mode 100644 drivers/soc/tegra/ari-tegra186.c > > diff --git a/drivers/soc/tegra/Makefile b/drivers/soc/tegra/Makefile > index 9c809c1814bd..054e862b63d8 100644 > --- a/drivers/soc/tegra/Makefile > +++ b/drivers/soc/tegra/Makefile > @@ -7,3 +7,4 @@ obj-$(CONFIG_SOC_TEGRA_PMC) += pmc.o > obj-$(CONFIG_SOC_TEGRA_POWERGATE_BPMP) += powergate-bpmp.o > obj-$(CONFIG_SOC_TEGRA20_VOLTAGE_COUPLER) += regulators-tegra20.o > obj-$(CONFIG_SOC_TEGRA30_VOLTAGE_COUPLER) += regulators-tegra30.o > +obj-$(CONFIG_ARCH_TEGRA_186_SOC) += ari-tegra186.o > diff --git a/drivers/soc/tegra/ari-tegra186.c b/drivers/soc/tegra/ari-tegra186.c > new file mode 100644 > index 000000000000..02577853ec49 > --- /dev/null > +++ b/drivers/soc/tegra/ari-tegra186.c > @@ -0,0 +1,80 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
> + */ > + > +#include <linux/arm-smccc.h> > +#include <linux/kernel.h> > +#include <linux/of.h> > +#include <linux/panic_notifier.h> > + > +#define SMC_SIP_INVOKE_MCE 0xc2ffff00 > +#define MCE_SMC_READ_MCA 12 > + > +#define MCA_ARI_CMD_RD_SERR 1 > + > +#define MCA_ARI_RW_SUBIDX_STAT 1 > +#define SERR_STATUS_VAL BIT_ULL(63) > + > +#define MCA_ARI_RW_SUBIDX_ADDR 2 > +#define MCA_ARI_RW_SUBIDX_MSC1 3 > +#define MCA_ARI_RW_SUBIDX_MSC2 4 > + > +static const char * const bank_names[] = { > + "SYS:DPMU", "ROC:IOB", "ROC:MCB", "ROC:CCE", "ROC:CQX", "ROC:CTU", > +}; > + > +static void read_uncore_mca(u8 cmd, u8 idx, u8 subidx, u8 inst, u64 *data) > +{ > + struct arm_smccc_res res; > + > + arm_smccc_smc(SMC_SIP_INVOKE_MCE | MCE_SMC_READ_MCA, > + ((u64)inst << 24) | ((u64)idx << 16) | > + ((u64)subidx << 8) | ((u64)cmd << 0), > + 0, 0, 0, 0, 0, 0, &res); > + > + *data = res.a2; > +} > + > +static int tegra186_ari_panic_handler(struct notifier_block *nb, > + unsigned long code, void *unused) > +{ > + u64 status; > + int i; > + > + for (i = 0; i < ARRAY_SIZE(bank_names); i++) { > + read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, MCA_ARI_RW_SUBIDX_STAT, > + 0, &status); > + > + if (status & SERR_STATUS_VAL) { > + u64 addr, misc1, misc2; > + > + read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, > + MCA_ARI_RW_SUBIDX_ADDR, 0, &addr); > + read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, > + MCA_ARI_RW_SUBIDX_MSC1, 0, &misc1); > + read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, > + MCA_ARI_RW_SUBIDX_MSC2, 0, &misc2); > + > + pr_crit("Machine Check Error in %s\n" > + " status=0x%llx addr=0x%llx\n" > + " msc1=0x%llx msc2=0x%llx\n", > + bank_names[i], status, addr, misc1, misc2); This still looks rather cryptic to me. Is there some way to further decode things like the status and MSC registers? Or is this something that people are supposed to know how to interpret? Also, I'm not sure it's evident what those various banks are. Is there some way we can provide a description for these? 
Additional information doesn't necessarily have to go into code, but it'd be nice if at least there was some sort of comment somewhere that goes into a bit more detail so that people know how to use this. Or perhaps this is documented in the TRM? If so, perhaps provide a reference to that so that people know where to find the information. Thierry
On 6/18/21 3:03 PM, Thierry Reding wrote: > On Thu, Jun 17, 2021 at 03:13:07PM +0300, Mikko Perttunen wrote: >> Add a driver to hook into panic notifiers and print machine check >> status for debugging. Status information is retrieved via SMC. This >> is supported by upstream ARM Trusted Firmware. >> >> Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> >> --- >> v2: >> * Changed to use panic notifier instead of serror hook >> --- >> drivers/soc/tegra/Makefile | 1 + >> drivers/soc/tegra/ari-tegra186.c | 80 ++++++++++++++++++++++++++++++++ >> 2 files changed, 81 insertions(+) >> create mode 100644 drivers/soc/tegra/ari-tegra186.c >> >> diff --git a/drivers/soc/tegra/Makefile b/drivers/soc/tegra/Makefile >> index 9c809c1814bd..054e862b63d8 100644 >> --- a/drivers/soc/tegra/Makefile >> +++ b/drivers/soc/tegra/Makefile >> @@ -7,3 +7,4 @@ obj-$(CONFIG_SOC_TEGRA_PMC) += pmc.o >> obj-$(CONFIG_SOC_TEGRA_POWERGATE_BPMP) += powergate-bpmp.o >> obj-$(CONFIG_SOC_TEGRA20_VOLTAGE_COUPLER) += regulators-tegra20.o >> obj-$(CONFIG_SOC_TEGRA30_VOLTAGE_COUPLER) += regulators-tegra30.o >> +obj-$(CONFIG_ARCH_TEGRA_186_SOC) += ari-tegra186.o >> diff --git a/drivers/soc/tegra/ari-tegra186.c b/drivers/soc/tegra/ari-tegra186.c >> new file mode 100644 >> index 000000000000..02577853ec49 >> --- /dev/null >> +++ b/drivers/soc/tegra/ari-tegra186.c >> @@ -0,0 +1,80 @@ >> +// SPDX-License-Identifier: GPL-2.0-only >> +/* >> + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
>> + */ >> + >> +#include <linux/arm-smccc.h> >> +#include <linux/kernel.h> >> +#include <linux/of.h> >> +#include <linux/panic_notifier.h> >> + >> +#define SMC_SIP_INVOKE_MCE 0xc2ffff00 >> +#define MCE_SMC_READ_MCA 12 >> + >> +#define MCA_ARI_CMD_RD_SERR 1 >> + >> +#define MCA_ARI_RW_SUBIDX_STAT 1 >> +#define SERR_STATUS_VAL BIT_ULL(63) >> + >> +#define MCA_ARI_RW_SUBIDX_ADDR 2 >> +#define MCA_ARI_RW_SUBIDX_MSC1 3 >> +#define MCA_ARI_RW_SUBIDX_MSC2 4 >> + >> +static const char * const bank_names[] = { >> + "SYS:DPMU", "ROC:IOB", "ROC:MCB", "ROC:CCE", "ROC:CQX", "ROC:CTU", >> +}; >> + >> +static void read_uncore_mca(u8 cmd, u8 idx, u8 subidx, u8 inst, u64 *data) >> +{ >> + struct arm_smccc_res res; >> + >> + arm_smccc_smc(SMC_SIP_INVOKE_MCE | MCE_SMC_READ_MCA, >> + ((u64)inst << 24) | ((u64)idx << 16) | >> + ((u64)subidx << 8) | ((u64)cmd << 0), >> + 0, 0, 0, 0, 0, 0, &res); >> + >> + *data = res.a2; >> +} >> + >> +static int tegra186_ari_panic_handler(struct notifier_block *nb, >> + unsigned long code, void *unused) >> +{ >> + u64 status; >> + int i; >> + >> + for (i = 0; i < ARRAY_SIZE(bank_names); i++) { >> + read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, MCA_ARI_RW_SUBIDX_STAT, >> + 0, &status); >> + >> + if (status & SERR_STATUS_VAL) { >> + u64 addr, misc1, misc2; >> + >> + read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, >> + MCA_ARI_RW_SUBIDX_ADDR, 0, &addr); >> + read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, >> + MCA_ARI_RW_SUBIDX_MSC1, 0, &misc1); >> + read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, >> + MCA_ARI_RW_SUBIDX_MSC2, 0, &misc2); >> + >> + pr_crit("Machine Check Error in %s\n" >> + " status=0x%llx addr=0x%llx\n" >> + " msc1=0x%llx msc2=0x%llx\n", >> + bank_names[i], status, addr, misc1, misc2); > > This still looks rather cryptic to me. Is there some way to further > decode things like the status and MSC registers? Or is this something > that people are supposed to know how to interpret? Indeed, it is very cryptic. 
The corresponding downstream driver does a lot of work to print more human-readable descriptions of these - that's what I used as a decoding reference as well myself. I was thinking that having the barebones here is good to at least have the error data to decode manually, and we can add the more human-readable decoding afterwards, or perhaps provide some script to decode it. > > Also, I'm not sure it's evident what those various banks are. Is there > some way we can provide a description for these? I don't know if the bank names themselves are very useful - each bank is decoded differently and the decoded information is the useful part. > > Additional information doesn't necessarily have to go into code, but > it'd be nice if at least there was some sort of comment somewhere that > goes into a bit more detail so that people know how to use this. Or > perhaps this is documented in the TRM? If so, perhaps provide a > reference to that so that people know where to find the information. I don't know if the TRM has this information. How to decode these values can be seen in https://nv-tegra.nvidia.com/gitweb/?p=linux-nvidia.git;a=blob;f=drivers/platform/tegra/ari_mca.c;h=040d05978ca49755a68365bebe7c46f6628c5162;hb=6dc57fec39c444e4c4448be61ddd19c55693daf1 and https://nv-tegra.nvidia.com/gitweb/?p=linux-nvidia.git;a=blob;f=include/linux/platform/tegra/ari_mca.h;h=e6e4ac3abf674a1fc93f7b0dbcac4d6e672772d6;hb=6dc57fec39c444e4c4448be61ddd19c55693daf1 Not sure if these kinds of links are very appropriate as references though. > > Thierry > Thanks, Mikko
On Thu, Jun 17, 2021 at 03:13:07PM +0300, Mikko Perttunen wrote: > Add a driver to hook into panic notifiers and print machine check > status for debugging. Status information is retrieved via SMC. This > is supported by upstream ARM Trusted Firmware. > > Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> > --- > v2: > * Changed to use panic notifier instead of serror hook > --- > drivers/soc/tegra/Makefile | 1 + > drivers/soc/tegra/ari-tegra186.c | 80 ++++++++++++++++++++++++++++++++ > 2 files changed, 81 insertions(+) > create mode 100644 drivers/soc/tegra/ari-tegra186.c Applied, thanks. Thierry
diff --git a/drivers/soc/tegra/Makefile b/drivers/soc/tegra/Makefile index 9c809c1814bd..054e862b63d8 100644 --- a/drivers/soc/tegra/Makefile +++ b/drivers/soc/tegra/Makefile @@ -7,3 +7,4 @@ obj-$(CONFIG_SOC_TEGRA_PMC) += pmc.o obj-$(CONFIG_SOC_TEGRA_POWERGATE_BPMP) += powergate-bpmp.o obj-$(CONFIG_SOC_TEGRA20_VOLTAGE_COUPLER) += regulators-tegra20.o obj-$(CONFIG_SOC_TEGRA30_VOLTAGE_COUPLER) += regulators-tegra30.o +obj-$(CONFIG_ARCH_TEGRA_186_SOC) += ari-tegra186.o diff --git a/drivers/soc/tegra/ari-tegra186.c b/drivers/soc/tegra/ari-tegra186.c new file mode 100644 index 000000000000..02577853ec49 --- /dev/null +++ b/drivers/soc/tegra/ari-tegra186.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + */ + +#include <linux/arm-smccc.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/panic_notifier.h> + +#define SMC_SIP_INVOKE_MCE 0xc2ffff00 +#define MCE_SMC_READ_MCA 12 + +#define MCA_ARI_CMD_RD_SERR 1 + +#define MCA_ARI_RW_SUBIDX_STAT 1 +#define SERR_STATUS_VAL BIT_ULL(63) + +#define MCA_ARI_RW_SUBIDX_ADDR 2 +#define MCA_ARI_RW_SUBIDX_MSC1 3 +#define MCA_ARI_RW_SUBIDX_MSC2 4 + +static const char * const bank_names[] = { + "SYS:DPMU", "ROC:IOB", "ROC:MCB", "ROC:CCE", "ROC:CQX", "ROC:CTU", +}; + +static void read_uncore_mca(u8 cmd, u8 idx, u8 subidx, u8 inst, u64 *data) +{ + struct arm_smccc_res res; + + arm_smccc_smc(SMC_SIP_INVOKE_MCE | MCE_SMC_READ_MCA, + ((u64)inst << 24) | ((u64)idx << 16) | + ((u64)subidx << 8) | ((u64)cmd << 0), + 0, 0, 0, 0, 0, 0, &res); + + *data = res.a2; +} + +static int tegra186_ari_panic_handler(struct notifier_block *nb, + unsigned long code, void *unused) +{ + u64 status; + int i; + + for (i = 0; i < ARRAY_SIZE(bank_names); i++) { + read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, MCA_ARI_RW_SUBIDX_STAT, + 0, &status); + + if (status & SERR_STATUS_VAL) { + u64 addr, misc1, misc2; + + read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, + MCA_ARI_RW_SUBIDX_ADDR, 
0, &addr); + read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, + MCA_ARI_RW_SUBIDX_MSC1, 0, &misc1); + read_uncore_mca(MCA_ARI_CMD_RD_SERR, i, + MCA_ARI_RW_SUBIDX_MSC2, 0, &misc2); + + pr_crit("Machine Check Error in %s\n" + " status=0x%llx addr=0x%llx\n" + " msc1=0x%llx msc2=0x%llx\n", + bank_names[i], status, addr, misc1, misc2); + } + } + + return NOTIFY_DONE; +} + +static struct notifier_block tegra186_ari_panic_nb = { + .notifier_call = tegra186_ari_panic_handler, +}; + +static int __init tegra186_ari_init(void) +{ + if (of_machine_is_compatible("nvidia,tegra186")) + atomic_notifier_chain_register(&panic_notifier_list, &tegra186_ari_panic_nb); + + return 0; +} +early_initcall(tegra186_ari_init);
Add a driver to hook into panic notifiers and print machine check status for debugging. Status information is retrieved via SMC. This is supported by upstream ARM Trusted Firmware. Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com> --- v2: * Changed to use panic notifier instead of serror hook --- drivers/soc/tegra/Makefile | 1 + drivers/soc/tegra/ari-tegra186.c | 80 ++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 drivers/soc/tegra/ari-tegra186.c