Patchwork [2/3] aerdrv: Enhanced AER logging

login
register
mail settings
Submitter Lance Ortiz
Date Nov. 29, 2012, 9:54 p.m.
Message ID <20121129215450.5483.26562.stgit@grignak.americas.hpqcorp.net>
Download mbox | patch
Permalink /patch/202831/
State Changes Requested
Headers show

Comments

Lance Ortiz - Nov. 29, 2012, 9:54 p.m.
This patch provides a more reliable and easier way for user-space
applications to access AER logs than reading them from the
message buffer. It also provides a way to notify user-space when an AER
event occurs.

The AER driver is updated to generate the 'aer_event' trace event
when an AER occurs.  The trace event was added to both the
interrupt-based AER path and the firmware-first path.

Signed-off-by: Lance Ortiz <lance.ortiz@hp.com>
---

 drivers/acpi/apei/cper.c               |   11 +++++++++--
 drivers/pci/pcie/aer/aerdrv_errprint.c |   11 ++++++++++-
 2 files changed, 19 insertions(+), 3 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Joe Perches - Nov. 29, 2012, 10:11 p.m.
On Thu, 2012-11-29 at 14:54 -0700, Lance Ortiz wrote:
> This patch will provide a more reliable and easy way for user-space
> applications to have access to AER logs rather than reading them from the
> message buffer. It also provides a way to notify user-space when an AER
> event occurs.
[]
> diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
> index e6defd8..ef1e1c0 100644
> --- a/drivers/acpi/apei/cper.c
> +++ b/drivers/acpi/apei/cper.c
> @@ -281,9 +281,16 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
>  	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
>  	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
>  #ifdef CONFIG_ACPI_APEI_PCIEAER
> -	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
> +	dev = pci_get_domain_bus_and_slot(pcie->device_id.segment,
> +			pcie->device_id.bus, pcie->device_id.function);
> +	if (!dev)
> +		pr_info(KERN_INFO, "PCI AER Cannot get PCI device %04x:%02x:%02x.%d\n",
> +			domain, bus, slot, func);

You've not tried this with CONFIG_ACPI_APEI_PCIEAER enabled.
pr_info() already supplies the KERN_INFO level, so it must not be
passed as an argument:

		pr_info("PCI AER Cannot get PCI device %04x:%02x:%02x.%d\n",
			domain, bus, slot, func);


--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ortiz, Lance E - Nov. 29, 2012, 10:21 p.m.
Yup.  You are right.  I thought I had it enabled.  I will send out a new patch soon.

Lance

> -----Original Message-----
> From: Joe Perches [mailto:joe@perches.com]
> Sent: Thursday, November 29, 2012 3:12 PM
> To: Ortiz, Lance E
> Cc: bhelgaas@google.com; lance_ortiz@hotmail.com; jiang.liu@huawei.com;
> tony.luck@intel.com; bp@alien8.de; rostedt@goodmis.org;
> mchehab@redhat.com; linux-acpi@vger.kernel.org; linux-
> pci@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH 2/3] aerdrv: Enhanced AER logging
> 
> On Thu, 2012-11-29 at 14:54 -0700, Lance Ortiz wrote:
> > This patch will provide a more reliable and easy way for user-space
> > applications to have access to AER logs rather than reading them from the
> > message buffer. It also provides a way to notify user-space when an AER
> > event occurs.
> []
> > diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
> > index e6defd8..ef1e1c0 100644
> > --- a/drivers/acpi/apei/cper.c
> > +++ b/drivers/acpi/apei/cper.c
> > @@ -281,9 +281,16 @@ static void cper_print_pcie(const char *pfx,
> const struct cper_sec_pcie *pcie,
> >  	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
> >  	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
> >  #ifdef CONFIG_ACPI_APEI_PCIEAER
> > -	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
> > +	dev = pci_get_domain_bus_and_slot(pcie->device_id.segment,
> > +			pcie->device_id.bus, pcie->device_id.function);
> > +	if (!dev)
> > +		pr_info(KERN_INFO, "PCI AER Cannot get PCI device
> %04x:%02x:%02x.%d\n",
> > +			domain, bus, slot, func);
> 
> You've not tried this with CONFIG_ACPI_APEI_PCIEAER enabled.
> 
> 		pr_info("PCI AER Cannot get PCI device
> %04x:%02x:%02x.%d\n",
> 			domain, bus, slot, func);
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steven Rostedt - Nov. 30, 2012, 1:53 a.m.
On Thu, 2012-11-29 at 14:54 -0700, Lance Ortiz wrote:

> --- a/drivers/pci/pcie/aer/aerdrv_errprint.c
> +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
> @@ -23,6 +23,10 @@
>  
>  #include "aerdrv.h"
>  
> +#define CREATE_TRACE_POINTS
> +#define TRACE_INCLUDE_PATH ../../../../include/ras
> +#include <ras/aer_event.h>
> +

Yuck yuck yuck!

This header should be in the same directory, and you should have in that
same header:

#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .

and remove the definition here.

-- Steve

>  #define AER_AGENT_RECEIVER		0
>  #define AER_AGENT_REQUESTER		1
>  #define AER_AGENT_COMPLETER		2
> @@ -194,6 +198,8 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>  	if (info->id && info->error_dev_num > 1 && info->id == id)
>  		printk("%s""  Error of this Agent(%04x) is reported first\n",
>  			prefix, id);
> +	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
> +			info->severity);
>  }
>  


--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index e6defd8..ef1e1c0 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -281,9 +281,16 @@  static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 	"%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
 	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
 #ifdef CONFIG_ACPI_APEI_PCIEAER
-	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO) {
+	dev = pci_get_domain_bus_and_slot(pcie->device_id.segment,
+			pcie->device_id.bus, pcie->device_id.function);
+	if (!dev)
+		pr_info(KERN_INFO, "PCI AER Cannot get PCI device %04x:%02x:%02x.%d\n",
+			domain, bus, slot, func);
+
+	if (pcie->validation_bits & CPER_PCIE_VALID_AER_INFO && dev) {
 		struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
-		cper_print_aer(pfx, gdata->error_severity, aer_regs);
+		cper_print_aer(dev, gdata->error_severity, aer_regs);
+		pci_dev_put(dev);
 	}
 #endif
 }
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index 3ea5173..6354e50 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -23,6 +23,10 @@ 
 
 #include "aerdrv.h"
 
+#define CREATE_TRACE_POINTS
+#define TRACE_INCLUDE_PATH ../../../../include/ras
+#include <ras/aer_event.h>
+
 #define AER_AGENT_RECEIVER		0
 #define AER_AGENT_REQUESTER		1
 #define AER_AGENT_COMPLETER		2
@@ -194,6 +198,8 @@  void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
 	if (info->id && info->error_dev_num > 1 && info->id == id)
 		printk("%s""  Error of this Agent(%04x) is reported first\n",
 			prefix, id);
+	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
+			info->severity);
 }
 
 void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info)
@@ -217,12 +223,13 @@  int cper_severity_to_aer(int cper_severity)
 }
 EXPORT_SYMBOL_GPL(cper_severity_to_aer);
 
-void cper_print_aer(const char *prefix, int cper_severity,
+void cper_print_aer(struct pci_dev *dev, int cper_severity,
 		    struct aer_capability_regs *aer)
 {
 	int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0;
 	u32 status, mask;
 	const char **status_strs;
+	char *prefix = NULL;
 
 	aer_severity = cper_severity_to_aer(cper_severity);
 	if (aer_severity == AER_CORRECTABLE) {
@@ -259,5 +266,7 @@  void cper_print_aer(const char *prefix, int cper_severity,
 			*(tlp + 8), *(tlp + 15), *(tlp + 14),
 			*(tlp + 13), *(tlp + 12));
 	}
+	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
+			aer_severity);
 }
 #endif