diff mbox

[v1,2/3] powerpc/powernv: Support PCI error injection

Message ID 1403489682-14841-3-git-send-email-gwshan@linux.vnet.ibm.com
State New, archived
Headers show

Commit Message

Gavin Shan June 23, 2014, 2:14 a.m. UTC
The patch implements one OPAL firmware sysfs file to support PCI error
injection: "/sys/firmware/opal/errinjct", which is used as described
below.

According to PAPR spec, there are 3 RTAS calls related to error injection:
"ibm,open-errinjct": allocate token prior to doing error injection.
"ibm,close-errinjct": release the token allocated from "ibm,open-errinjct".
"ibm,errinjct": do error injection.

The sysfs file /sys/firmware/opal/errinjct accepts strings in the fixed
format "ei_token ...". For now, we only support 32-bit and 64-bit PCI
error injection, which is triggered by writing one of the strings shown
below to /sys/firmware/opal/errinjct. We don't have corresponding
sysfs files for "ibm,open-errinjct" and "ibm,close-errinjct", which
means that we rely on userland to maintain the token by itself.

32-bit PCI error: "7:addr:mask:iommu_group_id:function".
64-bit PCI error: "8:addr:mask:iommu_group_id:function".

The above "7" and "8" represent 32-bit and 64-bit PCI errors respectively,
and "function" is one of the specific PCI errors (e.g. MMIO access address
parity error), which are defined by the PAPR spec.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/opal.h                |   1 +
 arch/powerpc/platforms/powernv/Makefile        |   2 +-
 arch/powerpc/platforms/powernv/opal-errinjct.c | 184 +++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/opal.c          |   2 +
 4 files changed, 188 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/platforms/powernv/opal-errinjct.c

Comments

Michael Neuling June 23, 2014, 6:36 a.m. UTC | #1
On Mon, 2014-06-23 at 12:14 +1000, Gavin Shan wrote:
> The patch implements one OPAL firmware sysfs file to support PCI error
> injection: "/sys/firmware/opal/errinjct", which will be used like the
> way described as follows.
> 
> According to PAPR spec, there are 3 RTAS calls related to error injection:
> "ibm,open-errinjct": allocate token prior to doing error injection.
> "ibm,close-errinjct": release the token allocated from "ibm,open-errinjct".
> "ibm,errinjct": do error injection.
> 
> Sysfs file /sys/firmware/opal/errinjct accepts strings that have fixed
> format "ei_token ...". For now, we only support 32-bits and 64-bits
> PCI error injection and they should have following strings written to
> /sys/firmware/opal/errinjct as follows. We don't have corresponding
> sysfs files for "ibm,open-errinjct" and "ibm,close-errinjct", which
> means that we rely on userland to maintain the token by itself.

This sounds cool.  

Can you document the sysfs interface in Documentation/powerpc?

Mikey

> 
> 32-bits PCI error: "7:addr:mask:iommu_group_id:function".
> 64-bits PCI error: "8:addr:mask:iommu_group_id:function".
> 
> The above "7" and "8" represent 32-bits and 64-bits PCI error seperately
> and "function" is one of the specific PCI errors (e.g. MMIO access address
> parity error), which are defined by PAPR spec.
> 
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/opal.h                |   1 +
>  arch/powerpc/platforms/powernv/Makefile        |   2 +-
>  arch/powerpc/platforms/powernv/opal-errinjct.c | 184 +++++++++++++++++++++++++
>  arch/powerpc/platforms/powernv/opal.c          |   2 +
>  4 files changed, 188 insertions(+), 1 deletion(-)
>  create mode 100644 arch/powerpc/platforms/powernv/opal-errinjct.c
> 
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index d982bb8..bf280d9 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -985,6 +985,7 @@ extern int opal_elog_init(void);
>  extern void opal_platform_dump_init(void);
>  extern void opal_sys_param_init(void);
>  extern void opal_msglog_init(void);
> +extern void opal_errinjct_init(void);
>  
>  extern int opal_machine_check(struct pt_regs *regs);
>  extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
> diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
> index 63cebb9..4711de8 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -1,7 +1,7 @@
>  obj-y			+= setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o
>  obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
>  obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
> -obj-y			+= opal-msglog.o
> +obj-y			+= opal-msglog.o opal-errinjct.o
>  
>  obj-$(CONFIG_SMP)	+= smp.o
>  obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
> diff --git a/arch/powerpc/platforms/powernv/opal-errinjct.c b/arch/powerpc/platforms/powernv/opal-errinjct.c
> new file mode 100644
> index 0000000..29c9e83
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/opal-errinjct.c
> @@ -0,0 +1,184 @@
> +/*
> + * The file supports error injection, which works based on OPAL API.
> + * For now, we only support PCI error injection. We need support
> + * injecting other types of errors in future.
> + *
> + * Copyright Gavin Shan, IBM Corporation 2014.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/msi.h>
> +#include <linux/pci.h>
> +#include <linux/iommu.h>
> +#include <linux/random.h>
> +#include <linux/slab.h>
> +#include <linux/sysfs.h>
> +#include <linux/fs.h>
> +#include <linux/fcntl.h>
> +#include <linux/kobject.h>
> +
> +#include <asm/msi_bitmap.h>
> +#include <asm/iommu.h>
> +#include <asm/pci-bridge.h>
> +#include <asm/ppc-pci.h>
> +#include <asm/opal.h>
> +
> +#include "powernv.h"
> +#include "pci.h"
> +
> +static DEFINE_MUTEX(errinjct_mutex);
> +
> +static int errinjct_iommu_group_to_phb_and_pe(uint32_t iommu_grp_id,
> +					      uint64_t *phb_id,
> +					      uint32_t *pe_num)
> +{
> +#ifdef CONFIG_IOMMU_API
> +	struct iommu_group *iommu_grp;
> +	struct iommu_table *tbl;
> +	struct pnv_ioda_pe *pe;
> +
> +	iommu_grp = iommu_group_get_by_id(iommu_grp_id);
> +	if (!iommu_grp)
> +		return -ENODEV;
> +
> +	tbl = iommu_group_get_iommudata(iommu_grp);
> +	if (!tbl)
> +		return -ENODEV;
> +
> +	pe = container_of(tbl, struct pnv_ioda_pe, tce32_table);
> +	if (!pe->phb)
> +		return -ENODEV;
> +
> +	*phb_id = pe->phb->opal_id;
> +	*pe_num = pe->pe_number;
> +
> +	return 0;
> +#endif
> +
> +	return -ENXIO;
> +}
> +
> +static int errinjct_ioa_bus_error(const char *buf, struct OpalErrinjct *ei)
> +{
> +	uint32_t iommu_grp_id;
> +	int ret;
> +
> +	/* Extract parameters */
> +	ret = sscanf(buf, "%x:%x:%x:%x:%x",
> +		     &ei->type, &ei->ioa.addr,
> +		     &ei->ioa.mask, &iommu_grp_id, ei->ioa.function);
> +	if (ret != 5)
> +		return -EINVAL;
> +
> +	/* Invalid function ? */
> +	if (ei->ioa.function < OpalEitIoaLoadMemAddr ||
> +	    ei->ioa.function > OpalEitIoaDmaWriteMemTarget)
> +		return -ERANGE;
> +
> +	/* Retrieve PHB ID and PE number */
> +	ret = errinjct_iommu_group_to_phb_and_pe(iommu_grp_id,
> +						 &ei->ioa.phb_id,
> +						 &ei->ioa.pe);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static int errinjct_ioa_bus_error64(const char *buf, struct OpalErrinjct *ei)
> +{
> +	uint32_t iommu_grp_id;
> +	int ret;
> +
> +	/* Extract parameter */
> +	ret = sscanf(buf, "%x:%llx:%llx:%x:%x",
> +		     &ei->type, &ei->ioa64.addr,
> +		     &ei->ioa64.mask, &iommu_grp_id, &ei->ioa64.function);
> +	if (ret != 5)
> +		return -EINVAL;
> +
> +	/* Invalid function ? */
> +	if (ei->ioa64.function < OpalEitIoaLoadMemAddr ||
> +	    ei->ioa64.function > OpalEitIoaDmaWriteMemTarget)
> +		return -ERANGE;
> +
> +	/* Retrieve PHB ID and PE number */
> +	ret = errinjct_iommu_group_to_phb_and_pe(iommu_grp_id,
> +						 &ei->ioa64.phb_id,
> +						 &ei->ioa64.pe);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static ssize_t errinjct_store(struct kobject *kobj,
> +			      struct kobj_attribute *attr,
> +			      const char *buf, size_t count)
> +{
> +	struct OpalErrinjct ei;
> +	int ret;
> +	long rc;
> +
> +	/* Extract common parameters */
> +	ret = sscanf(buf, "%x", &ei.type);
> +	if (ret != 1)
> +		return -EINVAL;
> +
> +	/* Error injection might be in progress */
> +	if (!mutex_trylock(&errinjct_mutex))
> +		return -EAGAIN;
> +
> +	switch (ei.type) {
> +	case OpalErrinjctTypeIoaBusError:
> +		ret = errinjct_ioa_bus_error(buf, &ei);
> +		break;
> +	case OpalErrinjctTypeIoaBusError64:
> +		ret = errinjct_ioa_bus_error64(buf, &ei);
> +		break;
> +	default:
> +		ret = -ERANGE;
> +	}
> +
> +	/* Invalid parameters ? */
> +	if (ret)
> +		goto mutex_unlock_exit;
> +
> +	/* OPAL call */
> +	rc = opal_err_injct(&ei);
> +	if (rc == OPAL_SUCCESS)
> +		ret = count;
> +	else
> +		ret = -EIO;
> +
> +mutex_unlock_exit:
> +	mutex_unlock(&errinjct_mutex);
> +	return ret;
> +}
> +
> +static struct kobj_attribute errinjct_attr =
> +	__ATTR(errinjct, 0600, NULL, errinjct_store);
> +
> +void __init opal_errinjct_init(void)
> +{
> +	int ret;
> +
> +	/* Make sure /sys/firmware/opal directory is created */
> +	if (!opal_kobj) {
> +		pr_warn("%s: opal kobject is not available\n",
> +			__func__);
> +		return;
> +	}
> +
> +	/* Create the sysfs files */
> +	ret = sysfs_create_file(opal_kobj, &errinjct_attr.attr);
> +	if (ret)
> +		pr_warn("%s: Cannot create sysfs file (%d)\n",
> +			__func__, ret);
> +}
> diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
> index 360ad80c..cb29bb5 100644
> --- a/arch/powerpc/platforms/powernv/opal.c
> +++ b/arch/powerpc/platforms/powernv/opal.c
> @@ -604,6 +604,8 @@ static int __init opal_init(void)
>  		opal_sys_param_init();
>  		/* Setup message log interface. */
>  		opal_msglog_init();
> +		/* Setup error injection interface */
> +		opal_errinjct_init();
>  	}
>  
>  	return 0;

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt June 23, 2014, 9:05 p.m. UTC | #2
On Mon, 2014-06-23 at 12:14 +1000, Gavin Shan wrote:
> The patch implements one OPAL firmware sysfs file to support PCI error
> injection: "/sys/firmware/opal/errinjct", which will be used like the
> way described as follows.
> 
> According to PAPR spec, there are 3 RTAS calls related to error injection:
> "ibm,open-errinjct": allocate token prior to doing error injection.
> "ibm,close-errinjct": release the token allocated from "ibm,open-errinjct".
> "ibm,errinjct": do error injection.
> 
> Sysfs file /sys/firmware/opal/errinjct accepts strings that have fixed
> format "ei_token ...". For now, we only support 32-bits and 64-bits
> PCI error injection and they should have following strings written to
> /sys/firmware/opal/errinjct as follows. We don't have corresponding
> sysfs files for "ibm,open-errinjct" and "ibm,close-errinjct", which
> means that we rely on userland to maintain the token by itself.

Should we instead look into adding a file in the existing sysfs
directory of the specific PHB ?

Cheers,
Ben.

> 32-bits PCI error: "7:addr:mask:iommu_group_id:function".
> 64-bits PCI error: "8:addr:mask:iommu_group_id:function".
> 
> The above "7" and "8" represent 32-bits and 64-bits PCI error seperately
> and "function" is one of the specific PCI errors (e.g. MMIO access address
> parity error), which are defined by PAPR spec.
> 
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/opal.h                |   1 +
>  arch/powerpc/platforms/powernv/Makefile        |   2 +-
>  arch/powerpc/platforms/powernv/opal-errinjct.c | 184 +++++++++++++++++++++++++
>  arch/powerpc/platforms/powernv/opal.c          |   2 +
>  4 files changed, 188 insertions(+), 1 deletion(-)
>  create mode 100644 arch/powerpc/platforms/powernv/opal-errinjct.c
> 
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index d982bb8..bf280d9 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -985,6 +985,7 @@ extern int opal_elog_init(void);
>  extern void opal_platform_dump_init(void);
>  extern void opal_sys_param_init(void);
>  extern void opal_msglog_init(void);
> +extern void opal_errinjct_init(void);
>  
>  extern int opal_machine_check(struct pt_regs *regs);
>  extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
> diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
> index 63cebb9..4711de8 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -1,7 +1,7 @@
>  obj-y			+= setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o
>  obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
>  obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
> -obj-y			+= opal-msglog.o
> +obj-y			+= opal-msglog.o opal-errinjct.o
>  
>  obj-$(CONFIG_SMP)	+= smp.o
>  obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
> diff --git a/arch/powerpc/platforms/powernv/opal-errinjct.c b/arch/powerpc/platforms/powernv/opal-errinjct.c
> new file mode 100644
> index 0000000..29c9e83
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/opal-errinjct.c
> @@ -0,0 +1,184 @@
> +/*
> + * The file supports error injection, which works based on OPAL API.
> + * For now, we only support PCI error injection. We need support
> + * injecting other types of errors in future.
> + *
> + * Copyright Gavin Shan, IBM Corporation 2014.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/msi.h>
> +#include <linux/pci.h>
> +#include <linux/iommu.h>
> +#include <linux/random.h>
> +#include <linux/slab.h>
> +#include <linux/sysfs.h>
> +#include <linux/fs.h>
> +#include <linux/fcntl.h>
> +#include <linux/kobject.h>
> +
> +#include <asm/msi_bitmap.h>
> +#include <asm/iommu.h>
> +#include <asm/pci-bridge.h>
> +#include <asm/ppc-pci.h>
> +#include <asm/opal.h>
> +
> +#include "powernv.h"
> +#include "pci.h"
> +
> +static DEFINE_MUTEX(errinjct_mutex);
> +
> +static int errinjct_iommu_group_to_phb_and_pe(uint32_t iommu_grp_id,
> +					      uint64_t *phb_id,
> +					      uint32_t *pe_num)
> +{
> +#ifdef CONFIG_IOMMU_API
> +	struct iommu_group *iommu_grp;
> +	struct iommu_table *tbl;
> +	struct pnv_ioda_pe *pe;
> +
> +	iommu_grp = iommu_group_get_by_id(iommu_grp_id);
> +	if (!iommu_grp)
> +		return -ENODEV;
> +
> +	tbl = iommu_group_get_iommudata(iommu_grp);
> +	if (!tbl)
> +		return -ENODEV;
> +
> +	pe = container_of(tbl, struct pnv_ioda_pe, tce32_table);
> +	if (!pe->phb)
> +		return -ENODEV;
> +
> +	*phb_id = pe->phb->opal_id;
> +	*pe_num = pe->pe_number;
> +
> +	return 0;
> +#endif
> +
> +	return -ENXIO;
> +}
> +
> +static int errinjct_ioa_bus_error(const char *buf, struct OpalErrinjct *ei)
> +{
> +	uint32_t iommu_grp_id;
> +	int ret;
> +
> +	/* Extract parameters */
> +	ret = sscanf(buf, "%x:%x:%x:%x:%x",
> +		     &ei->type, &ei->ioa.addr,
> +		     &ei->ioa.mask, &iommu_grp_id, ei->ioa.function);
> +	if (ret != 5)
> +		return -EINVAL;
> +
> +	/* Invalid function ? */
> +	if (ei->ioa.function < OpalEitIoaLoadMemAddr ||
> +	    ei->ioa.function > OpalEitIoaDmaWriteMemTarget)
> +		return -ERANGE;
> +
> +	/* Retrieve PHB ID and PE number */
> +	ret = errinjct_iommu_group_to_phb_and_pe(iommu_grp_id,
> +						 &ei->ioa.phb_id,
> +						 &ei->ioa.pe);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static int errinjct_ioa_bus_error64(const char *buf, struct OpalErrinjct *ei)
> +{
> +	uint32_t iommu_grp_id;
> +	int ret;
> +
> +	/* Extract parameter */
> +	ret = sscanf(buf, "%x:%llx:%llx:%x:%x",
> +		     &ei->type, &ei->ioa64.addr,
> +		     &ei->ioa64.mask, &iommu_grp_id, &ei->ioa64.function);
> +	if (ret != 5)
> +		return -EINVAL;
> +
> +	/* Invalid function ? */
> +	if (ei->ioa64.function < OpalEitIoaLoadMemAddr ||
> +	    ei->ioa64.function > OpalEitIoaDmaWriteMemTarget)
> +		return -ERANGE;
> +
> +	/* Retrieve PHB ID and PE number */
> +	ret = errinjct_iommu_group_to_phb_and_pe(iommu_grp_id,
> +						 &ei->ioa64.phb_id,
> +						 &ei->ioa64.pe);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static ssize_t errinjct_store(struct kobject *kobj,
> +			      struct kobj_attribute *attr,
> +			      const char *buf, size_t count)
> +{
> +	struct OpalErrinjct ei;
> +	int ret;
> +	long rc;
> +
> +	/* Extract common parameters */
> +	ret = sscanf(buf, "%x", &ei.type);
> +	if (ret != 1)
> +		return -EINVAL;
> +
> +	/* Error injection might be in progress */
> +	if (!mutex_trylock(&errinjct_mutex))
> +		return -EAGAIN;
> +
> +	switch (ei.type) {
> +	case OpalErrinjctTypeIoaBusError:
> +		ret = errinjct_ioa_bus_error(buf, &ei);
> +		break;
> +	case OpalErrinjctTypeIoaBusError64:
> +		ret = errinjct_ioa_bus_error64(buf, &ei);
> +		break;
> +	default:
> +		ret = -ERANGE;
> +	}
> +
> +	/* Invalid parameters ? */
> +	if (ret)
> +		goto mutex_unlock_exit;
> +
> +	/* OPAL call */
> +	rc = opal_err_injct(&ei);
> +	if (rc == OPAL_SUCCESS)
> +		ret = count;
> +	else
> +		ret = -EIO;
> +
> +mutex_unlock_exit:
> +	mutex_unlock(&errinjct_mutex);
> +	return ret;
> +}
> +
> +static struct kobj_attribute errinjct_attr =
> +	__ATTR(errinjct, 0600, NULL, errinjct_store);
> +
> +void __init opal_errinjct_init(void)
> +{
> +	int ret;
> +
> +	/* Make sure /sys/firmware/opal directory is created */
> +	if (!opal_kobj) {
> +		pr_warn("%s: opal kobject is not available\n",
> +			__func__);
> +		return;
> +	}
> +
> +	/* Create the sysfs files */
> +	ret = sysfs_create_file(opal_kobj, &errinjct_attr.attr);
> +	if (ret)
> +		pr_warn("%s: Cannot create sysfs file (%d)\n",
> +			__func__, ret);
> +}
> diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
> index 360ad80c..cb29bb5 100644
> --- a/arch/powerpc/platforms/powernv/opal.c
> +++ b/arch/powerpc/platforms/powernv/opal.c
> @@ -604,6 +604,8 @@ static int __init opal_init(void)
>  		opal_sys_param_init();
>  		/* Setup message log interface. */
>  		opal_msglog_init();
> +		/* Setup error injection interface */
> +		opal_errinjct_init();
>  	}
>  
>  	return 0;


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mike Qiu June 24, 2014, 6:18 a.m. UTC | #3
On 06/23/2014 10:14 AM, Gavin Shan wrote:
> The patch implements one OPAL firmware sysfs file to support PCI error
> injection: "/sys/firmware/opal/errinjct", which will be used like the
> way described as follows.
>
> According to PAPR spec, there are 3 RTAS calls related to error injection:
> "ibm,open-errinjct": allocate token prior to doing error injection.
> "ibm,close-errinjct": release the token allocated from "ibm,open-errinjct".
> "ibm,errinjct": do error injection.
>
> Sysfs file /sys/firmware/opal/errinjct accepts strings that have fixed
> format "ei_token ...". For now, we only support 32-bits and 64-bits
> PCI error injection and they should have following strings written to
> /sys/firmware/opal/errinjct as follows. We don't have corresponding
> sysfs files for "ibm,open-errinjct" and "ibm,close-errinjct", which
> means that we rely on userland to maintain the token by itself.
>
> 32-bits PCI error: "7:addr:mask:iommu_group_id:function".
> 64-bits PCI error: "8:addr:mask:iommu_group_id:function".
>
> The above "7" and "8" represent 32-bits and 64-bits PCI error seperately
> and "function" is one of the specific PCI errors (e.g. MMIO access address
> parity error), which are defined by PAPR spec.
>
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>   arch/powerpc/include/asm/opal.h                |   1 +
>   arch/powerpc/platforms/powernv/Makefile        |   2 +-
>   arch/powerpc/platforms/powernv/opal-errinjct.c | 184 +++++++++++++++++++++++++
>   arch/powerpc/platforms/powernv/opal.c          |   2 +
>   4 files changed, 188 insertions(+), 1 deletion(-)
>   create mode 100644 arch/powerpc/platforms/powernv/opal-errinjct.c
>
> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
> index d982bb8..bf280d9 100644
> --- a/arch/powerpc/include/asm/opal.h
> +++ b/arch/powerpc/include/asm/opal.h
> @@ -985,6 +985,7 @@ extern int opal_elog_init(void);
>   extern void opal_platform_dump_init(void);
>   extern void opal_sys_param_init(void);
>   extern void opal_msglog_init(void);
> +extern void opal_errinjct_init(void);
>
>   extern int opal_machine_check(struct pt_regs *regs);
>   extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
> diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
> index 63cebb9..4711de8 100644
> --- a/arch/powerpc/platforms/powernv/Makefile
> +++ b/arch/powerpc/platforms/powernv/Makefile
> @@ -1,7 +1,7 @@
>   obj-y			+= setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o
>   obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
>   obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
> -obj-y			+= opal-msglog.o
> +obj-y			+= opal-msglog.o opal-errinjct.o
>
>   obj-$(CONFIG_SMP)	+= smp.o
>   obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
> diff --git a/arch/powerpc/platforms/powernv/opal-errinjct.c b/arch/powerpc/platforms/powernv/opal-errinjct.c
> new file mode 100644
> index 0000000..29c9e83
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/opal-errinjct.c
> @@ -0,0 +1,184 @@
> +/*
> + * The file supports error injection, which works based on OPAL API.
> + * For now, we only support PCI error injection. We need support
> + * injecting other types of errors in future.
> + *
> + * Copyright Gavin Shan, IBM Corporation 2014.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/msi.h>
> +#include <linux/pci.h>
> +#include <linux/iommu.h>
> +#include <linux/random.h>
> +#include <linux/slab.h>
> +#include <linux/sysfs.h>
> +#include <linux/fs.h>
> +#include <linux/fcntl.h>
> +#include <linux/kobject.h>
> +
> +#include <asm/msi_bitmap.h>
> +#include <asm/iommu.h>
> +#include <asm/pci-bridge.h>
> +#include <asm/ppc-pci.h>
> +#include <asm/opal.h>
> +
> +#include "powernv.h"
> +#include "pci.h"
> +
> +static DEFINE_MUTEX(errinjct_mutex);
> +
> +static int errinjct_iommu_group_to_phb_and_pe(uint32_t iommu_grp_id,
> +					      uint64_t *phb_id,
> +					      uint32_t *pe_num)
> +{
> +#ifdef CONFIG_IOMMU_API

Is it reasonable to make error injection depend on "CONFIG_IOMMU_API"?

Does that mean that with the default config (CONFIG_IOMMU_API = n) we
cannot do error injection on PCI devices?

Thanks
Mike
> +	struct iommu_group *iommu_grp;
> +	struct iommu_table *tbl;
> +	struct pnv_ioda_pe *pe;
> +
> +	iommu_grp = iommu_group_get_by_id(iommu_grp_id);
> +	if (!iommu_grp)
> +		return -ENODEV;
> +
> +	tbl = iommu_group_get_iommudata(iommu_grp);
> +	if (!tbl)
> +		return -ENODEV;
> +
> +	pe = container_of(tbl, struct pnv_ioda_pe, tce32_table);
> +	if (!pe->phb)
> +		return -ENODEV;
> +
> +	*phb_id = pe->phb->opal_id;
> +	*pe_num = pe->pe_number;
> +
> +	return 0;
> +#endif
> +
> +	return -ENXIO;
> +}
> +
> +static int errinjct_ioa_bus_error(const char *buf, struct OpalErrinjct *ei)
> +{
> +	uint32_t iommu_grp_id;
> +	int ret;
> +
> +	/* Extract parameters */
> +	ret = sscanf(buf, "%x:%x:%x:%x:%x",
> +		     &ei->type, &ei->ioa.addr,
> +		     &ei->ioa.mask, &iommu_grp_id, ei->ioa.function);
> +	if (ret != 5)
> +		return -EINVAL;
> +
> +	/* Invalid function ? */
> +	if (ei->ioa.function < OpalEitIoaLoadMemAddr ||
> +	    ei->ioa.function > OpalEitIoaDmaWriteMemTarget)
> +		return -ERANGE;
> +
> +	/* Retrieve PHB ID and PE number */
> +	ret = errinjct_iommu_group_to_phb_and_pe(iommu_grp_id,
> +						 &ei->ioa.phb_id,
> +						 &ei->ioa.pe);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static int errinjct_ioa_bus_error64(const char *buf, struct OpalErrinjct *ei)
> +{
> +	uint32_t iommu_grp_id;
> +	int ret;
> +
> +	/* Extract parameter */
> +	ret = sscanf(buf, "%x:%llx:%llx:%x:%x",
> +		     &ei->type, &ei->ioa64.addr,
> +		     &ei->ioa64.mask, &iommu_grp_id, &ei->ioa64.function);
> +	if (ret != 5)
> +		return -EINVAL;
> +
> +	/* Invalid function ? */
> +	if (ei->ioa64.function < OpalEitIoaLoadMemAddr ||
> +	    ei->ioa64.function > OpalEitIoaDmaWriteMemTarget)
> +		return -ERANGE;
> +
> +	/* Retrieve PHB ID and PE number */
> +	ret = errinjct_iommu_group_to_phb_and_pe(iommu_grp_id,
> +						 &ei->ioa64.phb_id,
> +						 &ei->ioa64.pe);
> +	if (ret)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static ssize_t errinjct_store(struct kobject *kobj,
> +			      struct kobj_attribute *attr,
> +			      const char *buf, size_t count)
> +{
> +	struct OpalErrinjct ei;
> +	int ret;
> +	long rc;
> +
> +	/* Extract common parameters */
> +	ret = sscanf(buf, "%x", &ei.type);
> +	if (ret != 1)
> +		return -EINVAL;
> +
> +	/* Error injection might be in progress */
> +	if (!mutex_trylock(&errinjct_mutex))
> +		return -EAGAIN;
> +
> +	switch (ei.type) {
> +	case OpalErrinjctTypeIoaBusError:
> +		ret = errinjct_ioa_bus_error(buf, &ei);
> +		break;
> +	case OpalErrinjctTypeIoaBusError64:
> +		ret = errinjct_ioa_bus_error64(buf, &ei);
> +		break;
> +	default:
> +		ret = -ERANGE;
> +	}
> +
> +	/* Invalid parameters ? */
> +	if (ret)
> +		goto mutex_unlock_exit;
> +
> +	/* OPAL call */
> +	rc = opal_err_injct(&ei);
> +	if (rc == OPAL_SUCCESS)
> +		ret = count;
> +	else
> +		ret = -EIO;
> +
> +mutex_unlock_exit:
> +	mutex_unlock(&errinjct_mutex);
> +	return ret;
> +}
> +
> +static struct kobj_attribute errinjct_attr =
> +	__ATTR(errinjct, 0600, NULL, errinjct_store);
> +
> +void __init opal_errinjct_init(void)
> +{
> +	int ret;
> +
> +	/* Make sure /sys/firmware/opal directory is created */
> +	if (!opal_kobj) {
> +		pr_warn("%s: opal kobject is not available\n",
> +			__func__);
> +		return;
> +	}
> +
> +	/* Create the sysfs files */
> +	ret = sysfs_create_file(opal_kobj, &errinjct_attr.attr);
> +	if (ret)
> +		pr_warn("%s: Cannot create sysfs file (%d)\n",
> +			__func__, ret);
> +}
> diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
> index 360ad80c..cb29bb5 100644
> --- a/arch/powerpc/platforms/powernv/opal.c
> +++ b/arch/powerpc/platforms/powernv/opal.c
> @@ -604,6 +604,8 @@ static int __init opal_init(void)
>   		opal_sys_param_init();
>   		/* Setup message log interface. */
>   		opal_msglog_init();
> +		/* Setup error injection interface */
> +		opal_errinjct_init();
>   	}
>
>   	return 0;

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt June 24, 2014, 6:36 a.m. UTC | #4
> Is it reasonable to do error injection with "CONFIG_IOMMU_API" ?
> 
> That means if use default config(CONFIG_IOMMU_API = n),  we can not do 
> error injection to pci devices?

Well we can't pass them through either so ...

In any case, this is not a priority. First we need to implement a solid
error injection facility for the *host*. The guest one is really really
low on the list.

Cheers,
Ben.


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mike Qiu June 24, 2014, 6:57 a.m. UTC | #5
On 06/24/2014 02:36 PM, Benjamin Herrenschmidt wrote:
>> Is it reasonable to do error injection with "CONFIG_IOMMU_API" ?
>>
>> That means if use default config(CONFIG_IOMMU_API = n),  we can not do
>> error injection to pci devices?
> Well we can't pass them through either so ...
> In any case, this is not a priority. First we need to implement a solid
> error injection facility for the *host*. The guest one is really really

OK.

Does that mean *host* side error injection should be based on
"CONFIG_IOMMU_API"? If it is just host side (no guest, no pass-through),
can't we do error injection?

Maybe I misunderstand :)

Thanks
Mike
> low on the list.
>
> Cheers,
> Ben.
>
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt June 24, 2014, 7 a.m. UTC | #6
On Tue, 2014-06-24 at 14:57 +0800, Mike Qiu wrote:
> Is that mean *host* side error injection should base on 
> "CONFIG_IOMMU_API" ? If it is just host side(no guest, no pass through), 
> can't we do error inject?
> 
> Maybe I misunderstand :)

Ah no, make different patches, we don't want to use IOMMU group ID, just
PE numbers. Maybe we should expose in sysfs the PEs from the platform
code with the error injection files underneath ... 

Cheers,
Ben.


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gavin Shan June 25, 2014, 12:03 a.m. UTC | #7
On Tue, Jun 24, 2014 at 05:00:52PM +1000, Benjamin Herrenschmidt wrote:
>On Tue, 2014-06-24 at 14:57 +0800, Mike Qiu wrote:
>> Is that mean *host* side error injection should base on 
>> "CONFIG_IOMMU_API" ? If it is just host side(no guest, no pass through), 
>> can't we do error inject?
>> 
>> Maybe I misunderstand :)
>
>Ah no, make different patches, we don't want to use IOMMU group ID, just
>PE numbers. Maybe we should expose in sysfs the PEs from the platform
>code with the error injection files underneath ... 
>

Yeah, "errinjct" needs to grab PCI_domain_nr+PE number from sysfs. We
already have a PE number sysfs file:

[root@ltcfbl8eb 0000:01:00.1]# pwd
/sys/bus/pci/devices/0000:01:00.1
[root@ltcfbl8eb 0000:01:00.1]# cat eeh_pe_config_addr 
0x1

For guest support, we will rely on VFIO group ioctl command, which
naturally depends on pass-through.

---

We will probably implement it like this. If there is anything wrong,
please correct me:

- Introduce EEH callback struct eeh_ops::err_inject(), which will be
  implemented for PowerNV (NULL for pSeries) by calling the PCI error
  injection dedicated OPAL API (opal_pci_err_inject()).
- Introduce global function eeh.c::eeh_err_inject(), which calls
  eeh_ops::err_inject(). The newly introduced VFIO EEH operation
  will be implemented based on this function.
- Introduce debugfs /sys/kernel/debug/powerpc/PCIxxxx/errinjct, which
  receives PCI error injection parameters from "errinjct". It could
  have format: "ei_token:addr:mask:PCI_domain_nr:PE_num:function".
  Eventually, eeh_err_inject() is invoked to call the corresponding
  OPAL API.

Thanks,
Gavin

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gavin Shan June 25, 2014, 12:05 a.m. UTC | #8
On Mon, Jun 23, 2014 at 04:36:44PM +1000, Michael Neuling wrote:
>On Mon, 2014-06-23 at 12:14 +1000, Gavin Shan wrote:
>> The patch implements one OPAL firmware sysfs file to support PCI error
>> injection: "/sys/firmware/opal/errinjct", which will be used like the
>> way described as follows.
>> 
>> According to PAPR spec, there are 3 RTAS calls related to error injection:
>> "ibm,open-errinjct": allocate token prior to doing error injection.
>> "ibm,close-errinjct": release the token allocated from "ibm,open-errinjct".
>> "ibm,errinjct": do error injection.
>> 
>> Sysfs file /sys/firmware/opal/errinjct accepts strings that have fixed
>> format "ei_token ...". For now, we only support 32-bits and 64-bits
>> PCI error injection and they should have following strings written to
>> /sys/firmware/opal/errinjct as follows. We don't have corresponding
>> sysfs files for "ibm,open-errinjct" and "ibm,close-errinjct", which
>> means that we rely on userland to maintain the token by itself.
>
>This sounds cool.  
>
>Can you document the sysfs interface in Documentation/powerpc?
>

Yeah, Documentation/powerpc/eeh-pci-error-recovery.txt needs update
as Ben suggested. It's something in my list :-)

Thanks,
Gavin

>> 
>> 32-bits PCI error: "7:addr:mask:iommu_group_id:function".
>> 64-bits PCI error: "8:addr:mask:iommu_group_id:function".
>> 
>> The above "7" and "8" represent 32-bits and 64-bits PCI error seperately
>> and "function" is one of the specific PCI errors (e.g. MMIO access address
>> parity error), which are defined by PAPR spec.
>> 
>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/include/asm/opal.h                |   1 +
>>  arch/powerpc/platforms/powernv/Makefile        |   2 +-
>>  arch/powerpc/platforms/powernv/opal-errinjct.c | 184 +++++++++++++++++++++++++
>>  arch/powerpc/platforms/powernv/opal.c          |   2 +
>>  4 files changed, 188 insertions(+), 1 deletion(-)
>>  create mode 100644 arch/powerpc/platforms/powernv/opal-errinjct.c
>> 
>> diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
>> index d982bb8..bf280d9 100644
>> --- a/arch/powerpc/include/asm/opal.h
>> +++ b/arch/powerpc/include/asm/opal.h
>> @@ -985,6 +985,7 @@ extern int opal_elog_init(void);
>>  extern void opal_platform_dump_init(void);
>>  extern void opal_sys_param_init(void);
>>  extern void opal_msglog_init(void);
>> +extern void opal_errinjct_init(void);
>>  
>>  extern int opal_machine_check(struct pt_regs *regs);
>>  extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
>> diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
>> index 63cebb9..4711de8 100644
>> --- a/arch/powerpc/platforms/powernv/Makefile
>> +++ b/arch/powerpc/platforms/powernv/Makefile
>> @@ -1,7 +1,7 @@
>>  obj-y			+= setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o
>>  obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
>>  obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
>> -obj-y			+= opal-msglog.o
>> +obj-y			+= opal-msglog.o opal-errinjct.o
>>  
>>  obj-$(CONFIG_SMP)	+= smp.o
>>  obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
>> diff --git a/arch/powerpc/platforms/powernv/opal-errinjct.c b/arch/powerpc/platforms/powernv/opal-errinjct.c
>> new file mode 100644
>> index 0000000..29c9e83
>> --- /dev/null
>> +++ b/arch/powerpc/platforms/powernv/opal-errinjct.c
>> @@ -0,0 +1,184 @@
>> +/*
>> + * The file supports error injection, which works based on OPAL API.
>> + * For now, we only support PCI error injection. We need support
>> + * injecting other types of errors in future.
>> + *
>> + * Copyright Gavin Shan, IBM Corporation 2014.
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/init.h>
>> +#include <linux/msi.h>
>> +#include <linux/pci.h>
>> +#include <linux/iommu.h>
>> +#include <linux/random.h>
>> +#include <linux/slab.h>
>> +#include <linux/sysfs.h>
>> +#include <linux/fs.h>
>> +#include <linux/fcntl.h>
>> +#include <linux/kobject.h>
>> +
>> +#include <asm/msi_bitmap.h>
>> +#include <asm/iommu.h>
>> +#include <asm/pci-bridge.h>
>> +#include <asm/ppc-pci.h>
>> +#include <asm/opal.h>
>> +
>> +#include "powernv.h"
>> +#include "pci.h"
>> +
>> +static DEFINE_MUTEX(errinjct_mutex);
>> +
>> +static int errinjct_iommu_group_to_phb_and_pe(uint32_t iommu_grp_id,
>> +					      uint64_t *phb_id,
>> +					      uint32_t *pe_num)
>> +{
>> +#ifdef CONFIG_IOMMU_API
>> +	struct iommu_group *iommu_grp;
>> +	struct iommu_table *tbl;
>> +	struct pnv_ioda_pe *pe;
>> +
>> +	iommu_grp = iommu_group_get_by_id(iommu_grp_id);
>> +	if (!iommu_grp)
>> +		return -ENODEV;
>> +
>> +	tbl = iommu_group_get_iommudata(iommu_grp);
>> +	if (!tbl)
>> +		return -ENODEV;
>> +
>> +	pe = container_of(tbl, struct pnv_ioda_pe, tce32_table);
>> +	if (!pe->phb)
>> +		return -ENODEV;
>> +
>> +	*phb_id = pe->phb->opal_id;
>> +	*pe_num = pe->pe_number;
>> +
>> +	return 0;
>> +#endif
>> +
>> +	return -ENXIO;
>> +}
>> +
>> +static int errinjct_ioa_bus_error(const char *buf, struct OpalErrinjct *ei)
>> +{
>> +	uint32_t iommu_grp_id;
>> +	int ret;
>> +
>> +	/* Extract parameters */
>> +	ret = sscanf(buf, "%x:%x:%x:%x:%x",
>> +		     &ei->type, &ei->ioa.addr,
>> +		     &ei->ioa.mask, &iommu_grp_id, ei->ioa.function);
>> +	if (ret != 5)
>> +		return -EINVAL;
>> +
>> +	/* Invalid function ? */
>> +	if (ei->ioa.function < OpalEitIoaLoadMemAddr ||
>> +	    ei->ioa.function > OpalEitIoaDmaWriteMemTarget)
>> +		return -ERANGE;
>> +
>> +	/* Retrieve PHB ID and PE number */
>> +	ret = errinjct_iommu_group_to_phb_and_pe(iommu_grp_id,
>> +						 &ei->ioa.phb_id,
>> +						 &ei->ioa.pe);
>> +	if (ret)
>> +		return ret;
>> +
>> +	return 0;
>> +}
>> +
>> +static int errinjct_ioa_bus_error64(const char *buf, struct OpalErrinjct *ei)
>> +{
>> +	uint32_t iommu_grp_id;
>> +	int ret;
>> +
>> +	/* Extract parameter */
>> +	ret = sscanf(buf, "%x:%llx:%llx:%x:%x",
>> +		     &ei->type, &ei->ioa64.addr,
>> +		     &ei->ioa64.mask, &iommu_grp_id, &ei->ioa64.function);
>> +	if (ret != 5)
>> +		return -EINVAL;
>> +
>> +	/* Invalid function ? */
>> +	if (ei->ioa64.function < OpalEitIoaLoadMemAddr ||
>> +	    ei->ioa64.function > OpalEitIoaDmaWriteMemTarget)
>> +		return -ERANGE;
>> +
>> +	/* Retrieve PHB ID and PE number */
>> +	ret = errinjct_iommu_group_to_phb_and_pe(iommu_grp_id,
>> +						 &ei->ioa64.phb_id,
>> +						 &ei->ioa64.pe);
>> +	if (ret)
>> +		return ret;
>> +
>> +	return 0;
>> +}
>> +
>> +static ssize_t errinjct_store(struct kobject *kobj,
>> +			      struct kobj_attribute *attr,
>> +			      const char *buf, size_t count)
>> +{
>> +	struct OpalErrinjct ei;
>> +	int ret;
>> +	long rc;
>> +
>> +	/* Extract common parameters */
>> +	ret = sscanf(buf, "%x", &ei.type);
>> +	if (ret != 1)
>> +		return -EINVAL;
>> +
>> +	/* Error injection might be in progress */
>> +	if (!mutex_trylock(&errinjct_mutex))
>> +		return -EAGAIN;
>> +
>> +	switch (ei.type) {
>> +	case OpalErrinjctTypeIoaBusError:
>> +		ret = errinjct_ioa_bus_error(buf, &ei);
>> +		break;
>> +	case OpalErrinjctTypeIoaBusError64:
>> +		ret = errinjct_ioa_bus_error64(buf, &ei);
>> +		break;
>> +	default:
>> +		ret = -ERANGE;
>> +	}
>> +
>> +	/* Invalid parameters ? */
>> +	if (ret)
>> +		goto mutex_unlock_exit;
>> +
>> +	/* OPAL call */
>> +	rc = opal_err_injct(&ei);
>> +	if (rc == OPAL_SUCCESS)
>> +		ret = count;
>> +	else
>> +		ret = -EIO;
>> +
>> +mutex_unlock_exit:
>> +	mutex_unlock(&errinjct_mutex);
>> +	return ret;
>> +}
>> +
>> +static struct kobj_attribute errinjct_attr =
>> +	__ATTR(errinjct, 0600, NULL, errinjct_store);
>> +
>> +void __init opal_errinjct_init(void)
>> +{
>> +	int ret;
>> +
>> +	/* Make sure /sys/firmware/opal directory is created */
>> +	if (!opal_kobj) {
>> +		pr_warn("%s: opal kobject is not available\n",
>> +			__func__);
>> +		return;
>> +	}
>> +
>> +	/* Create the sysfs files */
>> +	ret = sysfs_create_file(opal_kobj, &errinjct_attr.attr);
>> +	if (ret)
>> +		pr_warn("%s: Cannot create sysfs file (%d)\n",
>> +			__func__, ret);
>> +}
>> diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
>> index 360ad80c..cb29bb5 100644
>> --- a/arch/powerpc/platforms/powernv/opal.c
>> +++ b/arch/powerpc/platforms/powernv/opal.c
>> @@ -604,6 +604,8 @@ static int __init opal_init(void)
>>  		opal_sys_param_init();
>>  		/* Setup message log interface. */
>>  		opal_msglog_init();
>> +		/* Setup error injection interface */
>> +		opal_errinjct_init();
>>  	}
>>  
>>  	return 0;
>
>--
>To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mike Qiu June 25, 2014, 3:05 a.m. UTC | #9
On 06/25/2014 08:03 AM, Gavin Shan wrote:
> On Tue, Jun 24, 2014 at 05:00:52PM +1000, Benjamin Herrenschmidt wrote:
>> On Tue, 2014-06-24 at 14:57 +0800, Mike Qiu wrote:
>>> Is that mean *host* side error injection should base on
>>> "CONFIG_IOMMU_API" ? If it is just host side(no guest, no pass through),
>>> can't we do error inject?
>>>
>>> Maybe I misunderstand :)
>> Ah no, make different patches, we don't want to use IOMMU group ID, just
>> PE numbers. Maybe we should expose in sysfs the PEs from the platform
>> code with the error injection files underneath ...
>>
> Yeah, "errinjct" needs grab PCI_domain_nr+PE number from sysfs. We
> already had PE number sysfs file:
>
> [root@ltcfbl8eb 0000:01:00.1]# pwd
> /sys/bus/pci/devices/0000:01:00.1
> [root@ltcfbl8eb 0000:01:00.1]# cat eeh_pe_config_addr
> 0x1
>
> For guest support, we will rely on VFIO group ioctl command, which
> naturally depends on pass-through.
>
> ---
>
> We probably implement it like this. If there're anything wrong, please
> correct me:
>
> - Introduce EEH callback struct eeh_ops::err_inject(), which will be
>    implemented for PowerNV (NULL for pSeries) by calling the PCI error
>    injection dedicated OPAL API (opal_pci_err_inject()).
> - Introduce global function eeh.c::eeh_err_inject(), which calls to
>    eeh_ops::err_inject() and newly introduced VFIO EEH operation
>    will be implemented based on this function.
> - Introduce debugfs /sys/kernel/debug/powerpc/PCIxxxx/errinjct, which

Here maybe "/sys/kernel/debug/powerpc/errinjct" is better, because it
will supply "PCI_domain_nr" in the parameters, so there is no need to
supply errinjct for each PCI domain.

Another reason is that error injection will not only be for PCI (in the
future), so it is better not to put it under a PCI domain entry.

Also, it is simpler for userland tools to have a fixed path.

Thanks
Mike

>    receives PCI error injection parameters from "errinjct". It could
>    have format: "ei_token:addr:mask:PCI_domain_nr:PE_num:function".
>    Eventually, eeh_err_inject() is invoked to call the corresponding
>    OPAL API.
>
> Thanks,
> Gavin
>

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt June 25, 2014, 3:19 a.m. UTC | #10
On Wed, 2014-06-25 at 11:05 +0800, Mike Qiu wrote:
> Here maybe  "/sys/kernel/debug/powerpc/errinjct" is better, because
> it 
> will supply "PCI_domain_nr" in parameters, so no need supply errinjct 
> for each PCI domain.
> 
> Another reason is error inject not only for PCI(in future), so better 
> not in PCI domain entry.
> 
> Also it simple for userland tools to has a fixed path.

I don't like this. I much prefer have dedicated error injection files
in their respective locations, something for PCI under the corresponding
PCI bridge etc...

Cheers,
Ben.


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stewart Smith June 26, 2014, 4:48 a.m. UTC | #11
Gavin Shan <gwshan@linux.vnet.ibm.com> writes:
> On Mon, Jun 23, 2014 at 04:36:44PM +1000, Michael Neuling wrote:
>>On Mon, 2014-06-23 at 12:14 +1000, Gavin Shan wrote:
>>> The patch implements one OPAL firmware sysfs file to support PCI error
>>> injection: "/sys/firmware/opal/errinjct", which will be used like the
>>> way described as follows.
>>> 
>>> According to PAPR spec, there are 3 RTAS calls related to error injection:
>>> "ibm,open-errinjct": allocate token prior to doing error injection.
>>> "ibm,close-errinjct": release the token allocated from "ibm,open-errinjct".
>>> "ibm,errinjct": do error injection.
>>> 
>>> Sysfs file /sys/firmware/opal/errinjct accepts strings that have fixed
>>> format "ei_token ...". For now, we only support 32-bits and 64-bits
>>> PCI error injection and they should have following strings written to
>>> /sys/firmware/opal/errinjct as follows. We don't have corresponding
>>> sysfs files for "ibm,open-errinjct" and "ibm,close-errinjct", which
>>> means that we rely on userland to maintain the token by itself.
>>
>>This sounds cool.  
>>
>>Can you document the sysfs interface in Documentation/powerpc?
>>
>
> Yeah, Documentation/powerpc/eeh-pci-error-recovery.txt needs update
> as Ben suggested. It's something in my list :-)

It should probably also/instead be in
Documentation/ABI/(testing|stable)/sysfs-firmware-opal-errinjct  as this
seems to be where sysfs bits get documented.

Also, considering that we're specifically looking at PCI error
injection, should the sysfs name be /sys/firmware/opal/pci-error-inject
instead?

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stewart Smith June 26, 2014, 4:52 a.m. UTC | #12
Gavin Shan <gwshan@linux.vnet.ibm.com> writes:
> +static struct kobj_attribute errinjct_attr =
> +	__ATTR(errinjct, 0600, NULL, errinjct_store);

May also be good to have a read method that either lists current
injected errors? I guess it depends on if they're one time errors or
persistent errors too.

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mike Qiu July 21, 2014, 8:06 a.m. UTC | #13
On 06/25/2014 11:19 AM, Benjamin Herrenschmidt wrote:
> On Wed, 2014-06-25 at 11:05 +0800, Mike Qiu wrote:
>> Here maybe  "/sys/kernel/debug/powerpc/errinjct" is better, because
>> it
>> will supply "PCI_domain_nr" in parameters, so no need supply errinjct
>> for each PCI domain.
>>
>> Another reason is error inject not only for PCI(in future), so better
>> not in PCI domain entry.
>>
>> Also it simple for userland tools to has a fixed path.
> I don't like this. I much prefer have dedicated error injection files
> in their respective locations, something for PCI under the corresponding
> PCI bridge etc...

So PowerNV error injection will be designed to rely on debugfs being
configured, right?

Thanks,
Mike
>
> Cheers,
> Ben.
>
>
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt July 21, 2014, 10:49 p.m. UTC | #14
On Mon, 2014-07-21 at 16:06 +0800, Mike Qiu wrote:
> > I don't like this. I much prefer have dedicated error injection files
> > in their respective locations, something for PCI under the corresponding
> > PCI bridge etc...
> 
> So PowerNV error injection will be designed rely on debugfs been 
> configured, right?

Not necessarily. If we create a better debugfs layout for our PHBs, then
yes. It might be useful to provide more info in there for example access
to some of the counters ...

But on the other hand, for error injection in general, I wonder if we should
be under sysfs instead... something to study a bit.

Cheers,
Ben.


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mike Qiu July 22, 2014, 3:10 a.m. UTC | #15
On 07/22/2014 06:49 AM, Benjamin Herrenschmidt wrote:
> On Mon, 2014-07-21 at 16:06 +0800, Mike Qiu wrote:
>>> I don't like this. I much prefer have dedicated error injection files
>>> in their respective locations, something for PCI under the corresponding
>>> PCI bridge etc...
>> So PowerNV error injection will be designed rely on debugfs been
>> configured, right?
> Not necessarily. If we create a better debugfs layout for our PHBs, then
> yes. It might be useful to provide more info in there for example access
> to some of the counters ...
>
> But on the other hand, for error injection in general, I wonder if we should
> be under sysfs instead... something to study a bit.

In pHyp, general error injection uses a syscall:

     #define __NR_rtas        255

I don't know if it is a good idea to reuse this syscall for PowerNV.

At least, it is another choice that does not rely on sysfs.

Thanks,
Mike

>
> Cheers,
> Ben.
>
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt July 22, 2014, 3:21 a.m. UTC | #16
On Tue, 2014-07-22 at 11:10 +0800, Mike Qiu wrote:
> On 07/22/2014 06:49 AM, Benjamin Herrenschmidt wrote:
> > On Mon, 2014-07-21 at 16:06 +0800, Mike Qiu wrote:
> >>> I don't like this. I much prefer have dedicated error injection files
> >>> in their respective locations, something for PCI under the corresponding
> >>> PCI bridge etc...
> >> So PowerNV error injection will be designed rely on debugfs been
> >> configured, right?
> > Not necessarily. If we create a better debugfs layout for our PHBs, then
> > yes. It might be useful to provide more info in there for example access
> > to some of the counters ...
> >
> > But on the other hand, for error injection in general, I wonder if we should
> > be under sysfs instead... something to study a bit.
> 
> In pHyp, general error injection use syscall:
> 
>      #define __NR_rtas        255
> 
> I don't know if it is a good idea to reuse this syscall for PowerNV.
> 
> At least, it is another choice without sysfs rely.

No, we certainly don't want that RTAS stuff. I thought Linux had some
kind of error injection infrastructure nowadays... somebody needs to
have a look.

Cheers,
Ben.


--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gavin Shan July 22, 2014, 3:26 a.m. UTC | #17
On Tue, Jul 22, 2014 at 11:10:42AM +0800, Mike Qiu wrote:
>On 07/22/2014 06:49 AM, Benjamin Herrenschmidt wrote:
>>On Mon, 2014-07-21 at 16:06 +0800, Mike Qiu wrote:
>>>>I don't like this. I much prefer have dedicated error injection files
>>>>in their respective locations, something for PCI under the corresponding
>>>>PCI bridge etc...
>>>So PowerNV error injection will be designed rely on debugfs been
>>>configured, right?
>>Not necessarily. If we create a better debugfs layout for our PHBs, then
>>yes. It might be useful to provide more info in there for example access
>>to some of the counters ...
>>
>>But on the other hand, for error injection in general, I wonder if we should
>>be under sysfs instead... something to study a bit.
>
>In pHyp, general error injection use syscall:
>
>    #define __NR_rtas        255
>
>I don't know if it is a good idea to reuse this syscall for PowerNV.
>
>At least, it is another choice without sysfs rely.
>

We won't use a syscall for routing the error injection on PowerNV any more.
Generally speaking, we will use ioctl commands or a subcode of the EEH ioctl
command, which was invented for EEH support for VFIO devices to support
QEMU. For the utility (errinjct) running on PowerNV, we will use debugfs
entries. I have premature code for that, but haven't had a chance to polish
it yet. Let me send you that so that you can start working from there.

Thanks,
Gavin 

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mike Qiu July 22, 2014, 4 a.m. UTC | #18
On 07/22/2014 11:26 AM, Gavin Shan wrote:
> On Tue, Jul 22, 2014 at 11:10:42AM +0800, Mike Qiu wrote:
>> On 07/22/2014 06:49 AM, Benjamin Herrenschmidt wrote:
>>> On Mon, 2014-07-21 at 16:06 +0800, Mike Qiu wrote:
>>>>> I don't like this. I much prefer have dedicated error injection files
>>>>> in their respective locations, something for PCI under the corresponding
>>>>> PCI bridge etc...
>>>> So PowerNV error injection will be designed rely on debugfs been
>>>> configured, right?
>>> Not necessarily. If we create a better debugfs layout for our PHBs, then
>>> yes. It might be useful to provide more info in there for example access
>>> to some of the counters ...
>>>
>>> But on the other hand, for error injection in general, I wonder if we should
>>> be under sysfs instead... something to study a bit.
>> In pHyp, general error injection use syscall:
>>
>>     #define __NR_rtas        255
>>
>> I don't know if it is a good idea to reuse this syscall for PowerNV.
>>
>> At least, it is another choice without sysfs rely.
>>
> We won't use syscall for routing the error injection on PowerNV any more.
> Generally speaking, we will use ioctl commands or subcode of EEH ioctl
> command, which was invented for EEH support for VFIO devices to suport
> QEMU. For the utility (errinjct) running on PowerNV, we will use debugfs
> entries. I have premature code for that, but don't have chance to polish
> it yet. Let me send you that so that you can start working from there.

OK, thanks
> Thanks,
> Gavin
>
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index d982bb8..bf280d9 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -985,6 +985,7 @@  extern int opal_elog_init(void);
 extern void opal_platform_dump_init(void);
 extern void opal_sys_param_init(void);
 extern void opal_msglog_init(void);
+extern void opal_errinjct_init(void);
 
 extern int opal_machine_check(struct pt_regs *regs);
 extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 63cebb9..4711de8 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -1,7 +1,7 @@ 
 obj-y			+= setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o
 obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
 obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
-obj-y			+= opal-msglog.o
+obj-y			+= opal-msglog.o opal-errinjct.o
 
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
diff --git a/arch/powerpc/platforms/powernv/opal-errinjct.c b/arch/powerpc/platforms/powernv/opal-errinjct.c
new file mode 100644
index 0000000..29c9e83
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-errinjct.c
@@ -0,0 +1,184 @@ 
+/*
+ * The file supports error injection, which works based on OPAL API.
+ * For now, we only support PCI error injection. We need support
+ * injecting other types of errors in future.
+ *
+ * Copyright Gavin Shan, IBM Corporation 2014.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+#include <linux/iommu.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <linux/fs.h>
+#include <linux/fcntl.h>
+#include <linux/kobject.h>
+
+#include <asm/msi_bitmap.h>
+#include <asm/iommu.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/opal.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+static DEFINE_MUTEX(errinjct_mutex);	/* taken by errinjct_store() */
+
+/*
+ * Map an IOMMU group ID to its OPAL PHB ID and PE number. Returns 0, or
+ * -ENODEV (no group/table/PHB) or -ENXIO (CONFIG_IOMMU_API disabled).
+ */
+static int errinjct_iommu_group_to_phb_and_pe(uint32_t iommu_grp_id,
+					      uint64_t *phb_id,
+					      uint32_t *pe_num)
+{
+#ifdef CONFIG_IOMMU_API
+	struct iommu_group *iommu_grp = iommu_group_get_by_id(iommu_grp_id);
+	struct iommu_table *tbl;
+	struct pnv_ioda_pe *pe;
+	int ret = -ENODEV;
+
+	if (!iommu_grp)
+		return -ENODEV;
+	tbl = iommu_group_get_iommudata(iommu_grp);
+	pe = tbl ? container_of(tbl, struct pnv_ioda_pe, tce32_table) : NULL;
+	if (pe && pe->phb) {
+		*phb_id = pe->phb->opal_id;
+		*pe_num = pe->pe_number;
+		ret = 0;
+	}
+	iommu_group_put(iommu_grp);	/* drop the get_by_id() reference */
+	return ret;
+#else
+	return -ENXIO;
+#endif
+}
+
+/*
+ * Parse "type:addr:mask:iommu_group_id:function" (hex fields) from @buf
+ * into the ioa members of @ei and resolve the group to a PHB ID and PE.
+ * Returns 0, -EINVAL (bad format), -ERANGE (bad function) or lookup error.
+ */
+static int errinjct_ioa_bus_error(const char *buf, struct OpalErrinjct *ei)
+{
+	uint32_t iommu_grp_id;
+	int ret;
+
+	/* Extract parameters; every conversion needs a pointer argument */
+	ret = sscanf(buf, "%x:%x:%x:%x:%x",
+		     &ei->type, &ei->ioa.addr,
+		     &ei->ioa.mask, &iommu_grp_id, &ei->ioa.function);
+	if (ret != 5)
+		return -EINVAL;
+
+	/* Invalid function ? */
+	if (ei->ioa.function < OpalEitIoaLoadMemAddr ||
+	    ei->ioa.function > OpalEitIoaDmaWriteMemTarget)
+		return -ERANGE;
+
+	return errinjct_iommu_group_to_phb_and_pe(iommu_grp_id,
+						  &ei->ioa.phb_id,
+						  &ei->ioa.pe);
+}
+
+/*
+ * 64-bit counterpart of the IOA bus error parser: reads the hex fields
+ * "type:addr:mask:iommu_group_id:function" from @buf into @ei->ioa64 and
+ * looks up the PHB ID and PE number of the given IOMMU group.
+ */
+static int errinjct_ioa_bus_error64(const char *buf, struct OpalErrinjct *ei)
+{
+	uint32_t group_id;
+	int nr_fields;
+
+	nr_fields = sscanf(buf, "%x:%llx:%llx:%x:%x",
+			   &ei->type, &ei->ioa64.addr,
+			   &ei->ioa64.mask, &group_id, &ei->ioa64.function);
+	if (nr_fields != 5)
+		return -EINVAL;
+
+	/* Only functions within the IOA range are accepted */
+	if (ei->ioa64.function < OpalEitIoaLoadMemAddr ||
+	    ei->ioa64.function > OpalEitIoaDmaWriteMemTarget)
+		return -ERANGE;
+
+	/* Propagate the lookup result: 0 on success, negative errno otherwise */
+	return errinjct_iommu_group_to_phb_and_pe(group_id,
+						  &ei->ioa64.phb_id,
+						  &ei->ioa64.pe);
+}
+
+/*
+ * sysfs store handler for /sys/firmware/opal/errinjct. The leading hex
+ * field of @buf selects the injection type, the type specific parser fills
+ * in the request and the request is passed to opal_err_injct(). Returns
+ * @count on success, the parser's negative errno for bad input, -EAGAIN
+ * if the injection lock is busy and -EIO if the OPAL call fails.
+ */
+static ssize_t errinjct_store(struct kobject *kobj,
+			      struct kobj_attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct OpalErrinjct ei = { };	/* no stack garbage reaches firmware */
+	ssize_t ret;
+
+	/* Extract common parameters */
+	if (sscanf(buf, "%x", &ei.type) != 1)
+		return -EINVAL;
+
+	/* Error injection might be in progress */
+	if (!mutex_trylock(&errinjct_mutex))
+		return -EAGAIN;
+
+	switch (ei.type) {
+	case OpalErrinjctTypeIoaBusError:
+		ret = errinjct_ioa_bus_error(buf, &ei);
+		break;
+	case OpalErrinjctTypeIoaBusError64:
+		ret = errinjct_ioa_bus_error64(buf, &ei);
+		break;
+	default:
+		ret = -ERANGE;
+	}
+
+	/* Only call OPAL once the request has been parsed successfully */
+	if (!ret) {
+		long rc = opal_err_injct(&ei);
+
+		ret = (rc == OPAL_SUCCESS) ? (ssize_t)count : -EIO;
+	}
+
+	mutex_unlock(&errinjct_mutex);
+	return ret;
+}
+
+static struct kobj_attribute errinjct_attr =	/* store only: show is NULL */
+	__ATTR(errinjct, 0600, NULL, errinjct_store);
+
+/*
+ * Register the errinjct attribute under the OPAL kobject. Failures are
+ * only logged: the function returns void in either case.
+ */
+void __init opal_errinjct_init(void)
+{
+	int err;
+
+	if (!opal_kobj) {
+		pr_warn("%s: opal kobject is not available\n", __func__);
+		return;
+	}
+
+	err = sysfs_create_file(opal_kobj, &errinjct_attr.attr);
+	if (err)
+		pr_warn("%s: Cannot create sysfs file (%d)\n", __func__, err);
+}
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 360ad80c..cb29bb5 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -604,6 +604,8 @@  static int __init opal_init(void)
 		opal_sys_param_init();
 		/* Setup message log interface. */
 		opal_msglog_init();
+		/* Setup error injection interface */
+		opal_errinjct_init();
 	}
 
 	return 0;