diff mbox series

[RFC,linux] powerpc/powernv: Add support for OPAL_DEBUG_READ/WRITE

Message ID 20200917163544.142593-8-clg@kaod.org
State Not Applicable
Headers show
Series [RFC,linux] powerpc/powernv: Add support for OPAL_DEBUG_READ/WRITE | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch fail Failed to apply to any branch
snowpatch_ozlabs/apply_patch warning Failed to apply on branch master (d362ae4f4c521a7faffb1befe2fbba467f2c4d18)

Commit Message

Cédric Le Goater Sept. 17, 2020, 4:35 p.m. UTC
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 arch/powerpc/include/asm/opal-api.h         |   4 +-
 arch/powerpc/include/asm/opal.h             |   2 +
 arch/powerpc/platforms/powernv/opal-call.c  |   2 +
 arch/powerpc/platforms/powernv/opal-debug.c | 124 ++++++++++++++++++++
 arch/powerpc/platforms/powernv/opal.c       |   2 +
 arch/powerpc/platforms/powernv/Makefile     |   1 +
 6 files changed, 134 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/platforms/powernv/opal-debug.c

Comments

Oliver O'Halloran Sept. 28, 2020, 12:30 a.m. UTC | #1
On Fri, Sep 18, 2020 at 2:35 AM Cédric Le Goater <clg@kaod.org> wrote:
>
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
> diff --git a/arch/powerpc/platforms/powernv/opal-debug.c b/arch/powerpc/platforms/powernv/opal-debug.c
> new file mode 100644
> index 000000000000..59fe9ea310a1
> --- /dev/null
> +++ b/arch/powerpc/platforms/powernv/opal-debug.c
> @@ -0,0 +1,124 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * PowerNV OPAL debugfs interface
> + *
> + * Copyright 2020 IBM Corp.
> + */
> +
> +#define pr_fmt(fmt)     "opal-stat: " fmt
> +
> +#include <asm/io.h>
> +#include <asm/opal.h>
> +#include <asm/debugfs.h>
> +#include <linux/debugfs.h>
> +#include <linux/seq_file.h>
> +#include <linux/of.h>
> +#include <linux/of_address.h>
> +#include <linux/of_platform.h>
> +#include <linux/types.h>
> +#include <linux/slab.h>
> +
> +static int opal_debug_show(struct seq_file *m, void *p)
> +{
> +       u64 id = (u64) m->private;
> +       int size = PAGE_SIZE;

If we're going to do this then we really need to be able to support
arbitrarily large outputs IMO. That would probably require us to
support seeking on the OPAL side since we can't spend forever inside
of OPAL with interrupts off just printing stuff. That said, it'd be
good if we could avoid over complicating all this.

Maybe we can get away with allocating a single large buffer at boot
and having all the opal_debug_read() calls use that under a mutex. It
probably wouldn't help with the interrupt problem, but maybe it
doesn't matter. Printing text is reasonably fast provided you aren't
actually doing IO so it might not matter.

> +       char *buf;
> +       int rc;
> +
> +       buf = kzalloc(size, GFP_KERNEL);
> +       if (!buf)
> +               return -ENOMEM;
> +
> +       rc = opal_debug_read(id, __pa(buf), size);
> +       if (rc < 0) {
> +               rc = opal_error_code(rc);
> +       } else {
> +               seq_puts(m, buf);
> +               rc = 0;
> +       }
> +
> +       kfree(buf);
> +       return rc;
> +}
> +
> +static int opal_debug_open(struct inode *inode, struct file *file)
> +{
> +       return single_open(file, opal_debug_show, inode->i_private);
> +}
> +
> +static ssize_t opal_debug_do_write(struct file *file, const char __user *user_buf,
> +                              size_t count, loff_t *ppos)
> +{
> +       u64 id = (u64) file_inode(file)->i_private;
> +       char buf[8] = { 0 };
> +       char *data;
> +       size_t size;
> +       int rc;
> +
> +       size = min(sizeof(buf) - 1, count);
> +       if (copy_from_user(buf, user_buf, size))
> +               return -EFAULT;
> +       data = strim(buf);
> +
> +       rc = opal_debug_write(id, __pa(data), size);
> +       if (rc < 0)
> +               return opal_error_code(rc);
> +
> +       return count;
> +}
> +
> +static const struct file_operations opal_debug_ops = {
> +       .open           = opal_debug_open,
> +       .write          = opal_debug_do_write,
> +       .read           = seq_read,
> +       .llseek         = no_llseek,
> +       .release        = single_release,
> +};
> +
> +static struct dentry *debug_dir;
> +
> +static int opal_debug_probe(struct platform_device *pdev)
> +{
> +       struct device_node *node = pdev->dev.of_node;
> +       u32 reg;
> +       const char *label;
> +       const char *name;
> +       u32 chip_id;
> +
> +       if (of_property_read_u32(node, "reg", &reg)) {
> +               pr_warn("OPAL: 'reg' property not found\n");
> +               return -ENODEV;
> +       }
> +
> +       if (of_property_read_string(node, "label", &label))
> +               label = node->name;
> +
> +       if (!of_property_read_u32(node, "ibm,chip-id", &chip_id))
> +               name = kasprintf(GFP_KERNEL, "%s-%x", label, chip_id);
> +       else
> +               name = kasprintf(GFP_KERNEL, "%s", label);
> +
> +       if (!debug_dir)
> +               debug_dir = debugfs_create_dir("opal-debug",
> +                                              powerpc_debugfs_root);
> +
> +       debugfs_create_file(name, 0644, debug_dir, (void *) (u64) reg,
> +                           &opal_debug_ops);

I think having all these in one directory is going to get a bit
unwieldy. Each PHB has 13 IODA tables and I'd like to have a seperate
debugfs file for each. Each chip has 6x PHBs so we'd have 13 * 6 * 2
(156) files for PHB debug alone in the same directory. Maybe add an
extra string to the DT which specifies a subdirectory name?

That said, for the PHBs we already have a debugfs directory for each
of them at /sys/kernel/debug/PCI<xxxx>/. It might be nice if we could
insert these opal debug files under the existing PHB debugfs directory
rather than having a seperate set of files elsewhere. If we're going
to do that we might need to stop building platform devices and just
have a helper built into the powernv platform which parses the
ibm,opal-debug node and inserts debugfs files. Having the helper set
OF_POPULATED so it doesn't get re-added might also work.

Oliver
Cédric Le Goater Sept. 28, 2020, 7:42 a.m. UTC | #2
On 9/28/20 2:30 AM, Oliver O'Halloran wrote:
> On Fri, Sep 18, 2020 at 2:35 AM Cédric Le Goater <clg@kaod.org> wrote:
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>> diff --git a/arch/powerpc/platforms/powernv/opal-debug.c b/arch/powerpc/platforms/powernv/opal-debug.c
>> new file mode 100644
>> index 000000000000..59fe9ea310a1
>> --- /dev/null
>> +++ b/arch/powerpc/platforms/powernv/opal-debug.c
>> @@ -0,0 +1,124 @@
>> +// SPDX-License-Identifier: GPL-2.0-or-later
>> +/*
>> + * PowerNV OPAL debugfs interface
>> + *
>> + * Copyright 2020 IBM Corp.
>> + */
>> +
>> +#define pr_fmt(fmt)     "opal-stat: " fmt
>> +
>> +#include <asm/io.h>
>> +#include <asm/opal.h>
>> +#include <asm/debugfs.h>
>> +#include <linux/debugfs.h>
>> +#include <linux/seq_file.h>
>> +#include <linux/of.h>
>> +#include <linux/of_address.h>
>> +#include <linux/of_platform.h>
>> +#include <linux/types.h>
>> +#include <linux/slab.h>
>> +
>> +static int opal_debug_show(struct seq_file *m, void *p)
>> +{
>> +       u64 id = (u64) m->private;
>> +       int size = PAGE_SIZE;
> 
> If we're going to do this then we really need to be able to support
> arbitrarily large outputs IMO. That would probably require us to
> support seeking on the OPAL side since we can't spend forever inside
> of OPAL with interrupts off just printing stuff. That said, it'd be
> good if we could avoid over complicating all this.

Fixing OPAL snprintf() would give more flexibility. we could initiate
the read with medium size buffers and retry from Linux if more space is 
needed. 

That said, the XIVE internal tables output can be quite big and even
one page is not enough. One idea could be to add a property hint on
the DT node for the initial allocation. 

> Maybe we can get away with allocating a single large buffer at boot
> and having all the opal_debug_read() calls use that under a mutex.

ok, and, if we start to have contention, we can always put in place a 
smarter scheme.

> It probably wouldn't help with the interrupt problem, but maybe it
> doesn't matter. Printing text is reasonably fast provided you aren't
> actually doing IO so it might not matter.

yes. the read side should just really dump values but locking in the 
OPAL driver could slow things a bit may be.

> 
>> +       char *buf;
>> +       int rc;
>> +
>> +       buf = kzalloc(size, GFP_KERNEL);
>> +       if (!buf)
>> +               return -ENOMEM;
>> +
>> +       rc = opal_debug_read(id, __pa(buf), size);
>> +       if (rc < 0) {
>> +               rc = opal_error_code(rc);
>> +       } else {
>> +               seq_puts(m, buf);
>> +               rc = 0;
>> +       }
>> +
>> +       kfree(buf);
>> +       return rc;
>> +}
>> +
>> +static int opal_debug_open(struct inode *inode, struct file *file)
>> +{
>> +       return single_open(file, opal_debug_show, inode->i_private);
>> +}
>> +
>> +static ssize_t opal_debug_do_write(struct file *file, const char __user *user_buf,
>> +                              size_t count, loff_t *ppos)
>> +{
>> +       u64 id = (u64) file_inode(file)->i_private;
>> +       char buf[8] = { 0 };
>> +       char *data;
>> +       size_t size;
>> +       int rc;
>> +
>> +       size = min(sizeof(buf) - 1, count);
>> +       if (copy_from_user(buf, user_buf, size))
>> +               return -EFAULT;
>> +       data = strim(buf);
>> +
>> +       rc = opal_debug_write(id, __pa(data), size);
>> +       if (rc < 0)
>> +               return opal_error_code(rc);
>> +
>> +       return count;
>> +}
>> +
>> +static const struct file_operations opal_debug_ops = {
>> +       .open           = opal_debug_open,
>> +       .write          = opal_debug_do_write,
>> +       .read           = seq_read,
>> +       .llseek         = no_llseek,
>> +       .release        = single_release,
>> +};
>> +
>> +static struct dentry *debug_dir;
>> +
>> +static int opal_debug_probe(struct platform_device *pdev)
>> +{
>> +       struct device_node *node = pdev->dev.of_node;
>> +       u32 reg;
>> +       const char *label;
>> +       const char *name;
>> +       u32 chip_id;
>> +
>> +       if (of_property_read_u32(node, "reg", &reg)) {
>> +               pr_warn("OPAL: 'reg' property not found\n");
>> +               return -ENODEV;
>> +       }
>> +
>> +       if (of_property_read_string(node, "label", &label))
>> +               label = node->name;
>> +
>> +       if (!of_property_read_u32(node, "ibm,chip-id", &chip_id))
>> +               name = kasprintf(GFP_KERNEL, "%s-%x", label, chip_id);
>> +       else
>> +               name = kasprintf(GFP_KERNEL, "%s", label);
>> +
>> +       if (!debug_dir)
>> +               debug_dir = debugfs_create_dir("opal-debug",
>> +                                              powerpc_debugfs_root);
>> +
>> +       debugfs_create_file(name, 0644, debug_dir, (void *) (u64) reg,
>> +                           &opal_debug_ops);
> 
> I think having all these in one directory is going to get a bit
> unwieldy. Each PHB has 13 IODA tables and I'd like to have a seperate
> debugfs file for each. Each chip has 6x PHBs so we'd have 13 * 6 * 2
> (156) files for PHB debug alone in the same directory. 

We need some kind hierarchy. I have hacked my way through for XIVE with 
a chip-id prefix but that doesn't scale well.

> Maybe add an extra string to the DT which specifies a subdirectory name?

Yes. I suppose the name could be an OPAL device name or an OPAL subsystem.

> That said, for the PHBs we already have a debugfs directory for each
> of them at /sys/kernel/debug/PCI<xxxx>/. It might be nice if we could
> insert these opal debug files under the existing PHB debugfs directory
> rather than having a seperate set of files elsewhere. If we're going> to do that we might need to stop building platform devices and just
> have a helper built into the powernv platform which parses the
> ibm,opal-debug node and inserts debugfs files. Having the helper set
> OF_POPULATED so it doesn't get re-added might also work.
 
That would probably fit better with the existing Linux PowerNV code  
which already adds debugfs dirs and files. The drivers would be
responsible for parsing their 'ibm,opal-debug' nodes and inserting 
the associated debugfs files where ever they want.

In that case, we don't need a "debug" directory in the DT containing 
all the 'ibm,opal-debug' nodes. They can be under the DT node of the 
device directly.


Thanks,

C.
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 1dffa3cb16ba..1a4ac8d1d944 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -214,7 +214,9 @@ 
 #define OPAL_SECVAR_GET				176
 #define OPAL_SECVAR_GET_NEXT			177
 #define OPAL_SECVAR_ENQUEUE_UPDATE		178
-#define OPAL_LAST				178
+#define OPAL_DEBUG_READ				181
+#define OPAL_DEBUG_WRITE			182
+#define OPAL_LAST				182
 
 #define QUIESCE_HOLD			1 /* Spin all calls at entry */
 #define QUIESCE_REJECT			2 /* Fail all calls with OPAL_BUSY */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 9986ac34b8e2..97f236180abc 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -311,6 +311,8 @@  s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, u64 *addr);
 
 s64 opal_signal_system_reset(s32 cpu);
 s64 opal_quiesce(u64 shutdown_type, s32 cpu);
+int64_t opal_debug_read(uint64_t id, uint64_t buf, uint64_t size);
+int64_t opal_debug_write(uint64_t id, uint64_t buf, uint64_t size);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index 5cd0f52d258f..c7e109220956 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -293,3 +293,5 @@  OPAL_CALL(opal_mpipl_query_tag,			OPAL_MPIPL_QUERY_TAG);
 OPAL_CALL(opal_secvar_get,			OPAL_SECVAR_GET);
 OPAL_CALL(opal_secvar_get_next,			OPAL_SECVAR_GET_NEXT);
 OPAL_CALL(opal_secvar_enqueue_update,		OPAL_SECVAR_ENQUEUE_UPDATE);
+OPAL_CALL(opal_debug_read,			OPAL_DEBUG_READ);
+OPAL_CALL(opal_debug_write,			OPAL_DEBUG_WRITE);
diff --git a/arch/powerpc/platforms/powernv/opal-debug.c b/arch/powerpc/platforms/powernv/opal-debug.c
new file mode 100644
index 000000000000..59fe9ea310a1
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-debug.c
@@ -0,0 +1,124 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerNV OPAL debugfs interface
+ *
+ * Copyright 2020 IBM Corp.
+ */
+
+#define pr_fmt(fmt)     "opal-stat: " fmt
+
+#include <asm/io.h>
+#include <asm/opal.h>
+#include <asm/debugfs.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+
+static int opal_debug_show(struct seq_file *m, void *p)
+{
+	u64 id = (u64) m->private;
+	int size = PAGE_SIZE;
+	char *buf;
+	int rc;
+
+	buf = kzalloc(size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	rc = opal_debug_read(id, __pa(buf), size);
+	if (rc < 0) {
+		rc = opal_error_code(rc);
+	} else {
+		seq_puts(m, buf);
+		rc = 0;
+	}
+
+	kfree(buf);
+	return rc;
+}
+
+static int opal_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, opal_debug_show, inode->i_private);
+}
+
+static ssize_t opal_debug_do_write(struct file *file, const char __user *user_buf,
+			       size_t count, loff_t *ppos)
+{
+	u64 id = (u64) file_inode(file)->i_private;
+	char buf[8] = { 0 };
+	char *data;
+	size_t size;
+	int rc;
+
+	size = min(sizeof(buf) - 1, count);
+	if (copy_from_user(buf, user_buf, size))
+		return -EFAULT;
+	data = strim(buf);
+
+	rc = opal_debug_write(id, __pa(data), size);
+	if (rc < 0)
+		return opal_error_code(rc);
+
+	return count;
+}
+
+static const struct file_operations opal_debug_ops = {
+	.open		= opal_debug_open,
+	.write		= opal_debug_do_write,
+	.read		= seq_read,
+	.llseek		= no_llseek,
+	.release	= single_release,
+};
+
+static struct dentry *debug_dir;
+
+static int opal_debug_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	u32 reg;
+	const char *label;
+	const char *name;
+	u32 chip_id;
+
+	if (of_property_read_u32(node, "reg", &reg)) {
+		pr_warn("OPAL: 'reg' property not found\n");
+		return -ENODEV;
+	}
+
+	if (of_property_read_string(node, "label", &label))
+		label = node->name;
+
+	if (!of_property_read_u32(node, "ibm,chip-id", &chip_id))
+		name = kasprintf(GFP_KERNEL, "%s-%x", label, chip_id);
+	else
+		name = kasprintf(GFP_KERNEL, "%s", label);
+
+	if (!debug_dir)
+		debug_dir = debugfs_create_dir("opal-debug",
+					       powerpc_debugfs_root);
+
+	debugfs_create_file(name, 0644, debug_dir, (void *) (u64) reg,
+			    &opal_debug_ops);
+	kfree(name);
+	return 0;
+}
+
+static const struct of_device_id opal_debug_of_match[] = {
+	{ .compatible = "ibm,opal-debug" },
+	{ }
+};
+
+static struct platform_driver opal_debug_driver = {
+	.probe = opal_debug_probe,
+	.driver = {
+		.name = "opal-debug",
+		.of_match_table = opal_debug_of_match,
+	},
+};
+
+builtin_platform_driver(opal_debug_driver);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index d95954ad4c0a..c883cf0db655 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -1093,6 +1093,8 @@  static int __init opal_init(void)
 	/* Initialize OPAL secure variables */
 	opal_pdev_init("ibm,secvar-backend");
 
+	opal_pdev_init("ibm,opal-debug");
+
 	return 0;
 }
 machine_subsys_initcall(powernv, opal_init);
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 2eb6ae150d1f..9cfd3942e0e8 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -5,6 +5,7 @@  obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
 obj-y			+= opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
 obj-y			+= opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
 obj-y			+= ultravisor.o
+obj-y			+= opal-debug.o
 
 obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_FA_DUMP)	+= opal-fadump.o