diff mbox series

[RFC,v2,1/3] Add sysfs attribute for CXL 1.1 device link status

Message ID 20240227083313.87699-2-kobayashi.da-06@fujitsu.com
State New
Headers show
Series Display cxl1.1 device link status | expand

Commit Message

Kobayashi,Daisuke Feb. 27, 2024, 8:33 a.m. UTC
This patch implements a process to output the link status information 
of the CXL1.1 device to sysfs. The values of the registers related to 
the link status are outputted into three separate files.

In CXL1.1, the link status of the device is included in the RCRB mapped to
the memory mapped register area. This function accesses the address where
the device's RCRB is mapped.

Signed-off-by: "Kobayashi,Daisuke" <kobayashi.da-06@fujitsu.com>
---
 drivers/cxl/pci.c | 201 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 201 insertions(+)

Comments

Bjorn Helgaas Feb. 27, 2024, 4:43 p.m. UTC | #1
On Tue, Feb 27, 2024 at 05:33:11PM +0900, Kobayashi,Daisuke wrote:
> This patch implements a process to output the link status information 
> of the CXL1.1 device to sysfs. The values of the registers related to 
> the link status are outputted into three separate files.

> +static u32 cxl_rcrb_to_linkcap(struct device *dev, resource_size_t rcrb)
> +{
> +	void __iomem *addr;
> +	u8 offset = 0;

Unnecessary initialization.  Also, readw() returns u16.

> +	u32 cap_hdr;
> +	u32 linkcap = 0;

Ditto.

> +
> +	if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE))
> +		return 0;
> +
> +	if (!request_mem_region(rcrb, SZ_4K, dev_name(dev)))
> +		return 0;
> +
> +	addr = ioremap(rcrb, SZ_4K);
> +	if (!addr)
> +		goto out;
> +
> +	offset = readw(addr + PCI_CAPABILITY_LIST);
> +	offset = offset & 0x00ff;
> +	cap_hdr = readl(addr + offset);
> +	while ((cap_hdr & 0x000000ff) != PCI_CAP_ID_EXP) {
> +		offset = (cap_hdr >> 8) & 0x000000ff;
> +		if (!offset)
> +			break;
> +		cap_hdr = readl(addr + offset);
> +	}

Hmmm, it's a shame we have to reimplement pci_find_capability() here.
I see the problem though -- pci_find_capability() does config reads
and this is in MMIO space, not config space.

But you need this several times, so maybe a helper function would
still be useful so you don't have to repeat the code.

> +	if (offset)
> +		dev_dbg(dev, "found PCIe capability (0x%x)\n", offset);

Testing "offset" acknowledges the possibility that it may be NULL, and
in that case, the readl() below is a NULL pointer dereference.

> +	linkcap = readl(addr + offset + PCI_EXP_LNKCAP);
> +	iounmap(addr);
> +out:
> +	release_mem_region(rcrb, SZ_4K);
> +
> +	return linkcap;
> +}

> @@ -806,6 +1003,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	if (IS_ERR(mds))
>  		return PTR_ERR(mds);
>  	cxlds = &mds->cxlds;
> +	device_create_file(&pdev->dev, &dev_attr_rcd_link_cap);
> +	device_create_file(&pdev->dev, &dev_attr_rcd_link_ctrl);
> +	device_create_file(&pdev->dev, &dev_attr_rcd_link_status);

Is there a removal issue here?  What if "pdev" is removed?  Or what if
this module is unloaded?  Do these sysfs files get cleaned up
automagically somehow?

Bjorn
Kobayashi,Daisuke Feb. 28, 2024, 8:28 a.m. UTC | #2
> -----Original Message-----
> From: Bjorn Helgaas <helgaas@kernel.org>
> Sent: Wednesday, February 28, 2024 1:44 AM
> To: Kobayashi, Daisuke/小林 大介 <kobayashi.da-06@fujitsu.com>
> Cc: Kobayashi, Daisuke/小林 大介 <kobayashi.da-06@fujitsu.com>;
> linux-cxl@vger.kernel.org; Gotou, Yasunori/五島 康文 <y-goto@fujitsu.com>;
> linux-pci@vger.kernel.org; mj@ucw.cz; dan.j.williams@intel.com
> Subject: Re: [RFC PATCH v2 1/3] Add sysfs attribute for CXL 1.1 device link
> status
> 
> On Tue, Feb 27, 2024 at 05:33:11PM +0900, Kobayashi,Daisuke wrote:
> > This patch implements a process to output the link status information
> > of the CXL1.1 device to sysfs. The values of the registers related to
> > the link status are outputted into three separate files.
> 
> > +static u32 cxl_rcrb_to_linkcap(struct device *dev, resource_size_t rcrb)
> > +{
> > +	void __iomem *addr;
> > +	u8 offset = 0;
> 
> Unnecessary initialization.  Also, readw() returns u16.
> 
> > +	u32 cap_hdr;
> > +	u32 linkcap = 0;
> 
> Ditto.
> 

Thank you, I will fix them.

> > +
> > +	if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE))
> > +		return 0;
> > +
> > +	if (!request_mem_region(rcrb, SZ_4K, dev_name(dev)))
> > +		return 0;
> > +
> > +	addr = ioremap(rcrb, SZ_4K);
> > +	if (!addr)
> > +		goto out;
> > +
> > +	offset = readw(addr + PCI_CAPABILITY_LIST);
> > +	offset = offset & 0x00ff;
> > +	cap_hdr = readl(addr + offset);
> > +	while ((cap_hdr & 0x000000ff) != PCI_CAP_ID_EXP) {
> > +		offset = (cap_hdr >> 8) & 0x000000ff;
> > +		if (!offset)
> > +			break;
> > +		cap_hdr = readl(addr + offset);
> > +	}
> 
> Hmmm, it's a shame we have to reimplement pci_find_capability() here.
> I see the problem though -- pci_find_capability() does config reads
> and this is in MMIO space, not config space.
> 
> But you need this several times, so maybe a helper function would
> still be useful so you don't have to repeat the code.
>

I'll take your suggestion and create a helper function.

> > +	if (offset)
> > +		dev_dbg(dev, "found PCIe capability (0x%x)\n", offset);
> 
> Testing "offset" acknowledges the possibility that it may be NULL, and
> in that case, the readl() below is a NULL pointer dereference.
> 

I will check them.

> > +	linkcap = readl(addr + offset + PCI_EXP_LNKCAP);
> > +	iounmap(addr);
> > +out:
> > +	release_mem_region(rcrb, SZ_4K);
> > +
> > +	return linkcap;
> > +}
> 
> > @@ -806,6 +1003,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const
> struct pci_device_id *id)
> >  	if (IS_ERR(mds))
> >  		return PTR_ERR(mds);
> >  	cxlds = &mds->cxlds;
> > +	device_create_file(&pdev->dev, &dev_attr_rcd_link_cap);
> > +	device_create_file(&pdev->dev, &dev_attr_rcd_link_ctrl);
> > +	device_create_file(&pdev->dev, &dev_attr_rcd_link_status);
> 
> Is there a removal issue here?  What if "pdev" is removed?  Or what if
> this module is unloaded?  Do these sysfs files get cleaned up
> automagically somehow?
> 
> Bjorn

Thank you, I overlooked my consideration of the removal issue.
I will check current code and add a cleanup process.
Dan Williams March 2, 2024, 2:22 a.m. UTC | #3
Daisuke Kobayashi (Fujitsu) wrote:
> 
> 
> > -----Original Message-----
> > From: Bjorn Helgaas <helgaas@kernel.org>
> > Sent: Wednesday, February 28, 2024 1:44 AM
> > To: Kobayashi, Daisuke/小林 大介 <kobayashi.da-06@fujitsu.com>
> > Cc: Kobayashi, Daisuke/小林 大介 <kobayashi.da-06@fujitsu.com>;
> > linux-cxl@vger.kernel.org; Gotou, Yasunori/五島 康文 <y-goto@fujitsu.com>;
> > linux-pci@vger.kernel.org; mj@ucw.cz; dan.j.williams@intel.com
> > Subject: Re: [RFC PATCH v2 1/3] Add sysfs attribute for CXL 1.1 device link
> > status
> > 
> > On Tue, Feb 27, 2024 at 05:33:11PM +0900, Kobayashi,Daisuke wrote:
> > > This patch implements a process to output the link status information
> > > of the CXL1.1 device to sysfs. The values of the registers related to
> > > the link status are outputted into three separate files.
> > 
> > > +static u32 cxl_rcrb_to_linkcap(struct device *dev, resource_size_t rcrb)
> > > +{
> > > +	void __iomem *addr;
> > > +	u8 offset = 0;
> > 
> > Unnecessary initialization.  Also, readw() returns u16.
> > 
> > > +	u32 cap_hdr;
> > > +	u32 linkcap = 0;
> > 
> > Ditto.
> > 
> 
> Thank you, I will fix them.
> 
> > > +
> > > +	if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE))
> > > +		return 0;
> > > +
> > > +	if (!request_mem_region(rcrb, SZ_4K, dev_name(dev)))
> > > +		return 0;
> > > +
> > > +	addr = ioremap(rcrb, SZ_4K);
> > > +	if (!addr)
> > > +		goto out;
> > > +
> > > +	offset = readw(addr + PCI_CAPABILITY_LIST);
> > > +	offset = offset & 0x00ff;
> > > +	cap_hdr = readl(addr + offset);
> > > +	while ((cap_hdr & 0x000000ff) != PCI_CAP_ID_EXP) {
> > > +		offset = (cap_hdr >> 8) & 0x000000ff;
> > > +		if (!offset)
> > > +			break;
> > > +		cap_hdr = readl(addr + offset);
> > > +	}
> > 
> > Hmmm, it's a shame we have to reimplement pci_find_capability() here.
> > I see the problem though -- pci_find_capability() does config reads
> > and this is in MMIO space, not config space.
> > 
> > But you need this several times, so maybe a helper function would
> > still be useful so you don't have to repeat the code.
> >
> 
> I'll take your suggestion and create a helper function.

There is already a cxl_rcrb_to_aer() in the CXL core, I would recommend
just converting that function to one that take a capability id.

[..]
> > > @@ -806,6 +1003,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const
> > struct pci_device_id *id)
> > >  	if (IS_ERR(mds))
> > >  		return PTR_ERR(mds);
> > >  	cxlds = &mds->cxlds;
> > > +	device_create_file(&pdev->dev, &dev_attr_rcd_link_cap);
> > > +	device_create_file(&pdev->dev, &dev_attr_rcd_link_ctrl);
> > > +	device_create_file(&pdev->dev, &dev_attr_rcd_link_status);
> > 
> > Is there a removal issue here?  What if "pdev" is removed?  Or what if
> > this module is unloaded?  Do these sysfs files get cleaned up
> > automagically somehow?
> > 
> > Bjorn
> 
> Thank you, I overlooked my consideration of the removal issue.
> I will check current code and add a cleanup process.
> 

No, the proper way to register sysfs attributes already includes cleanup
tied to the device lifetime. Since these can only show up once the driver
is loaded they are so-called driver "dev_groups" attributes. The way to
declare them is something like this (UNTESTED!):

diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 2ff361e756d6..c8535078c74f 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -964,6 +964,30 @@ static const struct pci_error_handlers cxl_error_handlers = {
        .cor_error_detected     = cxl_cor_error_detected,
 };
 
+static umode_t cxl_rcd_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+       struct device *dev = kobj_to_dev(kobj);
+       struct pci_dev *pdev = to_pci_dev(dev);
+
+       if (!is_cxl_restricted(pdev))
+               return 0;
+       return a->mode;
+}
+
+static struct attribute *cxl_rcd_attrs[] = {
+       &dev_attr_rcd_link_cap,
+       &dev_attr_rcd_link_ctrl,
+       &dev_attr_rcd_link_status,
+       NULL,
+};
+
+static struct attribute_group cxl_rcd_group = {
+       .attrs = cxl_rcd_attrs,
+       .is_visible = cxl_rcd_visible,
+};
+
+__ATTRIBUTE_GROUPS(cxl_rcd);
+
 static struct pci_driver cxl_pci_driver = {
        .name                   = KBUILD_MODNAME,
        .id_table               = cxl_mem_pci_tbl,
@@ -971,6 +995,7 @@ static struct pci_driver cxl_pci_driver = {
        .err_handler            = &cxl_error_handlers,
        .driver = {
                .probe_type     = PROBE_PREFER_ASYNCHRONOUS,
+               .dev_groups     = &cxl_rcd_groups,
        },
 };
diff mbox series

Patch

diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 4fd1f207c84e..8302249819d0 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -780,6 +780,203 @@  static int cxl_event_config(struct pci_host_bridge *host_bridge,
 
 	return 0;
 }
+static u32 cxl_rcrb_to_linkcap(struct device *dev, resource_size_t rcrb)
+{
+	void __iomem *addr;
+	u8 offset = 0;
+	u32 cap_hdr;
+	u32 linkcap = 0;
+
+	if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE))
+		return 0;
+
+	if (!request_mem_region(rcrb, SZ_4K, dev_name(dev)))
+		return 0;
+
+	addr = ioremap(rcrb, SZ_4K);
+	if (!addr)
+		goto out;
+
+	offset = readw(addr + PCI_CAPABILITY_LIST);
+	offset = offset & 0x00ff;
+	cap_hdr = readl(addr + offset);
+	while ((cap_hdr & 0x000000ff) != PCI_CAP_ID_EXP) {
+		offset = (cap_hdr >> 8) & 0x000000ff;
+		if (!offset)
+			break;
+		cap_hdr = readl(addr + offset);
+	}
+
+	if (offset)
+		dev_dbg(dev, "found PCIe capability (0x%x)\n", offset);
+
+	linkcap = readl(addr + offset + PCI_EXP_LNKCAP);
+	iounmap(addr);
+out:
+	release_mem_region(rcrb, SZ_4K);
+
+	return linkcap;
+}
+
+static ssize_t rcd_link_cap_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct cxl_port *port;
+	struct cxl_dport *dport;
+	struct device *parent = dev->parent;
+	struct pci_dev *parent_pdev = to_pci_dev(parent);
+	u32 linkcap;
+
+	port = cxl_pci_find_port(parent_pdev, &dport);
+	if (!port)
+		return -EINVAL;
+
+	linkcap = cxl_rcrb_to_linkcap(dev, dport->rcrb.base + SZ_4K);
+	return sysfs_emit(buf, "%x\n", linkcap);
+}
+static DEVICE_ATTR_RO(rcd_link_cap);
+
+static u16 cxl_rcrb_to_linkctr(struct device *dev, resource_size_t rcrb)
+{
+	void __iomem *addr;
+	u8 offset = 0;
+	u16 linkctrl = 0;
+	u32 cap_hdr;
+
+	if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE))
+		return 0;
+
+	if (!request_mem_region(rcrb, SZ_4K, dev_name(dev)))
+		return 0;
+
+	addr = ioremap(rcrb, SZ_4K);
+	if (!addr)
+		goto out;
+
+	offset = readw(addr + PCI_CAPABILITY_LIST);
+	offset = offset & 0x00ff;
+	cap_hdr = readl(addr + offset);
+	while ((cap_hdr & 0x000000ff) != PCI_CAP_ID_EXP) {
+		offset = (cap_hdr >> 8) & 0x000000ff;
+		if (!offset)
+			break;
+		cap_hdr = readl(addr + offset);
+	}
+
+	if (offset)
+		dev_dbg(dev, "found PCIe capability (0x%x)\n", offset);
+	linkctrl = readw(addr + offset + PCI_EXP_LNKCTL);
+
+	iounmap(addr);
+out:
+	release_mem_region(rcrb, SZ_4K);
+
+	return linkctrl;
+}
+
+static ssize_t rcd_link_ctrl_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct cxl_port *port;
+	struct cxl_dport *dport;
+	struct device *parent = dev->parent;
+	struct pci_dev *parent_pdev = to_pci_dev(parent);
+	u16 linkctrl;
+
+	port = cxl_pci_find_port(parent_pdev, &dport);
+	if (!port)
+		return -EINVAL;
+
+
+	linkctrl = cxl_rcrb_to_linkctr(dev, dport->rcrb.base + SZ_4K);
+
+	return sysfs_emit(buf, "%x\n", linkctrl);
+}
+static DEVICE_ATTR_RO(rcd_link_ctrl);
+
+static u16 cxl_rcrb_to_linkstatus(struct device *dev, resource_size_t rcrb)
+{
+	void __iomem *addr;
+	u8 offset = 0;
+	u16 linksta = 0;
+	u32 cap_hdr;
+
+	if (WARN_ON_ONCE(rcrb == CXL_RESOURCE_NONE))
+		return 0;
+
+	if (!request_mem_region(rcrb, SZ_4K, dev_name(dev)))
+		return 0;
+
+	addr = ioremap(rcrb, SZ_4K);
+	if (!addr)
+		goto out;
+
+	offset = readw(addr + PCI_CAPABILITY_LIST);
+	offset = offset & 0x00ff;
+	cap_hdr = readl(addr + offset);
+	while ((cap_hdr & 0x000000ff) != PCI_CAP_ID_EXP) {
+		offset = (cap_hdr >> 8) & 0x000000ff;
+		if (!offset)
+			break;
+		cap_hdr = readl(addr + offset);
+	}
+
+	if (offset)
+		dev_dbg(dev, "found PCIe capability (0x%x)\n", offset);
+
+	linksta = readw(addr + offset + PCI_EXP_LNKSTA);
+	iounmap(addr);
+out:
+	release_mem_region(rcrb, SZ_4K);
+
+	return linksta;
+}
+
+static ssize_t rcd_link_status_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct cxl_port *port;
+	struct cxl_dport *dport;
+	struct device *parent = dev->parent;
+	struct pci_dev *parent_pdev = to_pci_dev(parent);
+	u16 linkstatus;
+
+	port = cxl_pci_find_port(parent_pdev, &dport);
+	if (!port)
+		return -EINVAL;
+
+
+	linkstatus = cxl_rcrb_to_linkstatus(dev, dport->rcrb.base + SZ_4K);
+
+	return sysfs_emit(buf, "%x\n", linkstatus);
+}
+static DEVICE_ATTR_RO(rcd_link_status);
+
+static struct attribute *cxl_rcd_attrs[] = {
+		&dev_attr_rcd_link_cap.attr,
+		&dev_attr_rcd_link_ctrl.attr,
+		&dev_attr_rcd_link_status.attr,
+		NULL
+};
+
+static umode_t cxl_rcd_visible(struct kobject *kobj,
+					  struct attribute *a, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (is_cxl_restricted(pdev))
+		return a->mode;
+
+	return 0;
+}
+
+static struct attribute_group cxl_rcd_group = {
+		.attrs = cxl_rcd_attrs,
+		.is_visible = cxl_rcd_visible,
+};
+
+__ATTRIBUTE_GROUPS(cxl_rcd);
 
 static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
@@ -806,6 +1003,9 @@  static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (IS_ERR(mds))
 		return PTR_ERR(mds);
 	cxlds = &mds->cxlds;
+	device_create_file(&pdev->dev, &dev_attr_rcd_link_cap);
+	device_create_file(&pdev->dev, &dev_attr_rcd_link_ctrl);
+	device_create_file(&pdev->dev, &dev_attr_rcd_link_status);
 	pci_set_drvdata(pdev, cxlds);
 
 	cxlds->rcd = is_cxl_restricted(pdev);
@@ -967,6 +1167,7 @@  static struct pci_driver cxl_pci_driver = {
 	.err_handler		= &cxl_error_handlers,
 	.driver	= {
 		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,
+		.dev_groups	= cxl_rcd_groups,
 	},
 };