diff mbox

[X/Y,SRU] cxl: Flush PSL cache before resetting the adapter

Message ID 1476127740-19139-1-git-send-email-tim.gardner@canonical.com
State New
Headers show

Commit Message

Tim Gardner Oct. 10, 2016, 7:29 p.m. UTC
From: Frederic Barrat <fbarrat@linux.vnet.ibm.com>

BugLink: http://bugs.launchpad.net/bugs/1632049

If the capi link is going down while the PSL owns a dirty cache line,
any access from the host for that data could lead to an Uncorrectable
Error.

So when resetting the capi adapter through sysfs, make sure the PSL
cache is flushed. It won't help if there are any active Process
Elements on the card, as the cache would likely get new dirty cache
lines immediately, but if resetting an idle adapter, it should avoid
any bad surprises from data left over from terminated Process Elements.

Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Acked-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
(cherry picked from commit aaa2245ed836824f21f8e42e0ab63b1637d1cb20)
Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
---
 drivers/misc/cxl/cxl.h    |  6 +++++-
 drivers/misc/cxl/native.c | 31 +++++++++++++++++++++++++++++++
 drivers/misc/cxl/pci.c    |  3 +++
 3 files changed, 39 insertions(+), 1 deletion(-)

Comments

Seth Forshee Oct. 11, 2016, 12:30 p.m. UTC | #1

Colin Ian King Oct. 11, 2016, 1:27 p.m. UTC | #2
On 10/10/16 20:29, Tim Gardner wrote:
> From: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
> 
> BugLink: http://bugs.launchpad.net/bugs/1632049
> 
> If the capi link is going down while the PSL owns a dirty cache line,
> any access from the host for that data could lead to an Uncorrectable
> Error.
> 
> So when resetting the capi adapter through sysfs, make sure the PSL
> cache is flushed. It won't help if there are any active Process
> Elements on the card, as the cache would likely get new dirty cache
> lines immediately, but if resetting an idle adapter, it should avoid
> any bad surprises from data left over from terminated Process Elements.
> 
> Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
> Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
> Acked-by: Ian Munsie <imunsie@au1.ibm.com>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> (cherry picked from commit aaa2245ed836824f21f8e42e0ab63b1637d1cb20)
> Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
> ---
>  drivers/misc/cxl/cxl.h    |  6 +++++-
>  drivers/misc/cxl/native.c | 31 +++++++++++++++++++++++++++++++
>  drivers/misc/cxl/pci.c    |  3 +++
>  3 files changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
> index 9ecb54b..47137de5 100644
> --- a/drivers/misc/cxl/cxl.h
> +++ b/drivers/misc/cxl/cxl.h
> @@ -155,7 +155,10 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An     = {0x0A0};
>  #define CXL_PSL_SPAP_V    0x0000000000000001ULL
>  
>  /****** CXL_PSL_Control ****************************************************/
> -#define CXL_PSL_Control_tb 0x0000000000000001ULL
> +#define CXL_PSL_Control_tb              (0x1ull << (63-63))
> +#define CXL_PSL_Control_Fr              (0x1ull << (63-31))
> +#define CXL_PSL_Control_Fs_MASK         (0x3ull << (63-29))
> +#define CXL_PSL_Control_Fs_Complete     (0x3ull << (63-29))
>  
>  /****** CXL_PSL_DLCNTL *****************************************************/
>  #define CXL_PSL_DLCNTL_D (0x1ull << (63-28))
> @@ -808,6 +811,7 @@ int cxl_register_one_irq(struct cxl *adapter, irq_handler_t handler,
>  int cxl_check_error(struct cxl_afu *afu);
>  int cxl_afu_slbia(struct cxl_afu *afu);
>  int cxl_tlb_slb_invalidate(struct cxl *adapter);
> +int cxl_data_cache_flush(struct cxl *adapter);
>  int cxl_afu_disable(struct cxl_afu *afu);
>  int cxl_psl_purge(struct cxl_afu *afu);
>  
> diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
> index ecf7557..09e30e2 100644
> --- a/drivers/misc/cxl/native.c
> +++ b/drivers/misc/cxl/native.c
> @@ -269,6 +269,37 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter)
>  	return 0;
>  }
>  
> +int cxl_data_cache_flush(struct cxl *adapter)
> +{
> +	u64 reg;
> +	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
> +
> +	pr_devel("Flushing data cache\n");
> +
> +	reg = cxl_p1_read(adapter, CXL_PSL_Control);
> +	reg |= CXL_PSL_Control_Fr;
> +	cxl_p1_write(adapter, CXL_PSL_Control, reg);
> +
> +	reg = cxl_p1_read(adapter, CXL_PSL_Control);
> +	while ((reg & CXL_PSL_Control_Fs_MASK) != CXL_PSL_Control_Fs_Complete) {
> +		if (time_after_eq(jiffies, timeout)) {
> +			dev_warn(&adapter->dev, "WARNING: cache flush timed out!\n");
> +			return -EBUSY;
> +		}
> +
> +		if (!cxl_ops->link_ok(adapter, NULL)) {
> +			dev_warn(&adapter->dev, "WARNING: link down when flushing cache\n");
> +			return -EIO;
> +		}
> +		cpu_relax();
> +		reg = cxl_p1_read(adapter, CXL_PSL_Control);
> +	}
> +
> +	reg &= ~CXL_PSL_Control_Fr;
> +	cxl_p1_write(adapter, CXL_PSL_Control, reg);
> +	return 0;
> +}
> +
>  static int cxl_write_sstp(struct cxl_afu *afu, u64 sstp0, u64 sstp1)
>  {
>  	int rc;
> diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
> index 4aea07e..94fceaa 100644
> --- a/drivers/misc/cxl/pci.c
> +++ b/drivers/misc/cxl/pci.c
> @@ -956,6 +956,9 @@ int cxl_pci_reset(struct cxl *adapter)
>  
>  	dev_info(&dev->dev, "CXL reset\n");
>  
> +	/* the adapter is about to be reset, so ignore errors */
> +	cxl_data_cache_flush(adapter);
> +
>  	/* pcie_warm_reset requests a fundamental pci reset which includes a
>  	 * PERST assert/deassert.  PERST triggers a loading of the image
>  	 * if "user" or "factory" is selected in sysfs */
> 
Seems a reasonable fix, and its scope is limited to one driver.

Acked-by: Colin Ian King <colin.king@canonical.com>
Seth Forshee Oct. 11, 2016, 1:43 p.m. UTC | #3
Applied to xenial/yakkety.
diff mbox

Patch

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 9ecb54b..47137de5 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -155,7 +155,10 @@  static const cxl_p2n_reg_t CXL_PSL_WED_An     = {0x0A0};
 #define CXL_PSL_SPAP_V    0x0000000000000001ULL
 
 /****** CXL_PSL_Control ****************************************************/
-#define CXL_PSL_Control_tb 0x0000000000000001ULL
+#define CXL_PSL_Control_tb              (0x1ull << (63-63))
+#define CXL_PSL_Control_Fr              (0x1ull << (63-31))
+#define CXL_PSL_Control_Fs_MASK         (0x3ull << (63-29))
+#define CXL_PSL_Control_Fs_Complete     (0x3ull << (63-29))
 
 /****** CXL_PSL_DLCNTL *****************************************************/
 #define CXL_PSL_DLCNTL_D (0x1ull << (63-28))
@@ -808,6 +811,7 @@  int cxl_register_one_irq(struct cxl *adapter, irq_handler_t handler,
 int cxl_check_error(struct cxl_afu *afu);
 int cxl_afu_slbia(struct cxl_afu *afu);
 int cxl_tlb_slb_invalidate(struct cxl *adapter);
+int cxl_data_cache_flush(struct cxl *adapter);
 int cxl_afu_disable(struct cxl_afu *afu);
 int cxl_psl_purge(struct cxl_afu *afu);
 
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index ecf7557..09e30e2 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -269,6 +269,37 @@  int cxl_tlb_slb_invalidate(struct cxl *adapter)
 	return 0;
 }
 
+int cxl_data_cache_flush(struct cxl *adapter)
+{
+	u64 reg;
+	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
+
+	pr_devel("Flushing data cache\n");
+
+	reg = cxl_p1_read(adapter, CXL_PSL_Control);
+	reg |= CXL_PSL_Control_Fr;
+	cxl_p1_write(adapter, CXL_PSL_Control, reg);
+
+	reg = cxl_p1_read(adapter, CXL_PSL_Control);
+	while ((reg & CXL_PSL_Control_Fs_MASK) != CXL_PSL_Control_Fs_Complete) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_warn(&adapter->dev, "WARNING: cache flush timed out!\n");
+			return -EBUSY;
+		}
+
+		if (!cxl_ops->link_ok(adapter, NULL)) {
+			dev_warn(&adapter->dev, "WARNING: link down when flushing cache\n");
+			return -EIO;
+		}
+		cpu_relax();
+		reg = cxl_p1_read(adapter, CXL_PSL_Control);
+	}
+
+	reg &= ~CXL_PSL_Control_Fr;
+	cxl_p1_write(adapter, CXL_PSL_Control, reg);
+	return 0;
+}
+
 static int cxl_write_sstp(struct cxl_afu *afu, u64 sstp0, u64 sstp1)
 {
 	int rc;
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 4aea07e..94fceaa 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -956,6 +956,9 @@  int cxl_pci_reset(struct cxl *adapter)
 
 	dev_info(&dev->dev, "CXL reset\n");
 
+	/* the adapter is about to be reset, so ignore errors */
+	cxl_data_cache_flush(adapter);
+
 	/* pcie_warm_reset requests a fundamental pci reset which includes a
 	 * PERST assert/deassert.  PERST triggers a loading of the image
 	 * if "user" or "factory" is selected in sysfs */