diff mbox series

[2/2,SRU,OEM-5.6] drm/amdgpu: add TMR destory function for psp

Message ID 20201228092441.926727-3-acelan.kao@canonical.com
State Accepted
Headers show
Series S3 stress test fails with amdgpu errors | expand

Commit Message

AceLan Kao Dec. 28, 2020, 9:24 a.m. UTC
From: Huang Rui <ray.huang@amd.com>

BugLink: https://bugs.launchpad.net/bugs/1909453

TMR is required to be destoried with GFX_CMD_ID_DESTROY_TMR while the
system goes to suspend. Otherwise, PSP may return the failure state
(0xFFFF007) on Gfx-2-PSP command GFX_CMD_ID_SETUP_TMR after do multiple
times suspend/resume.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(backported from commit 90937420c44f7535fd3ac4341a48c4c4dd1fe190)
Signed-off-by: Chia-Lin Kao (AceLan) <acelan.kao@canonical.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 62 +++++++++++++++++++++----
 1 file changed, 54 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index d50f29b9a64e..849d8588c1c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -318,6 +318,52 @@  static int psp_tmr_load(struct psp_context *psp)
 	return ret;
 }
 
+static void psp_prep_tmr_unload_cmd_buf(struct psp_context *psp,
+					struct psp_gfx_cmd_resp *cmd)
+{
+	if (amdgpu_sriov_vf(psp->adev))
+		cmd->cmd_id = GFX_CMD_ID_DESTROY_VMR;
+	else
+		cmd->cmd_id = GFX_CMD_ID_DESTROY_TMR;
+}
+
+static int psp_tmr_unload(struct psp_context *psp)
+{
+	int ret;
+	struct psp_gfx_cmd_resp *cmd;
+
+	cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	psp_prep_tmr_unload_cmd_buf(psp, cmd);
+	DRM_INFO("free PSP TMR buffer\n");
+
+	ret = psp_cmd_submit_buf(psp, NULL, cmd,
+				 psp->fence_buf_mc_addr);
+
+	kfree(cmd);
+
+	return ret;
+}
+
+static int psp_tmr_terminate(struct psp_context *psp)
+{
+	int ret;
+	void *tmr_buf;
+	void **pptr;
+
+	ret = psp_tmr_unload(psp);
+	if (ret)
+		return ret;
+
+	/* free TMR memory buffer */
+	pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
+	amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr);
+
+	return 0;
+}
+
 static void psp_prep_asd_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
 				uint64_t asd_mc, uint32_t size)
 {
@@ -1515,12 +1561,7 @@  static int psp_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct psp_context *psp = &adev->psp;
-	void *tmr_buf;
-	void **pptr;
-
-	if (adev->gmc.xgmi.num_physical_nodes > 1 &&
-	    psp->xgmi_context.initialized == 1)
-                psp_xgmi_terminate(psp);
+	int ret;
 
 	if (psp->adev->psp.ta_fw) {
 		psp_ras_terminate(psp);
@@ -1530,10 +1571,9 @@  static int psp_hw_fini(void *handle)
 
 	psp_asd_unload(psp);
 
+	psp_tmr_terminate(psp);
 	psp_ring_destroy(psp, PSP_RING_TYPE__KM);
 
-	pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
-	amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr);
 	amdgpu_bo_free_kernel(&psp->fw_pri_bo,
 			      &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
 	amdgpu_bo_free_kernel(&psp->fence_buf_bo,
@@ -1586,6 +1626,12 @@  static int psp_suspend(void *handle)
 		return ret;
 	}
 
+	ret = psp_tmr_terminate(psp);
+	if (ret) {
+		DRM_ERROR("Falied to terminate tmr\n");
+		return ret;
+	}
+
 	ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
 	if (ret) {
 		DRM_ERROR("PSP ring stop failed\n");