From patchwork Wed Apr 8 04:42:24 2015
X-Patchwork-Submitter: Deepak Goyal
X-Patchwork-Id: 459087
From: Deepak Goyal
To: gnurou@gmail.com, bskeggs@redhat.com, nouveau@lists.freedesktop.org,
	linux-tegra@vger.kernel.org
CC: Deepak Goyal
Subject: [PATCH V2] pmu/gk20a: PMU boot support.
Date: Wed, 8 Apr 2015 10:12:24 +0530
Message-ID: <1428468144-13988-1-git-send-email-dgoyal@nvidia.com>
X-Mailer: git-send-email 1.9.1

- Maps PMU firmware into PMU virtual memory.
- Copies the bootloader into PMU memory and starts it.
- Allows the PMU to interact with the host via interrupts.

Once fully configured (by patches to follow), the PMU will:
1. Autonomously power-gate the graphics engine when not in use, saving a
   significant amount of power.
2. Provide a better way to scale frequencies by reporting perf counters.
3. Be critical for GPU functionality, as future GPUs secure some of the
   register and memory accesses involved in context switching.

Signed-off-by: Deepak Goyal
---
 drm/nouveau/nvkm/subdev/pmu/gk20a.c | 855 +++++++++++++++++++++++++++++++++++-
 1 file changed, 840 insertions(+), 15 deletions(-)

diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drm/nouveau/nvkm/subdev/pmu/gk20a.c
index 594f746e68f2..328c0a68d615 100644
--- a/drm/nouveau/nvkm/subdev/pmu/gk20a.c
+++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -20,13 +20,180 @@
 * DEALINGS IN THE SOFTWARE.
*/ #include "priv.h" - +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for mdelay */ +#include #include #include #include +#define APP_VERSION_GK20A 17997577 +#define GK20A_PMU_UCODE_SIZE_MAX (256 * 1024) +#define PMU_QUEUE_COUNT 5 + +enum { + GK20A_PMU_DMAIDX_UCODE = 0, + GK20A_PMU_DMAIDX_VIRT = 1, + GK20A_PMU_DMAIDX_PHYS_VID = 2, + GK20A_PMU_DMAIDX_PHYS_SYS_COH = 3, + GK20A_PMU_DMAIDX_PHYS_SYS_NCOH = 4, + GK20A_PMU_DMAIDX_RSVD = 5, + GK20A_PMU_DMAIDX_PELPG = 6, + GK20A_PMU_DMAIDX_END = 7 +}; + +struct pmu_buf_desc { + struct nvkm_gpuobj *obj; + struct nvkm_vma vma; + size_t size; +}; + +struct nvkm_pmu_priv_vm { + struct nvkm_gpuobj *mem; + struct nvkm_gpuobj *pgd; + struct nvkm_vm *vm; +}; + +/* Choices for pmu_state */ +enum { + PMU_STATE_OFF, /*0 PMU is off */ + PMU_STATE_STARTING, /*1 PMU is on, but not booted */ + PMU_STATE_INIT_RECEIVED /*2 PMU init message received */ +}; + +struct pmu_mem_gk20a { + u32 dma_base; + u8 dma_offset; + u8 dma_idx; + u16 fb_size; +}; + +struct pmu_cmdline_args_gk20a { + u32 cpu_freq_hz; /* Frequency of the clock driving PMU */ + u32 falc_trace_size; /* falctrace buffer size (bytes) */ + u32 falc_trace_dma_base; /* 256-byte block address */ + u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */ + u8 secure_mode; + struct pmu_mem_gk20a gc6_ctx; /* dmem offset of gc6 context */ +}; + +#define GK20A_PMU_TRACE_BUFSIZE 0x4000 /* 4K */ +#define GK20A_PMU_DMEM_BLKSIZE2 8 + +#define GK20A_PMU_UCODE_NB_MAX_OVERLAY 32 +#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH 64 + +struct pmu_ucode_desc { + u32 descriptor_size; + u32 image_size; + u32 tools_version; + u32 app_version; + char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH]; + u32 bootloader_start_offset; + u32 bootloader_size; + u32 bootloader_imem_offset; + u32 bootloader_entry_point; + u32 app_start_offset; + u32 app_size; + u32 app_imem_offset; + u32 app_imem_entry; + u32 app_dmem_offset; + u32 app_resident_code_offset; + u32 app_resident_code_size; + u32 app_resident_data_offset; + u32 app_resident_data_size; + u32 nb_overlays; + struct {u32 start; u32 size; } load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY]; + u32 compressed; +}; + +#define PMU_UNIT_REWIND (0x00) +#define PMU_UNIT_PG (0x03) +#define PMU_UNIT_INIT (0x07) +#define PMU_UNIT_PERFMON (0x12) +#define PMU_UNIT_THERM (0x1B) +#define PMU_UNIT_RC (0x1F) +#define PMU_UNIT_NULL (0x20) +#define PMU_UNIT_END (0x23) + +#define PMU_UNIT_TEST_START (0xFE) +#define PMU_UNIT_END_SIM (0xFF) +#define PMU_UNIT_TEST_END (0xFF) + +#define PMU_UNIT_ID_IS_VALID(id) \ + (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START)) +#define PMU_DMEM_ALIGNMENT (4) + +struct pmu_hdr { + u8 unit_id; + u8 size; + u8 ctrl_flags; + u8 seq_id; +}; + +#define PMU_MSG_HDR_SIZE sizeof(struct pmu_hdr) + +enum { + PMU_INIT_MSG_TYPE_PMU_INIT = 0, +}; + +struct pmu_init_msg_pmu_gk20a { + u8 msg_type; + u8 pad; + u16 os_debug_entry_point; + + struct { + u16 size; + u16 offset; + u8 index; + u8 pad; + } queue_info[PMU_QUEUE_COUNT]; + + u16 sw_managed_area_offset; + u16 sw_managed_area_size; +}; + +struct pmu_init_msg { + union { + u8 msg_type; + struct pmu_init_msg_pmu_gk20a pmu_init_gk20a; + }; +}; + +enum { + PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0, +}; + +struct pmu_rc_msg_unhandled_cmd { + u8 msg_type; + u8 unit_id; +}; + +struct pmu_rc_msg { + u8 msg_type; + struct pmu_rc_msg_unhandled_cmd unhandled_cmd; +}; + +struct pmu_msg { + struct pmu_hdr hdr; + union { + struct pmu_init_msg init; + struct pmu_rc_msg rc; + } msg; +}; + #define 
BUSY_SLOT 0 #define CLK_SLOT 7 +#define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin" struct gk20a_pmu_dvfs_data { int p_load_target; @@ -39,8 +206,22 @@ struct gk20a_pmu_priv { struct nvkm_pmu base; struct nvkm_alarm alarm; struct gk20a_pmu_dvfs_data *data; + struct pmu_ucode_desc *desc; + struct pmu_buf_desc ucode; + struct pmu_buf_desc trace_buf; + struct mutex pmu_copy_lock; + bool pmu_ready; + int pmu_state; + struct nvkm_pmu_priv_vm pmuvm; + struct work_struct isr_workq; + bool initialized; + bool sw_ready; + struct mutex isr_mutex; + bool isr_enabled; }; +#define to_gk20a_priv(ptr) container_of(ptr, struct gk20a_pmu_priv, base) + struct gk20a_pmu_dvfs_dev_status { unsigned long total; unsigned long busy; @@ -48,6 +229,61 @@ struct gk20a_pmu_dvfs_dev_status { }; static int +gk20a_pmu_load_firmware(struct nvkm_pmu *ppmu, const struct firmware **pfw) +{ + struct nvkm_device *dev; + char name[32]; + + dev = nv_device(ppmu); + snprintf(name, sizeof(name), "nvidia/tegra124/%s", + GK20A_PMU_UCODE_IMAGE); + return request_firmware(pfw, name, nv_device_base(dev)); +} + +static void +gk20a_pmu_release_firmware(struct nvkm_pmu *ppmu, const struct firmware *pfw) +{ + nv_debug(ppmu, "firmware released\n"); + release_firmware(pfw); +} + +static void +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *ppmu, + const struct firmware *fw) +{ + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data; + + nv_debug(ppmu, "GK20A PMU firmware information\n"); + nv_debug(ppmu, "descriptor size = %u\n", desc->descriptor_size); + nv_debug(ppmu, "image size = %u\n", desc->image_size); + nv_debug(ppmu, "app_version = 0x%08x\n", desc->app_version); + nv_debug(ppmu, "date = %s\n", desc->date); + nv_debug(ppmu, "bootloader_start_offset = 0x%08x\n", + desc->bootloader_start_offset); + nv_debug(ppmu, "bootloader_size = 0x%08x\n", desc->bootloader_size); + nv_debug(ppmu, "bootloader_imem_offset = 0x%08x\n", + desc->bootloader_imem_offset); + nv_debug(ppmu, "bootloader_entry_point = 0x%08x\n", + desc->bootloader_entry_point); + nv_debug(ppmu, "app_start_offset = 0x%08x\n", desc->app_start_offset); + nv_debug(ppmu, "app_size = 0x%08x\n", desc->app_size); + nv_debug(ppmu, "app_imem_offset = 0x%08x\n", desc->app_imem_offset); + nv_debug(ppmu, "app_imem_entry = 0x%08x\n", desc->app_imem_entry); + nv_debug(ppmu, "app_dmem_offset = 0x%08x\n", desc->app_dmem_offset); + nv_debug(ppmu, "app_resident_code_offset = 0x%08x\n", + desc->app_resident_code_offset); + nv_debug(ppmu, "app_resident_code_size = 0x%08x\n", + desc->app_resident_code_size); + nv_debug(ppmu, "app_resident_data_offset = 0x%08x\n", + desc->app_resident_data_offset); + nv_debug(ppmu, "app_resident_data_size = 0x%08x\n", + desc->app_resident_data_size); + nv_debug(ppmu, "nb_overlays = %d\n", desc->nb_overlays); + + nv_debug(ppmu, "compressed = %u\n", desc->compressed); +} + +static int gk20a_pmu_dvfs_target(struct gk20a_pmu_priv *priv, int *state) { struct nvkm_clk *clk = nvkm_clk(priv); @@ -163,32 +399,601 @@ static int gk20a_pmu_fini(struct nvkm_object *object, bool suspend) { struct nvkm_pmu *pmu = (void *)object; - struct gk20a_pmu_priv *priv = (void *)pmu; - + struct gk20a_pmu_priv *priv = to_gk20a_priv(pmu); + nv_wr32(pmu, 0x10a014, 0x00000060); + flush_work(&pmu->recv.work); nvkm_timer_alarm_cancel(priv, &priv->alarm); return nvkm_subdev_fini(&pmu->base, suspend); } static int +gk20a_pmu_enable_hw(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc, + bool enable) +{ + if (enable) { + nv_mask(pmc, 0x000200, 0x00002000, 0x00002000); + nv_rd32(pmc, 0x00000200); + if 
(nv_wait(ppmu, 0x0010a10c, 0x00000006, 0x00000000)) + return 0; + nv_mask(pmc, 0x00000200, 0x2000, 0x00000000); + nv_error(ppmu, "Falcon mem scrubbing timeout\n"); + return -ETIMEDOUT; + } else { + nv_mask(pmc, 0x00000200, 0x2000, 0x00000000); + return 0; + } +} +static void +pmu_enable_irq(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc, bool enable) +{ + if (enable) { + nv_debug(ppmu, "enable pmu irq\n"); + nv_wr32(ppmu, 0x0010a010, 0xff); + nv_mask(pmc, 0x00000640, 0x1000000, 0x1000000); + nv_mask(pmc, 0x00000644, 0x1000000, 0x1000000); + } else { + nv_debug(ppmu, "disable pmu irq\n"); + nv_mask(pmc, 0x00000640, 0x1000000, 0x00000000); + nv_mask(pmc, 0x00000644, 0x1000000, 0x00000000); + nv_wr32(ppmu, 0x0010a014, 0xff); + } + +} + +static int +pmu_idle(struct nvkm_pmu *ppmu) +{ + if (!nv_wait(ppmu, 0x0010a04c, 0x0000ffff, 0x00000000)) { + nv_error(ppmu, "timedout waiting pmu idle\n"); + return -EBUSY; + } + + return 0; +} + +static int +pmu_enable(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc, bool enable) +{ + u32 pmc_enable; + int err; + + if (enable) { + err = gk20a_pmu_enable_hw(ppmu, pmc, true); + if (err) + return err; + + err = pmu_idle(ppmu); + if (err) + return err; + + pmu_enable_irq(ppmu, pmc, true); + } else { + pmc_enable = nv_rd32(pmc, 0x200); + if ((pmc_enable & 0x2000) != 0x0) { + pmu_enable_irq(ppmu, pmc, false); + gk20a_pmu_enable_hw(ppmu, pmc, false); + } + } + + return 0; +} + +int +pmu_reset(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc) +{ + int err; + + err = pmu_idle(ppmu); + if (err) + return err; + + /* TBD: release pmu hw mutex */ + + err = pmu_enable(ppmu, pmc, false); + if (err) + return err; + + err = pmu_enable(ppmu, pmc, true); + if (err) + return err; + + return 0; +} + +static void +pmu_copy_to_dmem(struct gk20a_pmu_priv *pmu, + u32 dst, u8 *src, u32 size, u8 port) +{ + u32 i, words, bytes; + u32 data, addr_mask; + u32 *src_u32 = (u32 *)src; + struct nvkm_pmu *ppmu = &pmu->base; + + if (size == 0) { + nv_error(ppmu, "size is zero\n"); + goto out; + } + + if (dst & 0x3) { + nv_error(ppmu, "dst (0x%08x) not 4-byte aligned\n", dst); + goto out; + } + + mutex_lock(&pmu->pmu_copy_lock); + + words = size >> 2; + bytes = size & 0x3; + + addr_mask = 0xfffc; + + dst &= addr_mask; + + nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (dst | (0x1 << 24))); + + for (i = 0; i < words; i++) { + nv_wr32(ppmu, (0x10a1c4 + (port * 8)), src_u32[i]); + nv_debug(ppmu, "0x%08x\n", src_u32[i]); + } + if (bytes > 0) { + data = 0; + for (i = 0; i < bytes; i++) + ((u8 *)&data)[i] = src[(words << 2) + i]; + nv_wr32(ppmu, (0x10a1c4 + (port * 8)), data); + nv_debug(ppmu, "0x%08x\n", data); + } + + data = nv_rd32(ppmu, (0x10a1c0 + (port * 8))) & addr_mask; + size = ALIGN(size, 4); + if (data != dst + size) { + nv_error(ppmu, "copy failed. 
bytes written %d, expected %d", + data - dst, size); + } + mutex_unlock(&pmu->pmu_copy_lock); +out: + nv_debug(ppmu, "exit %s", __func__); +} + +static void +pmu_copy_from_dmem(struct gk20a_pmu_priv *pmu, + u32 src, u8 *dst, u32 size, u8 port) +{ + u32 i, words, bytes; + u32 data, addr_mask; + u32 *dst_u32 = (u32 *)dst; + struct nvkm_pmu *ppmu = &pmu->base; + + if (size == 0) { + nv_error(ppmu, "size is zero\n"); + goto out; + } + + if (src & 0x3) { + nv_error(ppmu, "src (0x%08x) not 4-byte aligned\n", src); + goto out; + } + + mutex_lock(&pmu->pmu_copy_lock); + + words = size >> 2; + bytes = size & 0x3; + + addr_mask = 0xfffc; + + src &= addr_mask; + + nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (src | (0x1 << 25))); + + for (i = 0; i < words; i++) { + dst_u32[i] = nv_rd32(ppmu, (0x0010a1c4 + port * 8)); + nv_debug(ppmu, "0x%08x\n", dst_u32[i]); + } + if (bytes > 0) { + data = nv_rd32(ppmu, (0x0010a1c4 + port * 8)); + nv_debug(ppmu, "0x%08x\n", data); + + for (i = 0; i < bytes; i++) + dst[(words << 2) + i] = ((u8 *)&data)[i]; + } + mutex_unlock(&pmu->pmu_copy_lock); +out: + nv_debug(ppmu, "exit %s\n", __func__); +} + +static int +pmu_process_init_msg(struct gk20a_pmu_priv *pmu, struct pmu_msg *msg) +{ + struct nvkm_pmu *ppmu = &pmu->base; + struct pmu_init_msg_pmu_gk20a *init; + u32 tail; + + tail = nv_rd32(ppmu, 0x0010a4cc); + + pmu_copy_from_dmem(pmu, tail, + (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0); + + if (msg->hdr.unit_id != PMU_UNIT_INIT) { + nv_error(ppmu, + "expecting init msg"); + return -EINVAL; + } + + pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE, + (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0); + + if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) { + nv_error(ppmu, + "expecting init msg"); + return -EINVAL; + } + + tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT); + nv_wr32(ppmu, 0x0010a4cc, tail); + init = &msg->msg.init.pmu_init_gk20a; + pmu->pmu_ready = true; + pmu->pmu_state = PMU_STATE_INIT_RECEIVED; + nv_debug(ppmu, "init msg processed\n"); + return 0; +} + +static void +gk20a_pmu_process_message(struct work_struct *work) +{ + struct gk20a_pmu_priv *pmu = container_of(work, + struct gk20a_pmu_priv, isr_workq); + struct pmu_msg msg; + struct nvkm_pmu *ppmu = &pmu->base; + struct nvkm_mc *pmc = nvkm_mc(ppmu); + + mutex_lock(&pmu->isr_mutex); + if (unlikely(!pmu->pmu_ready)) { + nv_debug(ppmu, "processing init msg\n"); + pmu_process_init_msg(pmu, &msg); + mutex_unlock(&pmu->isr_mutex); + pmu_enable_irq(ppmu, pmc, true); + } else + mutex_unlock(&pmu->isr_mutex); +} + +static int +gk20a_pmu_init_vm(struct nvkm_pmu *ppmu, const struct firmware *fw) +{ + int ret = 0; + struct gk20a_pmu_priv *pmu = to_gk20a_priv(ppmu); + u32 *ucode_image; + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data; + int i; + struct nvkm_pmu_priv_vm *ppmuvm = &pmu->pmuvm; + struct nvkm_device *device = nv_device(&ppmu->base); + struct nvkm_vm *vm; + const u64 pmu_area_len = 300*1024; + /* mem for inst blk*/ + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x1000, 0, 0, + &ppmuvm->mem); + if (ret) + goto instblk_alloc_err; + + /* mem for pgd*/ + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x8000, 0, 0, + &ppmuvm->pgd); + if (ret) + goto pgd_alloc_err; + + /*allocate virtual memory range*/ + ret = nvkm_vm_new(device, 0, pmu_area_len, 0, &vm); + if (ret) + goto fw_alloc_err; + + atomic_inc(&vm->engref[NVDEV_SUBDEV_PMU]); + /*update VM with pgd */ + + ret = nvkm_vm_ref(vm, &ppmuvm->vm, ppmuvm->pgd); + if (ret) + goto fw_alloc_err; + + /*update pgd in inst blk */ + 
nv_wo32(ppmuvm->mem, 0x0200, lower_32_bits(ppmuvm->pgd->addr)); + nv_wo32(ppmuvm->mem, 0x0204, upper_32_bits(ppmuvm->pgd->addr)); + nv_wo32(ppmuvm->mem, 0x0208, lower_32_bits(pmu_area_len - 1)); + nv_wo32(ppmuvm->mem, 0x020c, upper_32_bits(pmu_area_len - 1)); + + /* allocate memory for pmu fw to be copied to*/ + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, + GK20A_PMU_UCODE_SIZE_MAX, 0x1000, 0, &pmu->ucode.obj); + if (ret) + goto fw_alloc_err; + + ucode_image = (u32 *)((u8 *)desc + desc->descriptor_size); + for (i = 0; i < (desc->app_start_offset + desc->app_size); i += 4) + nv_wo32(pmu->ucode.obj, i, ucode_image[i/4]); + + /* map allocated memory into GMMU */ + ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->ucode.obj), vm, + NV_MEM_ACCESS_RW, &pmu->ucode.vma); + if (ret) + goto map_err; + + nv_debug(ppmu, "%s function end\n", __func__); + return ret; +map_err: + nvkm_gpuobj_destroy(pmu->ucode.obj); +fw_alloc_err: + nvkm_gpuobj_destroy(ppmuvm->pgd); +pgd_alloc_err: + nvkm_gpuobj_destroy(ppmuvm->mem); +instblk_alloc_err: + return ret; +} + +static int +gk20a_init_pmu_setup_sw(struct nvkm_pmu *ppmu) +{ + struct gk20a_pmu_priv *pmu = to_gk20a_priv(ppmu); + struct nvkm_pmu_priv_vm *ppmuvm = &pmu->pmuvm; + int err = 0; + int ret = 0; + + if (pmu->sw_ready) { + nv_debug(ppmu, "skipping init\n"); + goto skip_init; + } + + /* no infoRom script from vbios? */ + + /* TBD: sysmon subtask */ + + INIT_WORK(&pmu->isr_workq, gk20a_pmu_process_message); + + ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, GK20A_PMU_TRACE_BUFSIZE, + 0, 0, &pmu->trace_buf.obj); + if (ret) + goto err; + ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->trace_buf.obj), + ppmuvm->vm, + NV_MEM_ACCESS_RW, + &pmu->trace_buf.vma); + if (ret) + goto map_err; + + pmu->sw_ready = true; + +skip_init: + return 0; +map_err: + nvkm_gpuobj_destroy(pmu->trace_buf.obj); +err: + return err; +} + +static int +pmu_bootstrap(struct gk20a_pmu_priv *pmu) +{ + struct nvkm_pmu *ppmu = &pmu->base; + struct pmu_ucode_desc *desc = pmu->desc; + u32 addr_code, addr_data, addr_load; + u32 i, blocks, addr_args; + struct pmu_cmdline_args_gk20a cmdline_args; + struct nvkm_pmu_priv_vm *ppmuvm = &pmu->pmuvm; + nv_mask(ppmu, 0x0010a048, 0x01, 0x01); + /*bind the address*/ + nv_wr32(ppmu, 0x0010a480, + ppmuvm->mem->addr >> 12 | + 0x1 << 30 | + 0x20000000); + + /* TBD: load all other surfaces */ + cmdline_args.falc_trace_size = GK20A_PMU_TRACE_BUFSIZE; + cmdline_args.falc_trace_dma_base = + lower_32_bits(pmu->trace_buf.vma.offset >> 8); + cmdline_args.falc_trace_dma_idx = GK20A_PMU_DMAIDX_VIRT; + cmdline_args.cpu_freq_hz = 204; + cmdline_args.secure_mode = 0; + + addr_args = (nv_rd32(ppmu, 0x0010a108) >> 9) & 0x1ff; + addr_args = addr_args << GK20A_PMU_DMEM_BLKSIZE2; + addr_args -= sizeof(struct pmu_cmdline_args_gk20a); + nv_debug(ppmu, "initiating copy to dmem\n"); + pmu_copy_to_dmem(pmu, addr_args, + (u8 *)&cmdline_args, + sizeof(struct pmu_cmdline_args_gk20a), 0); + + nv_wr32(ppmu, 0x0010a1c0, 0x1 << 24); + + addr_code = lower_32_bits((pmu->ucode.vma.offset + + desc->app_start_offset + + desc->app_resident_code_offset) >> 8); + + addr_data = lower_32_bits((pmu->ucode.vma.offset + + desc->app_start_offset + + desc->app_resident_data_offset) >> 8); + + addr_load = lower_32_bits((pmu->ucode.vma.offset + + desc->bootloader_start_offset) >> 8); + + nv_wr32(ppmu, 0x0010a1c4, GK20A_PMU_DMAIDX_UCODE); + nv_debug(ppmu, "0x%08x\n", GK20A_PMU_DMAIDX_UCODE); + nv_wr32(ppmu, 0x0010a1c4, (addr_code)); + nv_debug(ppmu, "0x%08x\n", (addr_code)); + nv_wr32(ppmu, 0x0010a1c4, desc->app_size); + 
nv_debug(ppmu, "0x%08x\n", desc->app_size); + nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_code_size); + nv_debug(ppmu, "0x%08x\n", desc->app_resident_code_size); + nv_wr32(ppmu, 0x0010a1c4, desc->app_imem_entry); + nv_debug(ppmu, "0x%08x\n", desc->app_imem_entry); + nv_wr32(ppmu, 0x0010a1c4, (addr_data)); + nv_debug(ppmu, "0x%08x\n", (addr_data)); + nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_data_size); + nv_debug(ppmu, "0x%08x\n", desc->app_resident_data_size); + nv_wr32(ppmu, 0x0010a1c4, (addr_code)); + nv_debug(ppmu, "0x%08x\n", (addr_code)); + nv_wr32(ppmu, 0x0010a1c4, 0x1); + nv_debug(ppmu, "0x%08x\n", 1); + nv_wr32(ppmu, 0x0010a1c4, addr_args); + nv_debug(ppmu, "0x%08x\n", addr_args); + + nv_wr32(ppmu, 0x0010a110, + (addr_load) - (desc->bootloader_imem_offset >> 8)); + + blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8; + + for (i = 0; i < blocks; i++) { + nv_wr32(ppmu, 0x0010a114, + desc->bootloader_imem_offset + (i << 8)); + nv_wr32(ppmu, 0x0010a11c, + desc->bootloader_imem_offset + (i << 8)); + nv_wr32(ppmu, 0x0010a118, + 0x01 << 4 | + 0x06 << 8 | + ((GK20A_PMU_DMAIDX_UCODE & 0x07) << 12)); + } + + nv_wr32(ppmu, 0x0010a104, + (0xffffffff & desc->bootloader_entry_point)); + + nv_wr32(ppmu, 0x0010a100, 0x1 << 1); + + nv_wr32(ppmu, 0x0010a080, desc->app_version); + + return 0; +} + +static int +gk20a_init_pmu_setup_hw1(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc) +{ + struct gk20a_pmu_priv *pmu = to_gk20a_priv(ppmu); + int err; + + mutex_lock(&pmu->isr_mutex); + pmu_reset(ppmu, pmc); + pmu->isr_enabled = true; + mutex_unlock(&pmu->isr_mutex); + + /* setup apertures - virtual */ + nv_wr32(ppmu, 0x10a600 + 0 * 4, 0x0); + nv_wr32(ppmu, 0x10a600 + 1 * 4, 0x0); + /* setup apertures - physical */ + nv_wr32(ppmu, 0x10a600 + 2 * 4, 0x4 | 0x0); + nv_wr32(ppmu, 0x10a600 + 3 * 4, 0x4 | 0x1); + nv_wr32(ppmu, 0x10a600 + 4 * 4, 0x4 | 0x2); + + /* TBD: load pmu ucode */ + err = pmu_bootstrap(pmu); + if (err) + return err; + + return 0; +} + + +static void +gk20a_pmu_intr(struct nvkm_subdev *subdev) +{ + struct nvkm_pmu *ppmu = nvkm_pmu(subdev); + struct gk20a_pmu_priv *pmu = to_gk20a_priv(ppmu); + struct nvkm_mc *pmc = nvkm_mc(ppmu); + u32 intr, mask; + if (!pmu->isr_enabled) + return; + + mask = nv_rd32(ppmu, 0x0010a018) & nv_rd32(ppmu, 0x0010a01c); + + intr = nv_rd32(ppmu, 0x0010a008) & mask; + + nv_error(ppmu, "received falcon interrupt: 0x%08x", intr); + pmu_enable_irq(ppmu, pmc, false); + if (!intr || pmu->pmu_state == PMU_STATE_OFF) { + nv_wr32(ppmu, 0x0010a004, intr); + nv_error(ppmu, "pmu state off\n"); + pmu_enable_irq(ppmu, pmc, true); + goto out; + } + if (intr & 0x10) + nv_error(ppmu, "pmu halt intr not implemented"); + + if (intr & 0x20) { + nv_error(ppmu, + "pmu exterr intr not implemented. 
Clearing interrupt."); + nv_mask(ppmu, 0x0010a16c, (0x1 << 31), 0x00000000); + goto out; + } + if (intr & 0x40) { + nv_debug(ppmu, "scheduling work\n"); + schedule_work(&pmu->isr_workq); + goto out; + } + +out: + pmu_enable_irq(ppmu, pmc, true); + nv_wr32(ppmu, 0x0010a004, intr); + nv_debug(ppmu, "irq handled\n"); +} + +static void +gk20a_pmu_pgob(struct nvkm_pmu *ppmu, bool enable) +{ +} + +static int gk20a_pmu_init(struct nvkm_object *object) { - struct nvkm_pmu *pmu = (void *)object; - struct gk20a_pmu_priv *priv = (void *)pmu; + struct nvkm_pmu *ppmu = (void *)object; + struct nvkm_mc *pmc = nvkm_mc(object); + struct gk20a_pmu_priv *pmu = to_gk20a_priv(ppmu); + const struct firmware *pmufw = NULL; int ret; - ret = nvkm_subdev_init(&pmu->base); + nv_subdev(ppmu)->intr = gk20a_pmu_intr; + + mutex_init(&pmu->isr_mutex); + mutex_init(&pmu->pmu_copy_lock); + + if (pmufw == NULL) { + ret = gk20a_pmu_load_firmware(ppmu, &pmufw); + if (ret < 0) { + nv_error(ppmu, "failed to load pmu fimware\n"); + return ret; + } + ret = gk20a_pmu_init_vm(ppmu, pmufw); + if (ret < 0) { + nv_error(ppmu, "failed to map pmu fw to va space\n"); + goto err; + } + } + pmu->desc = (struct pmu_ucode_desc *)pmufw->data; + gk20a_pmu_dump_firmware_info(ppmu, pmufw); + + if (pmu->desc->app_version != APP_VERSION_GK20A) { + nv_error(ppmu, + "PMU code version not supported version: %d\n", + pmu->desc->app_version); + ret = -EINVAL; + goto err; + } + + ret = gk20a_init_pmu_setup_sw(ppmu); if (ret) - return ret; + goto err; + + pmu->pmu_state = PMU_STATE_STARTING; + ret = gk20a_init_pmu_setup_hw1(ppmu, pmc); + if (ret) + goto err; - pmu->pgob = nvkm_pmu_pgob; + ret = nvkm_subdev_init(&ppmu->base); + if (ret) + goto err; - /* init pwr perf counter */ - nv_wr32(pmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001); - nv_wr32(pmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002); - nv_wr32(pmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003); + ppmu->pgob = nvkm_pmu_pgob; - nvkm_timer_alarm(pmu, 2000000000, &priv->alarm); + pmu->initialized = true; + /* init pmu perf counter */ + nv_wr32(ppmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001); + nv_wr32(ppmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002); + nv_wr32(ppmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003); + + nvkm_timer_alarm(ppmu, 2000000000, &pmu->alarm); +err: + gk20a_pmu_release_firmware(ppmu, pmufw); return ret; } @@ -218,13 +1023,33 @@ gk20a_pmu_ctor(struct nvkm_object *parent, struct nvkm_object *engine, return 0; } +static void +gk20a_pmu_dtor(struct nvkm_object *object) +{ + struct nvkm_pmu *ppmu = (void *)object; + struct nvkm_mc *pmc = nvkm_mc(object); + struct gk20a_pmu_priv *pmu = to_gk20a_priv(ppmu); + + /* make sure the pending operations are finished before we continue */ + cancel_work_sync(&pmu->isr_workq); + pmu->initialized = false; + + mutex_lock(&pmu->isr_mutex); + pmu_enable(ppmu, pmc, false); + pmu->isr_enabled = false; + mutex_unlock(&pmu->isr_mutex); + pmu->pmu_state = PMU_STATE_OFF; + pmu->pmu_ready = false; +} + struct nvkm_oclass * gk20a_pmu_oclass = &(struct nvkm_pmu_impl) { .base.handle = NV_SUBDEV(PMU, 0xea), .base.ofuncs = &(struct nvkm_ofuncs) { .ctor = gk20a_pmu_ctor, - .dtor = _nvkm_pmu_dtor, + .dtor = gk20a_pmu_dtor, .init = gk20a_pmu_init, .fini = gk20a_pmu_fini, }, + .pgob = gk20a_pmu_pgob, }.base;