Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/406256/?format=api
{ "id": 406256, "url": "http://patchwork.ozlabs.org/api/patches/406256/?format=api", "web_url": "http://patchwork.ozlabs.org/project/linuxppc-dev/patch/1415030910-5799-4-git-send-email-shreyas@linux.vnet.ibm.com/", "project": { "id": 2, "url": "http://patchwork.ozlabs.org/api/projects/2/?format=api", "name": "Linux PPC development", "link_name": "linuxppc-dev", "list_id": "linuxppc-dev.lists.ozlabs.org", "list_email": "linuxppc-dev@lists.ozlabs.org", "web_url": "https://github.com/linuxppc/wiki/wiki", "scm_url": "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git", "webscm_url": "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/", "list_archive_url": "https://lore.kernel.org/linuxppc-dev/", "list_archive_url_format": "https://lore.kernel.org/linuxppc-dev/{}/", "commit_url_format": "https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/commit/?id={}" }, "msgid": "<1415030910-5799-4-git-send-email-shreyas@linux.vnet.ibm.com>", "list_archive_url": "https://lore.kernel.org/linuxppc-dev/1415030910-5799-4-git-send-email-shreyas@linux.vnet.ibm.com/", "date": "2014-11-03T16:08:29", "name": "[3/4] powernv: cpuidle: Redesign idle states management", "commit_ref": null, "pull_url": null, "state": "superseded", "archived": true, "hash": "c1887c9e38fafe403edc9277f086b27d367433ab", "submitter": { "id": 64129, "url": "http://patchwork.ozlabs.org/api/people/64129/?format=api", "name": "Shreyas B. 
Prabhu", "email": "shreyas@linux.vnet.ibm.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/linuxppc-dev/patch/1415030910-5799-4-git-send-email-shreyas@linux.vnet.ibm.com/mbox/", "series": [], "comments": "http://patchwork.ozlabs.org/api/patches/406256/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/406256/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>", "X-Original-To": [ "patchwork-incoming@ozlabs.org", "linuxppc-dev@lists.ozlabs.org" ], "Delivered-To": [ "patchwork-incoming@ozlabs.org", "linuxppc-dev@lists.ozlabs.org" ], "Received": [ "from lists.ozlabs.org (lists.ozlabs.org [103.22.144.68])\n\t(using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 13458140082\n\tfor <patchwork-incoming@ozlabs.org>;\n\tTue, 4 Nov 2014 03:13:52 +1100 (AEDT)", "from ozlabs.org (ozlabs.org [103.22.144.67])\n\tby lists.ozlabs.org (Postfix) with ESMTP id BE6121A0029\n\tfor <patchwork-incoming@ozlabs.org>;\n\tTue, 4 Nov 2014 03:13:51 +1100 (AEDT)", "from e34.co.us.ibm.com (e34.co.us.ibm.com [32.97.110.152])\n\t(using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits))\n\t(No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 515311A05AC\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tTue, 4 Nov 2014 03:09:08 +1100 (AEDT)", "from /spool/local\n\tby e34.co.us.ibm.com with IBM ESMTP SMTP Gateway: Authorized Use\n\tOnly! Violators will be prosecuted\n\tfor <linuxppc-dev@lists.ozlabs.org> from <shreyas@linux.vnet.ibm.com>;\n\tMon, 3 Nov 2014 09:09:05 -0700", "from d03dlp03.boulder.ibm.com (9.17.202.179)\n\tby e34.co.us.ibm.com (192.168.1.134) with IBM ESMTP SMTP Gateway:\n\tAuthorized Use Only! 
Violators will be prosecuted; \n\tMon, 3 Nov 2014 09:09:02 -0700", "from b03cxnp07029.gho.boulder.ibm.com\n\t(b03cxnp07029.gho.boulder.ibm.com [9.17.130.16])\n\tby d03dlp03.boulder.ibm.com (Postfix) with ESMTP id 2D69619D8053\n\tfor <linuxppc-dev@lists.ozlabs.org>;\n\tMon, 3 Nov 2014 08:57:44 -0700 (MST)", "from d03av01.boulder.ibm.com (d03av01.boulder.ibm.com\n\t[9.17.195.167])\n\tby b03cxnp07029.gho.boulder.ibm.com (8.14.9/8.14.9/NCO v10.0) with\n\tESMTP id sA3E4aKF49479694\n\tfor <linuxppc-dev@lists.ozlabs.org>; Mon, 3 Nov 2014 15:04:36 +0100", "from d03av01.boulder.ibm.com (localhost [127.0.0.1])\n\tby d03av01.boulder.ibm.com (8.14.4/8.14.4/NCO v10.0 AVout) with ESMTP\n\tid sA3G8u91027340\n\tfor <linuxppc-dev@lists.ozlabs.org>; Mon, 3 Nov 2014 09:08:56 -0700", "from adminib-ovr2cdm.in.ibm.com ([9.79.201.189])\n\tby d03av01.boulder.ibm.com (8.14.4/8.14.4/NCO v10.0 AVin) with ESMTP\n\tid sA3G8YB9025138; Mon, 3 Nov 2014 09:08:52 -0700" ], "From": "\"Shreyas B. Prabhu\" <shreyas@linux.vnet.ibm.com>", "To": "linux-kernel@vger.kernel.org", "Subject": "[PATCH 3/4] powernv: cpuidle: Redesign idle states management", "Date": "Mon, 3 Nov 2014 21:38:29 +0530", "Message-Id": "<1415030910-5799-4-git-send-email-shreyas@linux.vnet.ibm.com>", "X-Mailer": "git-send-email 1.9.3", "In-Reply-To": "<1415030910-5799-1-git-send-email-shreyas@linux.vnet.ibm.com>", "References": "<1415030910-5799-1-git-send-email-shreyas@linux.vnet.ibm.com>", "X-TM-AS-MML": "disable", "X-Content-Scanned": "Fidelis XPS MAILER", "x-cbid": "14110316-0017-0000-0000-00000606130B", "Cc": "linux-pm@vger.kernel.org,\n\t\"Shreyas B. Prabhu\" <shreyas@linux.vnet.ibm.com>, \n\t\"Rafael J. 
Wysocki\" <rjw@rjwysocki.net>,\n\tPaul Mackerras <paulus@samba.org>, linuxppc-dev@lists.ozlabs.org", "X-BeenThere": "linuxppc-dev@lists.ozlabs.org", "X-Mailman-Version": "2.1.18", "Precedence": "list", "List-Id": "Linux on PowerPC Developers Mail List\n\t<linuxppc-dev.lists.ozlabs.org>", "List-Unsubscribe": "<https://lists.ozlabs.org/options/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=unsubscribe>", "List-Archive": "<http://lists.ozlabs.org/pipermail/linuxppc-dev/>", "List-Post": "<mailto:linuxppc-dev@lists.ozlabs.org>", "List-Help": "<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=help>", "List-Subscribe": "<https://lists.ozlabs.org/listinfo/linuxppc-dev>,\n\t<mailto:linuxppc-dev-request@lists.ozlabs.org?subject=subscribe>", "MIME-Version": "1.0", "Content-Type": "text/plain; charset=\"utf-8\"", "Content-Transfer-Encoding": "base64", "Errors-To": "linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org", "Sender": "\"Linuxppc-dev\"\n\t<linuxppc-dev-bounces+patchwork-incoming=ozlabs.org@lists.ozlabs.org>" }, "content": "Deep idle states like sleep and winkle are per core idle states. A core\nenters these states only when all the threads enter either the\nparticular idle state or a deeper one. There are tasks like fastsleep\nhardware bug workaround and hypervisor core state save which have to be\ndone only by the last thread of the core entering deep idle state and\nsimilarly tasks like timebase resync, hypervisor core register restore\nthat have to be done only by the first thread waking up from these\nstates.\n\nThe current idle state management does not have a way to distinguish the\nfirst/last thread of the core waking/entering idle states. Tasks like\ntimebase resync are done for all the threads. This is not only\nsuboptimal, but can cause functionality issues when subcores and kvm are\ninvolved.\n\nThis patch adds the necessary infrastructure to track idle states of\nthreads in a per-core structure. 
It uses this info to perform tasks like\nfastsleep workaround and timebase resync only once per core.\n\nSigned-off-by: Shreyas B. Prabhu <shreyas@linux.vnet.ibm.com>\nOriginally-by: Preeti U. Murthy <preeti@linux.vnet.ibm.com>\nCc: Benjamin Herrenschmidt <benh@kernel.crashing.org>\nCc: Paul Mackerras <paulus@samba.org>\nCc: Michael Ellerman <mpe@ellerman.id.au>\nCc: Rafael J. Wysocki <rjw@rjwysocki.net>\nCc: linux-pm@vger.kernel.org\nCc: linuxppc-dev@lists.ozlabs.org\n---\n arch/powerpc/include/asm/cpuidle.h | 14 ++\n arch/powerpc/include/asm/opal.h | 2 +\n arch/powerpc/include/asm/paca.h | 4 +\n arch/powerpc/kernel/asm-offsets.c | 4 +\n arch/powerpc/kernel/exceptions-64s.S | 20 ++-\n arch/powerpc/kernel/idle_power7.S | 183 +++++++++++++++++++------\n arch/powerpc/platforms/powernv/opal-wrappers.S | 37 +++++\n arch/powerpc/platforms/powernv/setup.c | 52 ++++++-\n arch/powerpc/platforms/powernv/smp.c | 3 +-\n drivers/cpuidle/cpuidle-powernv.c | 3 +-\n 10 files changed, 267 insertions(+), 55 deletions(-)\n create mode 100644 arch/powerpc/include/asm/cpuidle.h", "diff": "diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h\nnew file mode 100644\nindex 0000000..8c82850\n--- /dev/null\n+++ b/arch/powerpc/include/asm/cpuidle.h\n@@ -0,0 +1,14 @@\n+#ifndef _ASM_POWERPC_CPUIDLE_H\n+#define _ASM_POWERPC_CPUIDLE_H\n+\n+#ifdef CONFIG_PPC_POWERNV\n+/* Used in powernv idle state management */\n+#define PNV_THREAD_RUNNING 0\n+#define PNV_THREAD_NAP 1\n+#define PNV_THREAD_SLEEP 2\n+#define PNV_THREAD_WINKLE 3\n+#define PNV_CORE_IDLE_LOCK_BIT 0x100\n+#define PNV_CORE_IDLE_THREAD_BITS 0x0FF\n+#endif\n+\n+#endif\ndiff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h\nindex f8b95c0..bef7fbc 100644\n--- a/arch/powerpc/include/asm/opal.h\n+++ b/arch/powerpc/include/asm/opal.h\n@@ -152,6 +152,7 @@ struct opal_sg_list {\n #define OPAL_PCI_ERR_INJECT\t\t\t96\n #define OPAL_PCI_EEH_FREEZE_SET\t\t\t97\n #define 
OPAL_HANDLE_HMI\t\t\t\t98\n+#define OPAL_CONFIG_CPU_IDLE_STATE\t\t99\n #define OPAL_REGISTER_DUMP_REGION\t\t101\n #define OPAL_UNREGISTER_DUMP_REGION\t\t102\n \n@@ -162,6 +163,7 @@ struct opal_sg_list {\n */\n #define OPAL_PM_NAP_ENABLED\t0x00010000\n #define OPAL_PM_SLEEP_ENABLED\t0x00020000\n+#define OPAL_PM_SLEEP_ENABLED_ER1\t0x00080000\n \n #ifndef __ASSEMBLY__\n \ndiff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h\nindex a5139ea..85aeedb 100644\n--- a/arch/powerpc/include/asm/paca.h\n+++ b/arch/powerpc/include/asm/paca.h\n@@ -158,6 +158,10 @@ struct paca_struct {\n \t * early exception handler for use by high level C handler\n \t */\n \tstruct opal_machine_check_event *opal_mc_evt;\n+\n+\t/* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */\n+\tu32 *core_idle_state_ptr;\n+\tu8 thread_idle_state;\t\t/* ~Idle[0]/Nap[1]/Sleep[2]/Winkle[3] */\n #endif\n #ifdef CONFIG_PPC_BOOK3S_64\n \t/* Exclusive emergency stack pointer for machine check exception. 
*/\ndiff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c\nindex 9d7dede..50f299e 100644\n--- a/arch/powerpc/kernel/asm-offsets.c\n+++ b/arch/powerpc/kernel/asm-offsets.c\n@@ -731,6 +731,10 @@ int main(void)\n \tDEFINE(OPAL_MC_SRR0, offsetof(struct opal_machine_check_event, srr0));\n \tDEFINE(OPAL_MC_SRR1, offsetof(struct opal_machine_check_event, srr1));\n \tDEFINE(PACA_OPAL_MC_EVT, offsetof(struct paca_struct, opal_mc_evt));\n+\tDEFINE(PACA_CORE_IDLE_STATE_PTR,\n+\t\t\toffsetof(struct paca_struct, core_idle_state_ptr));\n+\tDEFINE(PACA_THREAD_IDLE_STATE,\n+\t\t\toffsetof(struct paca_struct, thread_idle_state));\n #endif\n \n \treturn 0;\ndiff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S\nindex 72e783e..3311c8d 100644\n--- a/arch/powerpc/kernel/exceptions-64s.S\n+++ b/arch/powerpc/kernel/exceptions-64s.S\n@@ -15,6 +15,7 @@\n #include <asm/hw_irq.h>\n #include <asm/exception-64s.h>\n #include <asm/ptrace.h>\n+#include <asm/cpuidle.h>\n \n /*\n * We layout physical memory as follows:\n@@ -109,15 +110,19 @@ BEGIN_FTR_SECTION\n \trlwinm.\tr13,r13,47-31,30,31\n \tbeq\t9f\n \n-\t/* waking up from powersave (nap) state */\n \tcmpwi\tcr1,r13,2\n-\t/* Total loss of HV state is fatal, we could try to use the\n-\t * PIR to locate a PACA, then use an emergency stack etc...\n-\t * OPAL v3 based powernv platforms have new idle states\n-\t * which fall in this catagory.\n-\t */\n-\tbgt\tcr1,8f\n+\n \tGET_PACA(r13)\n+\tlbz\tr0,PACA_THREAD_IDLE_STATE(r13)\n+\tcmpwi cr2,r0,PNV_THREAD_NAP\n+\tbgt cr2,8f\t\t\t\t/* Either sleep or Winkle */\n+\n+\t/* Waking up from nap should not cause hypervisor state loss */\n+\tbgt\tcr1,.\n+\n+\t/* Waking up from nap */\n+\tli\tr0,PNV_THREAD_RUNNING\n+\tstb\tr0,PACA_THREAD_IDLE_STATE(r13)\t/* Clear thread state */\n \n #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE\n \tli\tr0,KVM_HWTHREAD_IN_KERNEL\n@@ -1386,6 +1391,7 @@ machine_check_handle_early:\n \tMACHINE_CHECK_HANDLER_WINDUP\n 
\tGET_PACA(r13)\n \tld\tr1,PACAR1(r13)\n+\tli\tr3,PNV_THREAD_NAP\n \tb\tpower7_enter_nap_mode\n 4:\n #endif\ndiff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S\nindex 283c603..df11acb 100644\n--- a/arch/powerpc/kernel/idle_power7.S\n+++ b/arch/powerpc/kernel/idle_power7.S\n@@ -18,6 +18,7 @@\n #include <asm/hw_irq.h>\n #include <asm/kvm_book3s_asm.h>\n #include <asm/opal.h>\n+#include <asm/cpuidle.h>\n \n #undef DEBUG\n \n@@ -123,12 +124,62 @@ power7_enter_nap_mode:\n \tli\tr4,KVM_HWTHREAD_IN_NAP\n \tstb\tr4,HSTATE_HWTHREAD_STATE(r13)\n #endif\n-\tcmpwi\tcr0,r3,1\n-\tbeq\t2f\n+\tstb\tr3,PACA_THREAD_IDLE_STATE(r13)\n+\tcmpwi\tcr1,r3,PNV_THREAD_SLEEP\n+\tbge\tcr1,2f\n \tIDLE_STATE_ENTER_SEQ(PPC_NAP)\n \t/* No return */\n-2:\tIDLE_STATE_ENTER_SEQ(PPC_SLEEP)\n-\t/* No return */\n+2:\n+\t/* Sleep or winkle */\n+\tli\tr7,1\n+\tmfspr\tr8,SPRN_PIR\n+\t/*\n+\t * The last 3 bits of PIR represents the thread id of a cpu\n+\t * in power8. This will need adjusting for power7.\n+\t */\n+\tandi.\tr8,r8,0x07\t\t\t/* Get thread id into r8 */\n+\trotld\tr7,r7,r8\n+\n+\tld\tr14,PACA_CORE_IDLE_STATE_PTR(r13)\n+lwarx_loop1:\n+\tlwarx\tr15,0,r14\n+\tandc\tr15,r15,r7\t\t\t/* Clear thread bit */\n+\n+\tandi.\tr15,r15,PNV_CORE_IDLE_THREAD_BITS\n+\tbeq\tlast_thread\n+\n+\t/* Not the last thread to goto sleep */\n+\tstwcx.\tr15,0,r14\n+\tbne-\tlwarx_loop1\n+\tb\tcommon_enter\n+\n+last_thread:\n+\tLOAD_REG_ADDR(r3, pnv_need_fastsleep_workaround)\n+\tlbz\tr3,0(r3)\n+\tcmpwi\tr3,1\n+\tbne\tcommon_enter\n+\t/*\n+\t * Last thread of the core entering sleep. Last thread needs to execute\n+\t * the hardware bug workaround code. 
Before that, set the lock bit to\n+\t * avoid the race of other threads waking up and undoing workaround\n+\t * before workaround is applied.\n+\t */\n+\tori\tr15,r15,PNV_CORE_IDLE_LOCK_BIT\n+\tstwcx.\tr15,0,r14\n+\tbne-\tlwarx_loop1\n+\n+\t/* Fast sleep workaround */\n+\tli\tr3,1\n+\tli\tr4,1\n+\tli\tr0,OPAL_CONFIG_CPU_IDLE_STATE\n+\tbl\topal_call_realmode\n+\n+\t/* Clear Lock bit */\n+\tandi.\tr15,r15,PNV_CORE_IDLE_THREAD_BITS\n+\tstw\tr15,0(r14)\n+\n+common_enter: /* common code for all the threads entering sleep */\n+\tIDLE_STATE_ENTER_SEQ(PPC_SLEEP)\n \n _GLOBAL(power7_idle)\n \t/* Now check if user or arch enabled NAP mode */\n@@ -141,49 +192,16 @@ _GLOBAL(power7_idle)\n \n _GLOBAL(power7_nap)\n \tmr\tr4,r3\n-\tli\tr3,0\n+\tli\tr3,1\n \tb\tpower7_powersave_common\n \t/* No return */\n \n _GLOBAL(power7_sleep)\n-\tli\tr3,1\n+\tli\tr3,2\n \tli\tr4,1\n \tb\tpower7_powersave_common\n \t/* No return */\n \n-/*\n- * Make opal call in realmode. This is a generic function to be called\n- * from realmode from reset vector. 
It handles endianess.\n- *\n- * r13 - paca pointer\n- * r1 - stack pointer\n- * r3 - opal token\n- */\n-opal_call_realmode:\n-\tmflr\tr12\n-\tstd\tr12,_LINK(r1)\n-\tld\tr2,PACATOC(r13)\n-\t/* Set opal return address */\n-\tLOAD_REG_ADDR(r0,return_from_opal_call)\n-\tmtlr\tr0\n-\t/* Handle endian-ness */\n-\tli\tr0,MSR_LE\n-\tmfmsr\tr12\n-\tandc\tr12,r12,r0\n-\tmtspr\tSPRN_HSRR1,r12\n-\tmr\tr0,r3\t\t\t/* Move opal token to r0 */\n-\tLOAD_REG_ADDR(r11,opal)\n-\tld\tr12,8(r11)\n-\tld\tr2,0(r11)\n-\tmtspr\tSPRN_HSRR0,r12\n-\thrfid\n-\n-return_from_opal_call:\n-\tFIXUP_ENDIAN\n-\tld\tr0,_LINK(r1)\n-\tmtlr\tr0\n-\tblr\n-\n #define CHECK_HMI_INTERRUPT\t\t\t\t\t\t\\\n \tmfspr\tr0,SPRN_SRR1;\t\t\t\t\t\t\\\n BEGIN_FTR_SECTION_NESTED(66);\t\t\t\t\t\t\\\n@@ -196,10 +214,8 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);\t\t\\\n \t/* Invoke opal call to handle hmi */\t\t\t\t\\\n \tld\tr2,PACATOC(r13);\t\t\t\t\t\\\n \tld\tr1,PACAR1(r13);\t\t\t\t\t\t\\\n-\tstd\tr3,ORIG_GPR3(r1);\t/* Save original r3 */\t\t\\\n-\tli\tr3,OPAL_HANDLE_HMI;\t/* Pass opal token argument*/\t\\\n+\tli\tr0,OPAL_HANDLE_HMI;\t/* Pass opal token argument*/\t\\\n \tbl\topal_call_realmode;\t\t\t\t\t\\\n-\tld\tr3,ORIG_GPR3(r1);\t/* Restore original r3 */\t\\\n 20:\tnop;\n \n \n@@ -210,12 +226,91 @@ _GLOBAL(power7_wakeup_tb_loss)\n BEGIN_FTR_SECTION\n \tCHECK_HMI_INTERRUPT\n END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)\n+\n+\tli\tr7,1\n+\tmfspr\tr8,SPRN_PIR\n+\t/*\n+\t * The last 3 bits of PIR represents the thread id of a cpu\n+\t * in power8. This will need adjusting for power7.\n+\t */\n+\tandi.\tr8,r8,0x07\t\t/* Get thread id into r8 */\n+\trotld\tr7,r7,r8\n+\t/* r7 now has 'thread_id'th bit set */\n+\n+\tld\tr14,PACA_CORE_IDLE_STATE_PTR(r13)\n+lwarx_loop2:\n+\tlwarx\tr15,0,r14\n+\tandi.\tr9,r15,PNV_CORE_IDLE_LOCK_BIT\n+\t/*\n+\t * Lock bit is set in one of the 2 cases-\n+\t * a. In the sleep/winkle enter path, the last thread is executing\n+\t * fastsleep workaround code.\n+\t * b. 
In the wake up path, another thread is executing fastsleep\n+\t * workaround undo code or resyncing timebase or restoring context\n+\t * In either case loop until the lock bit is cleared.\n+\t */\n+\tbne\tlwarx_loop2\n+\n+\tcmpwi\tcr2,r15,0\n+\tor\tr15,r15,r7\t\t/* Set thread bit */\n+\n+\tbeq\tcr2,first_thread\n+\n+\t/* Not first thread in core to wake up */\n+\tstwcx.\tr15,0,r14\n+\tbne-\tlwarx_loop2\n+\tb\tcommon_exit\n+\n+first_thread:\n+\t/* First thread in core to wakeup */\n+\tori\tr15,r15,PNV_CORE_IDLE_LOCK_BIT\n+\tstwcx.\tr15,0,r14\n+\tbne-\tlwarx_loop2\n+\n+\tLOAD_REG_ADDR(r3, pnv_need_fastsleep_workaround)\n+\tlbz\tr3,0(r3)\n+\tcmpwi\tr3,1\n+\t/* skip fastsleep workaround if its not needed */\n+\tbne\ttimebase_resync\n+\n+\t/* Undo fast sleep workaround */\n+\tmfcr\tr16\t/* Backup CR into a non-volatile register */\n+\tli\tr3,1\n+\tli\tr4,0\n+\tli\tr0,OPAL_CONFIG_CPU_IDLE_STATE\n+\tbl\topal_call_realmode\n+\tmtcr\tr16\t/* Restore CR */\n+\n+\t/* Do timebase resync if we are waking up from sleep. Use cr1 value\n+\t * set in exceptions-64s.S */\n+\tble\tcr1,clear_lock\n+\n+timebase_resync:\n \t/* Time base re-sync */\n-\tli\tr3,OPAL_RESYNC_TIMEBASE\n+\tli\tr0,OPAL_RESYNC_TIMEBASE\n \tbl\topal_call_realmode;\n-\n \t/* TODO: Check r3 for failure */\n \n+clear_lock:\n+\tandi.\tr15,r15,PNV_CORE_IDLE_THREAD_BITS\n+\tstw\tr15,0(r14)\n+\n+common_exit:\n+\tli\tr5,PNV_THREAD_RUNNING\n+\tstb r5,PACA_THREAD_IDLE_STATE(r13)\n+\n+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE\n+\tli r0,KVM_HWTHREAD_IN_KERNEL\n+\tstb r0,HSTATE_HWTHREAD_STATE(r13)\n+\t/* Order setting hwthread_state vs. 
testing hwthread_req */\n+\tsync\n+\tlbz r0,HSTATE_HWTHREAD_REQ(r13)\n+\tcmpwi r0,0\n+\tbeq 6f\n+\tb kvm_start_guest\n+6:\n+#endif\n+\n \tREST_NVGPRS(r1)\n \tREST_GPR(2, r1)\n \tld\tr3,_CCR(r1)\ndiff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S\nindex feb549a..b2aa93b 100644\n--- a/arch/powerpc/platforms/powernv/opal-wrappers.S\n+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S\n@@ -158,6 +158,43 @@ opal_tracepoint_return:\n \tblr\n #endif\n \n+/*\n+ * Make opal call in realmode. This is a generic function to be called\n+ * from realmode. It handles endianness.\n+ *\n+ * r13 - paca pointer\n+ * r1 - stack pointer\n+ * r0 - opal token\n+ */\n+_GLOBAL(opal_call_realmode)\n+\tmflr\tr12\n+\tstd\tr12,_LINK(r1)\n+\tld\tr2,PACATOC(r13)\n+\t/* Set opal return address */\n+\tLOAD_REG_ADDR(r12,return_from_opal_call)\n+\tmtlr\tr12\n+\n+\tmfmsr\tr12\n+#ifdef __LITTLE_ENDIAN__\n+\t/* Handle endian-ness */\n+\tli\tr11,MSR_LE\n+\tandc\tr12,r12,r11\n+#endif\n+\tmtspr\tSPRN_HSRR1,r12\n+\tLOAD_REG_ADDR(r11,opal)\n+\tld\tr12,8(r11)\n+\tld\tr2,0(r11)\n+\tmtspr\tSPRN_HSRR0,r12\n+\thrfid\n+\n+return_from_opal_call:\n+#ifdef __LITTLE_ENDIAN__\n+\tFIXUP_ENDIAN\n+#endif\n+\tld\tr12,_LINK(r1)\n+\tmtlr\tr12\n+\tblr\n+\n OPAL_CALL(opal_invalid_call,\t\t\tOPAL_INVALID_CALL);\n OPAL_CALL(opal_console_write,\t\t\tOPAL_CONSOLE_WRITE);\n OPAL_CALL(opal_console_read,\t\t\tOPAL_CONSOLE_READ);\ndiff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c\nindex 34c6665..980c964 100644\n--- a/arch/powerpc/platforms/powernv/setup.c\n+++ b/arch/powerpc/platforms/powernv/setup.c\n@@ -36,6 +36,8 @@\n #include <asm/opal.h>\n #include <asm/kexec.h>\n #include <asm/smp.h>\n+#include <asm/cputhreads.h>\n+#include <asm/cpuidle.h>\n \n #include \"powernv.h\"\n \n@@ -292,11 +294,55 @@ static void __init pnv_setup_machdep_rtas(void)\n \n static u32 supported_cpuidle_states;\n \n+static void 
pnv_alloc_idle_core_states(void)\n+{\n+\tint i, j;\n+\tint nr_cores = cpu_nr_cores();\n+\tu32 *core_idle_state;\n+\n+\t/*\n+\t * Deep idle states like sleep and winkle are per core idle states.\n+\t * A core enters these states only when all the threads enter either\n+\t * the particular idle state or a deeper one. There are tasks like\n+\t * fastsleep hardware bug workaround and hypervisor core state save\n+\t * which have to be done only by the last thread of the core entering\n+\t * deep idle state and similarly tasks like timebase resync, hypervisor\n+\t * core register restore that have to be done only by the first thread\n+\t * waking up from these states. Introducing core_idle_state, a per core\n+\t * structure which will keep track threads entering idle states deeper\n+\t * than sleep.\n+\t * core_idle_state - First 8 bits track the idle state of each thread\n+\t * of the core. The 8th bit is the lock bit. Initially all thread bits\n+\t * are set. They are cleared when the thread enters deep idle state\n+\t * like sleep and winkle. Initially the lock bit is cleared.\n+\t * The lock bit has 2 purposes\n+\t * a. While the first thread is restoring core state, it prevents\n+\t * from other threads in the core from switching to prcoess context.\n+\t * b. 
While the last thread in the core is saving the core state, it\n+\t * prevent a different thread from waking up.\n+\t */\n+\tfor (i = 0; i < nr_cores; i++) {\n+\t\tint first_cpu = i * threads_per_core;\n+\t\tint node = cpu_to_node(first_cpu);\n+\n+\t\tcore_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);\n+\t\tfor (j = 0; j < threads_per_core; j++) {\n+\t\t\tint cpu = first_cpu + j;\n+\n+\t\t\tpaca[cpu].core_idle_state_ptr = core_idle_state;\n+\t\t\tpaca[cpu].thread_idle_state = PNV_THREAD_RUNNING;\n+\n+\t\t}\n+\t}\n+}\n+\n u32 pnv_get_supported_cpuidle_states(void)\n {\n \treturn supported_cpuidle_states;\n }\n+EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);\n \n+u8 pnv_need_fastsleep_workaround;\n static int __init pnv_init_idle_states(void)\n {\n \tstruct device_node *power_mgt;\n@@ -306,6 +352,7 @@ static int __init pnv_init_idle_states(void)\n \tint i;\n \n \tsupported_cpuidle_states = 0;\n+\tpnv_need_fastsleep_workaround = 0;\n \n \tif (cpuidle_disable != IDLE_NO_OVERRIDE)\n \t\treturn 0;\n@@ -332,13 +379,14 @@ static int __init pnv_init_idle_states(void)\n \t\tflags = be32_to_cpu(idle_state_flags[i]);\n \t\tsupported_cpuidle_states |= flags;\n \t}\n-\n+\tif (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)\n+\t\tpnv_need_fastsleep_workaround = 1;\n+\tpnv_alloc_idle_core_states();\n \treturn 0;\n }\n \n subsys_initcall(pnv_init_idle_states);\n \n-\n static int __init pnv_probe(void)\n {\n \tunsigned long root = of_get_flat_dt_root();\ndiff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c\nindex 3dc4cec..12b761a 100644\n--- a/arch/powerpc/platforms/powernv/smp.c\n+++ b/arch/powerpc/platforms/powernv/smp.c\n@@ -167,7 +167,8 @@ static void pnv_smp_cpu_kill_self(void)\n \tmtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);\n \twhile (!generic_check_cpu_restart(cpu)) {\n \t\tppc64_runlatch_off();\n-\t\tif (idle_states & OPAL_PM_SLEEP_ENABLED)\n+\t\tif ((idle_states & OPAL_PM_SLEEP_ENABLED) 
||\n+\t\t\t\t(idle_states & OPAL_PM_SLEEP_ENABLED_ER1))\n \t\t\tpower7_sleep();\n \t\telse\n \t\t\tpower7_nap(1);\ndiff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c\nindex 0a7d827..a489b56 100644\n--- a/drivers/cpuidle/cpuidle-powernv.c\n+++ b/drivers/cpuidle/cpuidle-powernv.c\n@@ -208,7 +208,8 @@ static int powernv_add_idle_states(void)\n \t\t\tnr_idle_states++;\n \t\t}\n \n-\t\tif (flags & OPAL_PM_SLEEP_ENABLED) {\n+\t\tif (flags & OPAL_PM_SLEEP_ENABLED ||\n+\t\t\tflags & OPAL_PM_SLEEP_ENABLED_ER1) {\n \t\t\t/* Add FASTSLEEP state */\n \t\t\tstrcpy(powernv_states[nr_idle_states].name, \"FastSleep\");\n \t\t\tstrcpy(powernv_states[nr_idle_states].desc, \"FastSleep\");\n", "prefixes": [ "3/4" ] }