Message ID | 20200427021027.114582-1-huntbag@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [RFC,1/3] powernv/cpuidle : Support for pre-entry and post exit of stop state in firmware | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | Successfully applied on branch powerpc/merge (54dc28ff5e0b3585224d49a31b53e030342ca5c3) |
snowpatch_ozlabs/checkpatch | warning | total: 0 errors, 0 warnings, 4 checks, 132 lines checked |
snowpatch_ozlabs/needsstable | success | Patch has no Fixes tags |
Hi Abhishek, On Sun, Apr 26, 2020 at 09:10:25PM -0500, Abhishek Goel wrote: > This patch provides kernel framework for opal support of save restore > of sprs in idle stop loop. Opal support for stop states is needed to > selectively enable stop states or to introduce a quirk quickly in case > a buggy stop state is present. > > We make an opal call from kernel if firmware-stop-support for stop > states is present and enabled. All the quirks for pre-entry of stop > state are handled inside opal. A call from opal is made into kernel > where we execute stop after saving of NVGPRs. > After waking up from 0x100 vector in kernel, we enter back into opal. > All the quirks in post exit path, if any, are then handled in opal, > from where we return successfully back to kernel. > For deep stop states in which additional SPRs are lost, saving and > restoration will be done in OPAL. > > This idea was first proposed by Nick here: > https://patchwork.ozlabs.org/patch/1208159/ > > The corresponding skiboot patch for this kernel patch is here: > https://patchwork.ozlabs.org/project/skiboot/list/?series=172831 > > When we callback from OPAL into kernel, r13 is clobbered. So, to > access PACA we need to restore it from HSPRG0. In future we can > handle this into OPAL as in here: > https://patchwork.ozlabs.org/patch/1245275/ > > Signed-off-by: Abhishek Goel <huntbag@linux.vnet.ibm.com> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com> > --- > > v1->v2 : No change in this patch. 
> > arch/powerpc/include/asm/opal-api.h | 8 ++++- > arch/powerpc/include/asm/opal.h | 3 ++ > arch/powerpc/kernel/idle_book3s.S | 5 +++ > arch/powerpc/platforms/powernv/idle.c | 37 ++++++++++++++++++++++ > arch/powerpc/platforms/powernv/opal-call.c | 2 ++ > 5 files changed, 54 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h > index c1f25a760eb1..a2c782c99c9e 100644 > --- a/arch/powerpc/include/asm/opal-api.h > +++ b/arch/powerpc/include/asm/opal-api.h > @@ -214,7 +214,9 @@ > #define OPAL_SECVAR_GET 176 > #define OPAL_SECVAR_GET_NEXT 177 > #define OPAL_SECVAR_ENQUEUE_UPDATE 178 > -#define OPAL_LAST 178 > +#define OPAL_REGISTER_OS_OPS 181 > +#define OPAL_CPU_IDLE 182 > +#define OPAL_LAST 182 > > #define QUIESCE_HOLD 1 /* Spin all calls at entry */ > #define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */ > @@ -1181,6 +1183,10 @@ struct opal_mpipl_fadump { > struct opal_mpipl_region region[]; > } __packed; > > +struct opal_os_ops { > + __be64 os_idle_stop; > +}; > + > #endif /* __ASSEMBLY__ */ > > #endif /* __OPAL_API_H */ > diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h > index 9986ac34b8e2..3c340bc4df8e 100644 > --- a/arch/powerpc/include/asm/opal.h > +++ b/arch/powerpc/include/asm/opal.h > @@ -400,6 +400,9 @@ void opal_powercap_init(void); > void opal_psr_init(void); > void opal_sensor_groups_init(void); > > +extern int64_t opal_register_os_ops(struct opal_os_ops *os_ops); > +extern int64_t opal_cpu_idle(__be64 srr1_addr, uint64_t psscr); > + > #endif /* __ASSEMBLY__ */ > > #endif /* _ASM_POWERPC_OPAL_H */ > diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S > index 22f249b6f58d..8d287d1d06c0 100644 > --- a/arch/powerpc/kernel/idle_book3s.S > +++ b/arch/powerpc/kernel/idle_book3s.S > @@ -49,6 +49,8 @@ _GLOBAL(isa300_idle_stop_noloss) > */ > _GLOBAL(isa300_idle_stop_mayloss) > mtspr SPRN_PSSCR,r3 > + mr r6, r13 > + mfspr r13, 
SPRN_HSPRG0 > std r1,PACAR1(r13) > mflr r4 > mfcr r5 > @@ -74,6 +76,7 @@ _GLOBAL(isa300_idle_stop_mayloss) > std r31,-8*18(r1) > std r4,-8*19(r1) > std r5,-8*20(r1) > + std r6,-8*21(r1) > /* 168 bytes */ > PPC_STOP > b . /* catch bugs */ > @@ -91,8 +94,10 @@ _GLOBAL(idle_return_gpr_loss) > ld r1,PACAR1(r13) > ld r4,-8*19(r1) > ld r5,-8*20(r1) > + ld r6,-8*21(r1) > mtlr r4 > mtcr r5 > + mr r13,r6 > /* > * KVM nap requires r2 to be saved, rather than just restoring it > * from PACATOC. This could be avoided for that less common case > diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c > index 78599bca66c2..1841027b25c5 100644 > --- a/arch/powerpc/platforms/powernv/idle.c > +++ b/arch/powerpc/platforms/powernv/idle.c > @@ -35,6 +35,7 @@ > static u32 supported_cpuidle_states; > struct pnv_idle_states_t *pnv_idle_states; > int nr_pnv_idle_states; > +static bool firmware_stop_supported; > > /* > * The default stop state that will be used by ppc_md.power_save > @@ -602,6 +603,25 @@ struct p9_sprs { > u64 uamor; > }; > > +/* > + * This function is called from OPAL if firmware support for stop > + * states is present and enabled. It provides a fallback for idle > + * stop states via OPAL. > + */ > +static uint64_t os_idle_stop(uint64_t psscr, bool save_gprs) > +{ > + /* > + * For lite state which does not lose even GPRS we call > + * idle_stop_noloss while for all other states we call > + * idle_stop_mayloss. Saving and restoration of other additional > + * SPRs if required is handled in OPAL. All the quirks are also > + * handled in OPAL. > + */ > + if (!save_gprs) > + return isa300_idle_stop_noloss(psscr); I think PSSCR[ESL|EC] = 0 case is an overkill to go into OPAL and come back via a callback. That can be handled in the kernel itself. 
> + return isa300_idle_stop_mayloss(psscr); > +} > + > static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) > { > int cpu = raw_smp_processor_id(); > @@ -613,6 +633,16 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) > unsigned long mmcr0 = 0; > struct p9_sprs sprs = {}; /* avoid false used-uninitialised */ > bool sprs_saved = false; > + int rc = 0; > + > + /* > + * Kernel takes decision whether to make OPAL call or not. This logic > + * will be combined with the logic for BE opal to take decision. > + */ > + if (firmware_stop_supported) { > + rc = opal_cpu_idle(cpu_to_be64(__pa(&srr1)), (uint64_t) psscr); Couple of comments here. 1) If PSSCR[ESL|EC] = 0, the current code expects mmu_on=true. When we make an OPAL call and come back into the kernel via the callback today, we will be in real-mode, with mmu turned off. 2) You seem to be choosing the opal cpuidle support as the default case, and not as a fallback. Thus, with this patch you will miss out on the deep stop-state support. > + goto out; > + } > > if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { > /* EC=ESL=0 case */ > @@ -1232,6 +1262,10 @@ static int pnv_parse_cpuidle_dt(void) > pr_warn("opal: PowerMgmt Node not found\n"); > return -ENODEV; > } > + > + if (of_device_is_compatible(np, "firmware-stop-supported")) > + firmware_stop_supported = true; IMO, at least for POWER9 generation, you need to do this only when "idle-stop" device-tree cpu-feature is unavailable. 
> + > nr_idle_states = of_property_count_u32_elems(np, > "ibm,cpu-idle-state-flags"); > > @@ -1326,6 +1360,7 @@ static int pnv_parse_cpuidle_dt(void) > > static int __init pnv_init_idle_states(void) > { > + struct opal_os_ops os_ops; > int cpu; > int rc = 0; > > @@ -1349,6 +1384,8 @@ static int __init pnv_init_idle_states(void) > } > } > > + os_ops.os_idle_stop = be64_to_cpu(os_idle_stop); > + rc = opal_register_os_ops((struct opal_os_ops *)(&os_ops)); > /* In case we error out nr_pnv_idle_states will be zero */ > nr_pnv_idle_states = 0; > supported_cpuidle_states = 0; > diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c > index 5cd0f52d258f..c885e607ba62 100644 > --- a/arch/powerpc/platforms/powernv/opal-call.c > +++ b/arch/powerpc/platforms/powernv/opal-call.c > @@ -293,3 +293,5 @@ OPAL_CALL(opal_mpipl_query_tag, OPAL_MPIPL_QUERY_TAG); > OPAL_CALL(opal_secvar_get, OPAL_SECVAR_GET); > OPAL_CALL(opal_secvar_get_next, OPAL_SECVAR_GET_NEXT); > OPAL_CALL(opal_secvar_enqueue_update, OPAL_SECVAR_ENQUEUE_UPDATE); > +OPAL_CALL(opal_register_os_ops, OPAL_REGISTER_OS_OPS); > +OPAL_CALL(opal_cpu_idle, OPAL_CPU_IDLE); > -- > 2.17.1 >
Thanks for picking this up and pushing it along. I do plan to come back and take another look at it all, but what we do need to do first is get a coherent approach to this proposed new calling convention and OS ops. It's fine to work on this in the meantime, but to start merging things my idea is: - OPAL must leave r13-r15 untouched for the OS. - OS ops are made available only for a "v4" OS that uses the new calling convention, including kernel stack. - OS ops baseline (all OSes must provide) will be console / printk facility, trap handling and crash/symbol decoding on behalf of OPAL, and runtime virtual memory. Other OS ops features can be added in the versioned structure, including this. I'm trying to get back to cleaning these things up and start getting them merged now. Any comments or review on those would be helpful. Thanks, Nick
Hi Nick, Have you posted out the kernel side of "opal v4" patchset? I could only find the opal patchset. Thanks, Abhishek On 04/28/2020 06:38 AM, Nicholas Piggin wrote: > Thanks for picking this up and pushing it along. I do plan to come back > and take another look at it all, but what we do need to do first is get > a coherent approach to this proposed new calling convention and OS ops. > > It's fine to work on this in the meantime, but to start merging things > my idea is: > > - OPAL must leave r13-r15 untouched for the OS. > - OS ops are made available only for a "v4" OS that uses the new > calling convention, including kernel stack. > - OS ops baseline (all OSes must provide) will be console / printk > facility, trap handling and crash/symbol decoding on behalf of OPAL, > and runtime virtual memory. > > Other OS ops features can be added in the versioned structure, including > this. > > I'm trying to get back to cleaning these things up and start getting > them merged now. Any comments or review on those would be helpful. > > Thanks, > Nick >
Excerpts from Abhishek's message of April 30, 2020 3:52 pm: > Hi Nick, > > Have you posted out the kernel side of "opal v4" patchset? > I could only find the opal patchset. I just posted some new ones. I have some changes for the cpuidle side but I haven't really looked to see what needs reconciling with your version, but I'll try to do that when I get time. Thanks, Nick
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index c1f25a760eb1..a2c782c99c9e 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -214,7 +214,9 @@ #define OPAL_SECVAR_GET 176 #define OPAL_SECVAR_GET_NEXT 177 #define OPAL_SECVAR_ENQUEUE_UPDATE 178 -#define OPAL_LAST 178 +#define OPAL_REGISTER_OS_OPS 181 +#define OPAL_CPU_IDLE 182 +#define OPAL_LAST 182 #define QUIESCE_HOLD 1 /* Spin all calls at entry */ #define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */ @@ -1181,6 +1183,10 @@ struct opal_mpipl_fadump { struct opal_mpipl_region region[]; } __packed; +struct opal_os_ops { + __be64 os_idle_stop; +}; + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_API_H */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 9986ac34b8e2..3c340bc4df8e 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -400,6 +400,9 @@ void opal_powercap_init(void); void opal_psr_init(void); void opal_sensor_groups_init(void); +extern int64_t opal_register_os_ops(struct opal_os_ops *os_ops); +extern int64_t opal_cpu_idle(__be64 srr1_addr, uint64_t psscr); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_OPAL_H */ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 22f249b6f58d..8d287d1d06c0 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -49,6 +49,8 @@ _GLOBAL(isa300_idle_stop_noloss) */ _GLOBAL(isa300_idle_stop_mayloss) mtspr SPRN_PSSCR,r3 + mr r6, r13 + mfspr r13, SPRN_HSPRG0 std r1,PACAR1(r13) mflr r4 mfcr r5 @@ -74,6 +76,7 @@ _GLOBAL(isa300_idle_stop_mayloss) std r31,-8*18(r1) std r4,-8*19(r1) std r5,-8*20(r1) + std r6,-8*21(r1) /* 168 bytes */ PPC_STOP b . 
/* catch bugs */ @@ -91,8 +94,10 @@ _GLOBAL(idle_return_gpr_loss) ld r1,PACAR1(r13) ld r4,-8*19(r1) ld r5,-8*20(r1) + ld r6,-8*21(r1) mtlr r4 mtcr r5 + mr r13,r6 /* * KVM nap requires r2 to be saved, rather than just restoring it * from PACATOC. This could be avoided for that less common case diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 78599bca66c2..1841027b25c5 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -35,6 +35,7 @@ static u32 supported_cpuidle_states; struct pnv_idle_states_t *pnv_idle_states; int nr_pnv_idle_states; +static bool firmware_stop_supported; /* * The default stop state that will be used by ppc_md.power_save @@ -602,6 +603,25 @@ struct p9_sprs { u64 uamor; }; +/* + * This function is called from OPAL if firmware support for stop + * states is present and enabled. It provides a fallback for idle + * stop states via OPAL. + */ +static uint64_t os_idle_stop(uint64_t psscr, bool save_gprs) +{ + /* + * For lite state which does not lose even GPRS we call + * idle_stop_noloss while for all other states we call + * idle_stop_mayloss. Saving and restoration of other additional + * SPRs if required is handled in OPAL. All the quirks are also + * handled in OPAL. + */ + if (!save_gprs) + return isa300_idle_stop_noloss(psscr); + return isa300_idle_stop_mayloss(psscr); +} + static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) { int cpu = raw_smp_processor_id(); @@ -613,6 +633,16 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) unsigned long mmcr0 = 0; struct p9_sprs sprs = {}; /* avoid false used-uninitialised */ bool sprs_saved = false; + int rc = 0; + + /* + * Kernel takes decision whether to make OPAL call or not. This logic + * will be combined with the logic for BE opal to take decision. 
+ */ + if (firmware_stop_supported) { + rc = opal_cpu_idle(cpu_to_be64(__pa(&srr1)), (uint64_t) psscr); + goto out; + } if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { /* EC=ESL=0 case */ @@ -1232,6 +1262,10 @@ static int pnv_parse_cpuidle_dt(void) pr_warn("opal: PowerMgmt Node not found\n"); return -ENODEV; } + + if (of_device_is_compatible(np, "firmware-stop-supported")) + firmware_stop_supported = true; + nr_idle_states = of_property_count_u32_elems(np, "ibm,cpu-idle-state-flags"); @@ -1326,6 +1360,7 @@ static int pnv_parse_cpuidle_dt(void) static int __init pnv_init_idle_states(void) { + struct opal_os_ops os_ops; int cpu; int rc = 0; @@ -1349,6 +1384,8 @@ static int __init pnv_init_idle_states(void) } } + os_ops.os_idle_stop = be64_to_cpu(os_idle_stop); + rc = opal_register_os_ops((struct opal_os_ops *)(&os_ops)); /* In case we error out nr_pnv_idle_states will be zero */ nr_pnv_idle_states = 0; supported_cpuidle_states = 0; diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c index 5cd0f52d258f..c885e607ba62 100644 --- a/arch/powerpc/platforms/powernv/opal-call.c +++ b/arch/powerpc/platforms/powernv/opal-call.c @@ -293,3 +293,5 @@ OPAL_CALL(opal_mpipl_query_tag, OPAL_MPIPL_QUERY_TAG); OPAL_CALL(opal_secvar_get, OPAL_SECVAR_GET); OPAL_CALL(opal_secvar_get_next, OPAL_SECVAR_GET_NEXT); OPAL_CALL(opal_secvar_enqueue_update, OPAL_SECVAR_ENQUEUE_UPDATE); +OPAL_CALL(opal_register_os_ops, OPAL_REGISTER_OS_OPS); +OPAL_CALL(opal_cpu_idle, OPAL_CPU_IDLE);