Message ID | 562FC1E8.9040902@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
On Tue, 2015-10-27 at 13:26 -0500, Nathan Fontenot wrote: > Add the ability to dlpar remove CPUs via hotplug rtas events, either by > specifying the drc-index of the CPU to remove or providing a count of cpus > to remove. > > diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c > index f080e81..635f0ba 100644 > --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c > +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c > @@ -570,6 +571,143 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index) > return 0; > } > > +static struct device_node *cpu_drc_index_to_dn(u32 drc_index) > +{ > + struct device_node *dn; > + u32 my_index; > + int rc; > + > + for_each_node_by_type(dn, "cpu") { > + rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index); > + if (rc) > + continue; > + > + if (my_index == drc_index) > + break; > + } > + > + return dn; > +} > + > +static int dlpar_cpu_remove_by_index(u32 drc_index) > +{ > + struct device_node *dn; > + int rc; > + > + dn = cpu_drc_index_to_dn(drc_index); > + if (!dn) > + return -ENODEV; > + > + rc = dlpar_cpu_remove(dn, drc_index); > + of_node_put(dn); Does dlpar_cpu_remove() work when you still hold a reference to the dn? It looks like dlpar_detach_node() does an of_node_put() also. > + return rc; > +} > + > +static u32 *dlpar_cpus_to_remove(int cpus_to_remove) > +{ > + struct device_node *dn; > + u32 *cpu_drcs; > + int cpus_found = 0; > + int i, rc; > + > + cpu_drcs = kcalloc(cpus_to_remove, sizeof(*cpu_drcs), GFP_KERNEL); > + if (!cpu_drcs) > + return NULL; > + > + i = 0; > + for_each_node_by_type(dn, "cpu") { > + cpus_found++; > + > + if (cpus_found > cpus_to_remove) { > + of_node_put(dn); > + break; > + } > + > + rc = of_property_read_u32(dn, "ibm,my-drc-index", > + &cpu_drcs[i++]); > + if (rc) { > + of_node_put(dn); > + break; I'm not sure about the logic here with cpus_found and i. If you break here cpus_found will be 1, but you found zero, which seems odd. 
If instead you delayed the increment of i: rc = of_property_read_u32(dn, "ibm,my-drc-index", &cpu_drcs[i]); if (rc) { of_node_put(dn); break; } i++; } Then i would equal the number of cpus found at all times. If you need to count one more in the if below you can just do that there. > + > + /* We want to find cpus_to_remove + 1 CPUs to ensure we do not > + * remove the last CPU. > + */ > + if (cpus_found <= cpus_to_remove) { > + pr_warn("Failed to find enough CPUs (%d of %d) to remove\n", > + cpus_found, cpus_to_remove); > + kfree(cpu_drcs); > + cpu_drcs = NULL; > + } On my two cpu system when I do "cpu remove count 1" this always says: pseries-hotplug-cpu: Failed to find enough CPUs (1 of 1) to remove Which confuses me. I suspect that's because I actually have one cpu *node*, which appears to Linux as two cpus (due to SMT). So I think it's working as expected, but it's not very clear from a user's perspective. cheers
On 11/25/2015 11:02 PM, Michael Ellerman wrote: > On Tue, 2015-10-27 at 13:26 -0500, Nathan Fontenot wrote: > >> Add the ability to dlpar remove CPUs via hotplug rtas events, either by >> specifying the drc-index of the CPU to remove or providing a count of cpus >> to remove. >> >> diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c >> index f080e81..635f0ba 100644 >> --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c >> +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c >> @@ -570,6 +571,143 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index) >> return 0; >> } >> >> +static struct device_node *cpu_drc_index_to_dn(u32 drc_index) >> +{ >> + struct device_node *dn; >> + u32 my_index; >> + int rc; >> + >> + for_each_node_by_type(dn, "cpu") { >> + rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index); >> + if (rc) >> + continue; >> + >> + if (my_index == drc_index) >> + break; >> + } >> + >> + return dn; >> +} >> + >> +static int dlpar_cpu_remove_by_index(u32 drc_index) >> +{ >> + struct device_node *dn; >> + int rc; >> + >> + dn = cpu_drc_index_to_dn(drc_index); >> + if (!dn) >> + return -ENODEV; >> + >> + rc = dlpar_cpu_remove(dn, drc_index); >> + of_node_put(dn); > > Does dlpar_cpu_remove() work when you still hold a reference to the dn? Yes, this works while holding the dn reference here. > > It looks like dlpar_detach_node() does an of_node_put() also. Correct. The of_node_put() in dlpar_detach_node() is there to do a put from the initial node creation, without this the reference count would never go to zero and the node would not be released. 
> >> + return rc; >> +} >> + >> +static u32 *dlpar_cpus_to_remove(int cpus_to_remove) >> +{ >> + struct device_node *dn; >> + u32 *cpu_drcs; >> + int cpus_found = 0; >> + int i, rc; >> + >> + cpu_drcs = kcalloc(cpus_to_remove, sizeof(*cpu_drcs), GFP_KERNEL); >> + if (!cpu_drcs) >> + return NULL; >> + >> + i = 0; >> + for_each_node_by_type(dn, "cpu") { >> + cpus_found++; >> + >> + if (cpus_found > cpus_to_remove) { >> + of_node_put(dn); >> + break; >> + } >> + >> + rc = of_property_read_u32(dn, "ibm,my-drc-index", >> + &cpu_drcs[i++]); >> + if (rc) { >> + of_node_put(dn); >> + break; > > I'm not sure about the logic here with cpus_found and i. If you break here > cpus_found will be 1, but you found zero, which seems odd. > Agreed, it is a bit odd. The cpus_found var is meant to count the number of cpus we find when looping through for_each_node_by_type(), whereas i is just meant to be an index into the cpu_drcs array so we can save the drc-index of the cpus we find. The variable i is just to index the array, nothing more. Perhaps instead of having i to use as an index into the cpu_drcs array I could just use [cpus_found - 1] to index cpu_drcs and get rid of i. Not sure if that makes the code any easier to read. > If instead you delayed the increment of i: > > rc = of_property_read_u32(dn, "ibm,my-drc-index", > &cpu_drcs[i]); > if (rc) { > of_node_put(dn); > break; > } > > i++; > } > > Then i would equal the number of cpus found at all times. If you need to count > one more in the if below you can just do that there. > >> + >> + /* We want to find cpus_to_remove + 1 CPUs to ensure we do not >> + * remove the last CPU. 
>> + */ >> + if (cpus_found <= cpus_to_remove) { >> + pr_warn("Failed to find enough CPUs (%d of %d) to remove\n", >> + cpus_found, cpus_to_remove); >> + kfree(cpu_drcs); >> + cpu_drcs = NULL; >> + } > > On my two cpu system when I do "cpu remove count 1" this always says: > > pseries-hotplug-cpu: Failed to find enough CPUs (1 of 1) to remove > > Which confuses me. > > I suspect that's because I actually have one cpu *node*, which appears to Linux > as two cpus (due to SMT). So I think it's working as expected, but it's not > very clear from a user's perspective. > You are correct. For Power, CPU DLPAR works on a node basis. If there is only one node it will not remove the last CPU. I should update the cpus_found check to print a message saying that the remove request would entail removing the last CPU and that the request is being failed because of that. Thanks for the feedback, -Nathan
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index f080e81..635f0ba 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -26,6 +26,7 @@ #include <linux/sched.h> /* for idle_task_exit */ #include <linux/cpu.h> #include <linux/of.h> +#include <linux/slab.h> #include <asm/prom.h> #include <asm/rtas.h> #include <asm/firmware.h> @@ -570,6 +571,143 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index) return 0; } +static struct device_node *cpu_drc_index_to_dn(u32 drc_index) +{ + struct device_node *dn; + u32 my_index; + int rc; + + for_each_node_by_type(dn, "cpu") { + rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index); + if (rc) + continue; + + if (my_index == drc_index) + break; + } + + return dn; +} + +static int dlpar_cpu_remove_by_index(u32 drc_index) +{ + struct device_node *dn; + int rc; + + dn = cpu_drc_index_to_dn(drc_index); + if (!dn) + return -ENODEV; + + rc = dlpar_cpu_remove(dn, drc_index); + of_node_put(dn); + return rc; +} + +static u32 *dlpar_cpus_to_remove(int cpus_to_remove) +{ + struct device_node *dn; + u32 *cpu_drcs; + int cpus_found = 0; + int i, rc; + + cpu_drcs = kcalloc(cpus_to_remove, sizeof(*cpu_drcs), GFP_KERNEL); + if (!cpu_drcs) + return NULL; + + i = 0; + for_each_node_by_type(dn, "cpu") { + cpus_found++; + + if (cpus_found > cpus_to_remove) { + of_node_put(dn); + break; + } + + rc = of_property_read_u32(dn, "ibm,my-drc-index", + &cpu_drcs[i++]); + if (rc) { + of_node_put(dn); + break; + } + } + + /* We want to find cpus_to_remove + 1 CPUs to ensure we do not + * remove the last CPU. 
+ */ + if (cpus_found <= cpus_to_remove) { + pr_warn("Failed to find enough CPUs (%d of %d) to remove\n", + cpus_found, cpus_to_remove); + kfree(cpu_drcs); + cpu_drcs = NULL; + } + + return cpu_drcs; +} + +static int dlpar_cpu_remove_by_count(u32 cpus_to_remove) +{ + u32 *cpu_drcs; + int cpus_removed = 0; + int i, rc; + + pr_debug("Attempting to hot-remove %d CPUs\n", cpus_to_remove); + + cpu_drcs = dlpar_cpus_to_remove(cpus_to_remove); + if (!cpu_drcs) + return -EINVAL; + + for (i = 0; i < cpus_to_remove; i++) { + rc = dlpar_cpu_remove_by_index(cpu_drcs[i]); + if (rc) + break; + + cpus_removed++; + } + + if (cpus_removed != cpus_to_remove) { + pr_warn("CPU hot-remove failed, adding back removed CPUs\n"); + + for (i = 0; i < cpus_removed; i++) + dlpar_cpu_add(cpu_drcs[i]); + + rc = -EINVAL; + } else { + rc = 0; + } + + kfree(cpu_drcs); + return rc; +} + +int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) +{ + u32 count, drc_index; + int rc; + + count = hp_elog->_drc_u.drc_count; + drc_index = hp_elog->_drc_u.drc_index; + + lock_device_hotplug(); + + switch (hp_elog->action) { + case PSERIES_HP_ELOG_ACTION_REMOVE: + if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT) + rc = dlpar_cpu_remove_by_count(count); + else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX) + rc = dlpar_cpu_remove_by_index(drc_index); + else + rc = -EINVAL; + break; + default: + pr_err("Invalid action (%d) specified\n", hp_elog->action); + rc = -EINVAL; + break; + } + + unlock_device_hotplug(); + return rc; +} + #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE static ssize_t dlpar_cpu_probe(const char *buf, size_t count) diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 8411c27..7aa83f0 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -73,6 +73,15 @@ static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog) } #endif +#ifdef CONFIG_HOTPLUG_CPU +int dlpar_cpu(struct pseries_hp_errorlog 
*hp_elog); +#else +static inline int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) +{ + return -EOPNOTSUPP; +} +#endif + /* PCI root bridge prepare function override for pseries */ struct pci_host_bridge; int pseries_root_bridge_prepare(struct pci_host_bridge *bridge);
Add the ability to dlpar remove CPUs via hotplug rtas events, either by specifying the drc-index of the CPU to remove or providing a count of cpus to remove. To remove multiple cpus in a single request we create a list of possible DR (Dynamic Reconfiguration) cpus and their drc indexes that can be removed. We can then traverse the list, remove each cpu, and easily clean up in case of failure. Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com> --- Updates for v2: - use for_each_node_by_type() instead of for_each_child_of_node - updated how the list of cpus to remove is generated - change to remove all cpus requested or none at all - put function declarations under CONFIG_HOTPLUG_CPU arch/powerpc/platforms/pseries/hotplug-cpu.c | 138 ++++++++++++++++++++++++++ arch/powerpc/platforms/pseries/pseries.h | 9 ++ 2 files changed, 147 insertions(+)