[v5,7/7] powerpc/powernv: nest pmu cpumask and cpu hotplug support

Message ID 1437045206-7491-8-git-send-email-maddy@linux.vnet.ibm.com (mailing list archive)
State Superseded

Commit Message

maddy July 16, 2015, 11:13 a.m. UTC
Add a cpumask attribute to each nest pmu, since nest units are
per-chip. Only one cpu (the first online cpu) from each node/chip is
designated to read the counters.

On cpu hotplug, the dying cpu is checked to see whether it is one of the
designated cpus; if so, the next online cpu from the same node/chip is
designated as the new cpu to read the counters.
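
The cpumask attribute is exported through the PMU's sysfs directory. As a
rough userspace illustration (assuming the standard /sys/bus/event_source
layout and a made-up PMU name "Nest_mcs0"; the real names come from the
device tree), the designated cpus could be read like this:

/*
 * Hypothetical example: read the cpumask exported for one nest PMU.
 * The path below uses a made-up PMU name; substitute a name actually
 * registered on the system.
 */
#include <stdio.h>

int main(void)
{
	char buf[256];
	FILE *f = fopen("/sys/bus/event_source/devices/Nest_mcs0/cpumask", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("designated cpus: %s", buf);
	fclose(f);
	return 0;
}

The perf userspace tool consults the same cpumask file when deciding which
cpus to open such per-chip events on.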

Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Preeti U Murthy <preetium@andrew.cmu.edu>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
---
 arch/powerpc/perf/nest-pmu.c | 172 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)

Comments

Daniel Axtens July 22, 2015, 5:03 a.m. UTC | #1
> +static void nest_change_cpu_context(int old_cpu, int new_cpu)
> +{
> +	int i;
> +
> +	for (i = 0; per_nest_pmu_arr[i] != NULL; i++)
> +		perf_pmu_migrate_context(&per_nest_pmu_arr[i]->pmu,
> +						old_cpu, new_cpu);
From patch 4, I see per_nest_pmu_arr is defined as:
 +static struct nest_pmu *per_nest_pmu_arr[P8_NEST_MAX_PMUS];

Therefore, does this loop need to have a check that 
i < P8_NEST_MAX_PMUS?
maddy July 23, 2015, 6:48 a.m. UTC | #2
On Wednesday 22 July 2015 10:33 AM, Daniel Axtens wrote:
>> +static void nest_change_cpu_context(int old_cpu, int new_cpu)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; per_nest_pmu_arr[i] != NULL; i++)
>> +		perf_pmu_migrate_context(&per_nest_pmu_arr[i]->pmu,
>> +						old_cpu, new_cpu);
> From patch 4, I see per_nest_pmu_arr is defined as:
>  +static struct nest_pmu *per_nest_pmu_arr[P8_NEST_MAX_PMUS];
>
> Therefore, does this loop need to have a check that 
> i < P8_NEST_MAX_PMUS?

No, that is the maximum possible number of pmus, but we may have only a
couple of nest pmus registered.

Thanks for the review comments
Maddy
>
Daniel Axtens July 23, 2015, 6:49 a.m. UTC | #3
On Thu, 2015-07-23 at 12:18 +0530, Madhavan Srinivasan wrote:
> 
> On Wednesday 22 July 2015 10:33 AM, Daniel Axtens wrote:
> >> +static void nest_change_cpu_context(int old_cpu, int new_cpu)
> >> +{
> >> +	int i;
> >> +
> >> +	for (i = 0; per_nest_pmu_arr[i] != NULL; i++)
> >> +		perf_pmu_migrate_context(&per_nest_pmu_arr[i]->pmu,
> >> +						old_cpu, new_cpu);
> > From patch 4, I see per_nest_pmu_arr is defined as:
> >  +static struct nest_pmu *per_nest_pmu_arr[P8_NEST_MAX_PMUS];
> >
> > Therefore, does this loop need to have a check that 
> > i < P8_NEST_MAX_PMUS?
> 
> No, that is the maximum possible number of pmus, but we may have only a
> couple of nest pmus registered.
> 
What if we have P8_NEST_MAX_PMUS registered? Then we'll check beyond the
end of the array...

> Thanks for the review comments
> Maddy
> >
>
maddy July 23, 2015, 7:25 a.m. UTC | #4
On Thursday 23 July 2015 12:19 PM, Daniel Axtens wrote:
> On Thu, 2015-07-23 at 12:18 +0530, Madhavan Srinivasan wrote:
>> On Wednesday 22 July 2015 10:33 AM, Daniel Axtens wrote:
>>>> +static void nest_change_cpu_context(int old_cpu, int new_cpu)
>>>> +{
>>>> +	int i;
>>>> +
>>>> +	for (i = 0; per_nest_pmu_arr[i] != NULL; i++)
>>>> +		perf_pmu_migrate_context(&per_nest_pmu_arr[i]->pmu,
>>>> +						old_cpu, new_cpu);
>>> From patch 4, I see per_nest_pmu_arr is defined as:
>>>  +static struct nest_pmu *per_nest_pmu_arr[P8_NEST_MAX_PMUS];
>>>
>>> Therefore, does this loop need to have a check that 
>>> i < P8_NEST_MAX_PMUS?
>> No, that is the maximum possible number of pmus, but we may have only a
>> couple of nest pmus registered.
>>
> What if we have P8_NEST_MAX_PMUS registered? Then we'll check beyond the
> end of the array...

OK, I will add a check for P8_NEST_MAX_PMUS as well.

>> Thanks for the review comments
>> Maddy
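
A bounded version of the loop along the lines agreed on above might look
like this (just a sketch, not taken from any later revision of the series):

static void nest_change_cpu_context(int old_cpu, int new_cpu)
{
	int i;

	/* Stop at the first empty slot, but never run past the array end. */
	for (i = 0; i < P8_NEST_MAX_PMUS && per_nest_pmu_arr[i] != NULL; i++)
		perf_pmu_migrate_context(&per_nest_pmu_arr[i]->pmu,
						old_cpu, new_cpu);
}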

Patch

diff --git a/arch/powerpc/perf/nest-pmu.c b/arch/powerpc/perf/nest-pmu.c
index 2ebd0508e9b3..d3a2fd746cf9 100644
--- a/arch/powerpc/perf/nest-pmu.c
+++ b/arch/powerpc/perf/nest-pmu.c
@@ -12,6 +12,7 @@ 
 
 static struct perchip_nest_info p8_nest_perchip_info[P8_NEST_MAX_CHIPS];
 static struct nest_pmu *per_nest_pmu_arr[P8_NEST_MAX_PMUS];
+static cpumask_t nest_pmu_cpu_mask;
 
 PMU_FORMAT_ATTR(event, "config:0-20");
 static struct attribute *p8_nest_format_attrs[] = {
@@ -24,6 +25,172 @@  static struct attribute_group p8_nest_format_group = {
 	.attrs = p8_nest_format_attrs,
 };
 
+static ssize_t nest_pmu_cpumask_get_attr(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &nest_pmu_cpu_mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, nest_pmu_cpumask_get_attr, NULL);
+
+static struct attribute *nest_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group nest_pmu_cpumask_attr_group = {
+	.attrs = nest_pmu_cpumask_attrs,
+};
+
+static void nest_init(int *loc)
+{
+	int rc;
+
+	rc = opal_nest_ima_control(
+				P8_NEST_MODE_PRODUCTION, P8_NEST_ENGINE_START);
+	if (rc)
+		loc[smp_processor_id()] = 1;
+}
+
+static void nest_change_cpu_context(int old_cpu, int new_cpu)
+{
+	int i;
+
+	for (i = 0; per_nest_pmu_arr[i] != NULL; i++)
+		perf_pmu_migrate_context(&per_nest_pmu_arr[i]->pmu,
+						old_cpu, new_cpu);
+}
+
+static void nest_exit_cpu(int cpu)
+{
+	int nid, target = -1;
+	struct cpumask *l_cpumask;
+
+	/*
+	 * Check in the designated list for this cpu. Dont bother
+	 * if not one of them.
+	 */
+	if (!cpumask_test_and_clear_cpu(cpu, &nest_pmu_cpu_mask))
+		return;
+
+	/*
+	 * Now that this cpu is one of the designated,
+	 * find a next cpu a) which is online and b) in same chip.
+	 */
+	nid = cpu_to_node(cpu);
+	l_cpumask = cpumask_of_node(nid);
+	target = cpumask_next(cpu, l_cpumask);
+
+	/*
+	 * Update the cpumask with the target cpu and
+	 * migrate the context if needed
+	 */
+	if (target >= 0 && target <= nr_cpu_ids) {
+		cpumask_set_cpu(target, &nest_pmu_cpu_mask);
+		nest_change_cpu_context(cpu, target);
+	}
+}
+
+static void nest_init_cpu(int cpu)
+{
+	int nid, fcpu, ncpu;
+	struct cpumask *l_cpumask, tmp_mask;
+
+	nid = cpu_to_node(cpu);
+	l_cpumask = cpumask_of_node(nid);
+
+	/*
+	 * if empty cpumask, just add incoming cpu and move on.
+	 */
+	if (!cpumask_and(&tmp_mask, l_cpumask, &nest_pmu_cpu_mask)) {
+		cpumask_set_cpu(cpu, &nest_pmu_cpu_mask);
+		return;
+	}
+
+	/*
+	 * Alway have the first online cpu of a chip as designated one.
+	 */
+	fcpu = cpumask_first(l_cpumask);
+	ncpu = cpumask_next(cpu, l_cpumask);
+	if (cpu == fcpu) {
+		if (cpumask_test_and_clear_cpu(ncpu, &nest_pmu_cpu_mask)) {
+			cpumask_set_cpu(cpu, &nest_pmu_cpu_mask);
+			nest_change_cpu_context(ncpu, cpu);
+		}
+	}
+}
+
+static int nest_pmu_cpu_notifier(struct notifier_block *self,
+				unsigned long action, void *hcpu)
+{
+	long cpu = (long)hcpu;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_ONLINE:
+		nest_init_cpu(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+	       nest_exit_cpu(cpu);
+	       break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block nest_pmu_cpu_nb = {
+	.notifier_call  = nest_pmu_cpu_notifier,
+	.priority       = CPU_PRI_PERF + 1,
+};
+
+static int nest_pmu_cpumask_init(void)
+{
+	const struct cpumask *l_cpumask;
+	int cpu, nid;
+	int *cpus_opal_rc;
+
+	cpu_notifier_register_begin();
+
+	/*
+	 * Nest PMUs are per-chip counters. So designate a cpu
+	 * from each chip for counter collection.
+	 */
+	for_each_online_node(nid) {
+		l_cpumask = cpumask_of_node(nid);
+
+		/* designate first online cpu in this node */
+		cpu = cpumask_first(l_cpumask);
+		cpumask_set_cpu(cpu, &nest_pmu_cpu_mask);
+	}
+
+	/*
+	 * Memory for OPAL call return value.
+	 */
+	cpus_opal_rc = kzalloc((sizeof(int) * nr_cpu_ids), GFP_KERNEL);
+	if (!cpus_opal_rc)
+		goto fail;
+
+	/* Initialize Nest PMUs in each node using designated cpus */
+	on_each_cpu_mask(&nest_pmu_cpu_mask, (smp_call_func_t)nest_init,
+						(void *)cpus_opal_rc, 1);
+
+	/* Check return value array for any OPAL call failure */
+	for_each_cpu(cpu, &nest_pmu_cpu_mask) {
+		if (cpus_opal_rc[cpu])
+			goto fail;
+	}
+
+	__register_cpu_notifier(&nest_pmu_cpu_nb);
+
+	cpu_notifier_register_done();
+	return 0;
+
+fail:
+	cpu_notifier_register_done();
+	return -ENODEV;
+}
+
 static int p8_nest_event_init(struct perf_event *event)
 {
 	int chip_id;
@@ -250,6 +417,7 @@  static int nest_pmu_create(struct device_node *dev, int pmu_index)
 			sprintf(buf, "Nest_%s", (char *)pp->value);
 			pmu_ptr->pmu.name = (char *)buf;
 			pmu_ptr->attr_groups[1] = &p8_nest_format_group;
+			pmu_ptr->attr_groups[2] = &nest_pmu_cpumask_attr_group;
 			continue;
 		}
 
@@ -359,6 +527,10 @@  static int __init nest_pmu_init(void)
 	    !cpu_has_feature(CPU_FTR_HVMODE))
 		return ret;
 
+	/* Add cpumask and register for hotplug notification */
+	if (nest_pmu_cpumask_init())
+		return ret;
+
 	/*
 	 * Nest PMU information is grouped under "nest-ima" node
 	 * of the top-level device-tree directory. Detect Nest PMU