Message ID | 305ed693-ea85-8a70-1d3c-ae405aebc0ad@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | [v03] powerpc/numa: Perform full re-add of CPU for PRRN/VPHN topology update | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | next/apply_patch Successfully applied |
snowpatch_ozlabs/build-ppc64le | fail | build failed! |
snowpatch_ozlabs/build-ppc64be | fail | build failed! |
snowpatch_ozlabs/build-ppc64e | success | build succeeded & removed 0 sparse warning(s) |
snowpatch_ozlabs/build-pmac32 | success | build succeeded & removed 0 sparse warning(s) |
snowpatch_ozlabs/checkpatch | warning | total: 0 errors, 2 warnings, 5 checks, 181 lines checked |
Hi Michael, Thank you for the patch! Yet something to improve: [auto build test ERROR on powerpc/next] [also build test ERROR on v5.0-rc4 next-20190206] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Michael-Bringmann/powerpc-numa-Perform-full-re-add-of-CPU-for-PRRN-VPHN-topology-update/20190207-101545 base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next config: powerpc-allyesconfig (attached as .config) compiler: powerpc64-linux-gnu-gcc (Debian 8.2.0-11) 8.2.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree GCC_VERSION=8.2.0 make.cross ARCH=powerpc All errors (new ones prefixed by >>): arch/powerpc/mm/numa.c: In function 'numa_update_cpu_topology': >> arch/powerpc/mm/numa.c:1361:4: error: implicit declaration of function 'dlpar_cpu_readd'; did you mean 'raw_cpu_read'? [-Werror=implicit-function-declaration] dlpar_cpu_readd(cpu); ^~~~~~~~~~~~~~~ raw_cpu_read cc1: some warnings being treated as errors vim +1361 arch/powerpc/mm/numa.c 1298 1299 /* 1300 * Update the node maps and sysfs entries for each cpu whose home node 1301 * has changed. Returns 1 when the topology has changed, and 0 otherwise. 1302 * 1303 * readd_cpus: Also readd any CPUs that have changed affinity 1304 */ 1305 static int numa_update_cpu_topology(bool readd_cpus) 1306 { 1307 unsigned int cpu, sibling, changed = 0; 1308 struct topology_update_data *updates, *ud; 1309 cpumask_t updated_cpus; 1310 struct device *dev; 1311 int weight, new_nid, i = 0; 1312 1313 if ((!prrn_enabled && !vphn_enabled && topology_inited) || 1314 topology_update_in_progress) 1315 return 0; 1316 1317 weight = cpumask_weight(&cpu_associativity_changes_mask); 1318 if (!weight) 1319 return 0; 1320 1321 updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL); 1322 if (!updates) 1323 return 0; 1324 1325 topology_update_in_progress = 1; 1326 1327 cpumask_clear(&updated_cpus); 1328 1329 for_each_cpu(cpu, &cpu_associativity_changes_mask) { 1330 /* 1331 * If siblings aren't flagged for changes, updates list 1332 * will be too short. Skip on this update and set for next 1333 * update. 1334 */ 1335 if (!cpumask_subset(cpu_sibling_mask(cpu), 1336 &cpu_associativity_changes_mask)) { 1337 pr_info("Sibling bits not set for associativity " 1338 "change, cpu%d\n", cpu); 1339 cpumask_or(&cpu_associativity_changes_mask, 1340 &cpu_associativity_changes_mask, 1341 cpu_sibling_mask(cpu)); 1342 cpu = cpu_last_thread_sibling(cpu); 1343 continue; 1344 } 1345 1346 new_nid = find_and_online_cpu_nid(cpu); 1347 1348 if ((new_nid == numa_cpu_lookup_table[cpu]) || 1349 !cpu_present(cpu)) { 1350 cpumask_andnot(&cpu_associativity_changes_mask, 1351 &cpu_associativity_changes_mask, 1352 cpu_sibling_mask(cpu)); 1353 if (cpu_present(cpu)) 1354 dbg("Assoc chg gives same node %d for cpu%d\n", 1355 new_nid, cpu); 1356 cpu = cpu_last_thread_sibling(cpu); 1357 continue; 1358 } 1359 1360 if (readd_cpus) > 1361 dlpar_cpu_readd(cpu); 1362 1363 for_each_cpu(sibling, cpu_sibling_mask(cpu)) { 1364 ud = &updates[i++]; 1365 ud->next = &updates[i]; 1366 ud->cpu = sibling; 1367 ud->new_nid = new_nid; 1368 ud->old_nid = numa_cpu_lookup_table[sibling]; 1369 cpumask_set_cpu(sibling, &updated_cpus); 1370 } 1371 cpu = cpu_last_thread_sibling(cpu); 1372 } 1373 1374 /* 1375 * Prevent processing of 'updates' from overflowing array 1376 * where last entry filled in a 'next' pointer. 1377 */ 1378 if (i) 1379 updates[i-1].next = NULL; 1380 1381 pr_debug("Topology update for the following CPUs:\n"); 1382 if (cpumask_weight(&updated_cpus)) { 1383 for (ud = &updates[0]; ud; ud = ud->next) { 1384 pr_debug("cpu %d moving from node %d " 1385 "to %d\n", ud->cpu, 1386 ud->old_nid, ud->new_nid); 1387 } 1388 } 1389 1390 /* 1391 * In cases where we have nothing to update (because the updates list 1392 * is too short or because the new topology is same as the old one), 1393 * skip invoking update_cpu_topology() via stop-machine(). This is 1394 * necessary (and not just a fast-path optimization) since stop-machine 1395 * can end up electing a random CPU to run update_cpu_topology(), and 1396 * thus trick us into setting up incorrect cpu-node mappings (since 1397 * 'updates' is kzalloc()'ed). 1398 * 1399 * And for the similar reason, we will skip all the following updating. 1400 */ 1401 if (!cpumask_weight(&updated_cpus)) 1402 goto out; 1403 1404 stop_machine(update_cpu_topology, &updates[0], &updated_cpus); 1405 1406 /* 1407 * Update the numa-cpu lookup table with the new mappings, even for 1408 * offline CPUs. It is best to perform this update from the stop- 1409 * machine context. 1410 */ 1411 stop_machine(update_lookup_table, &updates[0], 1412 cpumask_of(raw_smp_processor_id())); 1413 1414 for (ud = &updates[0]; ud; ud = ud->next) { 1415 unregister_cpu_under_node(ud->cpu, ud->old_nid); 1416 register_cpu_under_node(ud->cpu, ud->new_nid); 1417 1418 dev = get_cpu_device(ud->cpu); 1419 if (dev) 1420 kobject_uevent(&dev->kobj, KOBJ_CHANGE); 1421 cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask); 1422 changed = 1; 1423 } 1424 1425 out: 1426 topology_changed = changed; 1427 topology_update_in_progress = 0; 1428 kfree(updates); 1429 return changed; 1430 } 1431 --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
> > int arch_update_cpu_topology(void) > { > - return numa_update_cpu_topology(true); > + int changed = topology_changed; > + > + topology_changed = 0; > + return changed; > } > Do we need Powerpc override for arch_update_cpu_topology() now? That topology_changed sometime back doesn't seem to have help. The scheduler atleast now is neglecting whether the topology changed or not. Also we can do away with the new topology_changed. > static void topology_work_fn(struct work_struct *work) > { > - rebuild_sched_domains(); > + lock_device_hotplug(); > + if (numa_update_cpu_topology(true)) > + rebuild_sched_domains(); > + unlock_device_hotplug(); > } Should this hunk be a separate patch by itself to say why rebuild_sched_domains with a changelog that explains why it should be under lock_device_hotplug? rebuild_sched_domains already takes cpuset_mutex. So I am not sure if we need to take device_hotplug_lock. > static DECLARE_WORK(topology_work, topology_work_fn); > > -static void topology_schedule_update(void) > +void topology_schedule_update(void) > { > - schedule_work(&topology_work); > + if (!topology_update_in_progress) > + schedule_work(&topology_work); > } > > static void topology_timer_fn(struct timer_list *unused) > { > + bool sdo = false; Is sdo any abbrevation? > + > + if (topology_scans < 1) > + bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask), > + nr_cpumask_bits); Why do we need topology_scan? Just to make sure cpu_associativity_changes_mask is populated only once? cant we use a static bool inside the function for the same? > + > if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask)) > - topology_schedule_update(); > - else if (vphn_enabled) { > + sdo = true; > + if (vphn_enabled) { Any reason to remove the else above? > if (update_cpu_associativity_changes_mask() > 0) > - topology_schedule_update(); > + sdo = true; > reset_topology_timer(); > } > + if (sdo) > + topology_schedule_update(); > + topology_scans++; > } Are the above two hunks necessary? Not getting how the current changes are different from the previous.
On 2/7/19 11:44 PM, Srikar Dronamraju wrote: >> >> int arch_update_cpu_topology(void) >> { >> - return numa_update_cpu_topology(true); >> + int changed = topology_changed; >> + >> + topology_changed = 0; >> + return changed; >> } >> > > Do we need Powerpc override for arch_update_cpu_topology() now? That > topology_changed sometime back doesn't seem to have help. The scheduler > atleast now is neglecting whether the topology changed or not. I was dealing with a a concurrency problem. Revisiting again. > > Also we can do away with the new topology_changed. > >> static void topology_work_fn(struct work_struct *work) >> { >> - rebuild_sched_domains(); >> + lock_device_hotplug(); >> + if (numa_update_cpu_topology(true)) >> + rebuild_sched_domains(); >> + unlock_device_hotplug(); >> } > > Should this hunk be a separate patch by itself to say why > rebuild_sched_domains with a changelog that explains why it should be under > lock_device_hotplug? rebuild_sched_domains already takes cpuset_mutex. > So I am not sure if we need to take device_hotplug_lock. topology_work_fn runs in its own thread like the DLPAR operations. This patch adds calls to Nathan's 'dlpar_cpu_readd' from the topology_work_fn thread. The lock/unlock_device_hotplug guard against concurrency issues with the DLPAR operations, grabbing that lock here to avoid overlap with those other operations. This mod is dependent upon using dlpar_cpu_readd. > >> static DECLARE_WORK(topology_work, topology_work_fn); >> >> -static void topology_schedule_update(void) >> +void topology_schedule_update(void) >> { >> - schedule_work(&topology_work); >> + if (!topology_update_in_progress) >> + schedule_work(&topology_work); >> } >> >> static void topology_timer_fn(struct timer_list *unused) >> { >> + bool sdo = false; > > Is sdo any abbrevation? 'for do the schedule update'. Will remove per below. > >> + >> + if (topology_scans < 1) >> + bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask), >> + nr_cpumask_bits); > > Why do we need topology_scan? Just to make sure > cpu_associativity_changes_mask is populated only once? > cant we use a static bool inside the function for the same? I was running into a race condition. On one of my test systems, start_topology_update via shared_proc_topology_init and the PHYP did not provide any change info about the CPUs that early in the boot. The first run erased the cpu bits in cpu_associativity_changes_mask, and subsequent runs did not pay attention to the reported updates. Taking another look. > > >> + >> if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask)) >> - topology_schedule_update(); >> - else if (vphn_enabled) { >> + sdo = true; >> + if (vphn_enabled) { > > Any reason to remove the else above? When vphn_enabled and prrn_enabled, it was not calling 'update_cpu_associativity_changes_mask()', so was not getting the necessary change info. >> if (update_cpu_associativity_changes_mask() > 0) >> - topology_schedule_update(); >> + sdo = true; >> reset_topology_timer(); >> } >> + if (sdo) >> + topology_schedule_update(); >> + topology_scans++; >> } > > Are the above two hunks necessary? Not getting how the current changes are > different from the previous. Not important. Will undo. >
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index f85e2b0..79505c3 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -42,7 +42,7 @@ static inline int pcibus_to_node(struct pci_bus *bus) extern int sysfs_add_device_to_node(struct device *dev, int nid); extern void sysfs_remove_device_from_node(struct device *dev, int nid); -extern int numa_update_cpu_topology(bool cpus_locked); +extern void topology_schedule_update(void); static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) { @@ -77,10 +77,7 @@ static inline void sysfs_remove_device_from_node(struct device *dev, { } -static inline int numa_update_cpu_topology(bool cpus_locked) -{ - return 0; -} +static inline void topology_schedule_update(void) {} static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {} diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 8a1746d..b1828de 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -285,7 +285,7 @@ static void handle_prrn_event(s32 scope) * the RTAS event. */ pseries_devicetree_update(-scope); - numa_update_cpu_topology(false); + topology_schedule_update(); } static void handle_rtas_event(const struct rtas_error_log *log) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index ef6bdf1..a750ec0 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1077,6 +1077,9 @@ struct topology_update_data { static void reset_topology_timer(void); static int topology_timer_secs = 1; static int topology_inited; +static int topology_update_in_progress; +static int topology_changed; +static unsigned long topology_scans; /* * Change polling interval for associativity changes. @@ -1297,9 +1300,9 @@ static int update_lookup_table(void *data) * Update the node maps and sysfs entries for each cpu whose home node * has changed. Returns 1 when the topology has changed, and 0 otherwise. * - * cpus_locked says whether we already hold cpu_hotplug_lock. + * readd_cpus: Also readd any CPUs that have changed affinity */ -int numa_update_cpu_topology(bool cpus_locked) +static int numa_update_cpu_topology(bool readd_cpus) { unsigned int cpu, sibling, changed = 0; struct topology_update_data *updates, *ud; @@ -1307,7 +1310,8 @@ int numa_update_cpu_topology(bool cpus_locked) struct device *dev; int weight, new_nid, i = 0; - if (!prrn_enabled && !vphn_enabled && topology_inited) + if ((!prrn_enabled && !vphn_enabled && topology_inited) || + topology_update_in_progress) return 0; weight = cpumask_weight(&cpu_associativity_changes_mask); @@ -1318,6 +1322,8 @@ int numa_update_cpu_topology(bool cpus_locked) if (!updates) return 0; + topology_update_in_progress = 1; + cpumask_clear(&updated_cpus); for_each_cpu(cpu, &cpu_associativity_changes_mask) { @@ -1339,16 +1345,21 @@ int numa_update_cpu_topology(bool cpus_locked) new_nid = find_and_online_cpu_nid(cpu); - if (new_nid == numa_cpu_lookup_table[cpu]) { + if ((new_nid == numa_cpu_lookup_table[cpu]) || + !cpu_present(cpu)) { cpumask_andnot(&cpu_associativity_changes_mask, &cpu_associativity_changes_mask, cpu_sibling_mask(cpu)); - dbg("Assoc chg gives same node %d for cpu%d\n", + if (cpu_present(cpu)) + dbg("Assoc chg gives same node %d for cpu%d\n", new_nid, cpu); cpu = cpu_last_thread_sibling(cpu); continue; } + if (readd_cpus) + dlpar_cpu_readd(cpu); + for_each_cpu(sibling, cpu_sibling_mask(cpu)) { ud = &updates[i++]; ud->next = &updates[i]; @@ -1390,23 +1401,15 @@ int numa_update_cpu_topology(bool cpus_locked) if (!cpumask_weight(&updated_cpus)) goto out; - if (cpus_locked) - stop_machine_cpuslocked(update_cpu_topology, &updates[0], - &updated_cpus); - else - stop_machine(update_cpu_topology, &updates[0], &updated_cpus); + stop_machine(update_cpu_topology, &updates[0], &updated_cpus); /* * Update the numa-cpu lookup table with the new mappings, even for * offline CPUs. It is best to perform this update from the stop- * machine context. */ - if (cpus_locked) - stop_machine_cpuslocked(update_lookup_table, &updates[0], - cpumask_of(raw_smp_processor_id())); - else - stop_machine(update_lookup_table, &updates[0], - cpumask_of(raw_smp_processor_id())); + stop_machine(update_lookup_table, &updates[0], + cpumask_of(raw_smp_processor_id())); for (ud = &updates[0]; ud; ud = ud->next) { unregister_cpu_under_node(ud->cpu, ud->old_nid); @@ -1420,35 +1423,53 @@ int numa_update_cpu_topology(bool cpus_locked) } out: + topology_changed = changed; + topology_update_in_progress = 0; kfree(updates); return changed; } int arch_update_cpu_topology(void) { - return numa_update_cpu_topology(true); + int changed = topology_changed; + + topology_changed = 0; + return changed; } static void topology_work_fn(struct work_struct *work) { - rebuild_sched_domains(); + lock_device_hotplug(); + if (numa_update_cpu_topology(true)) + rebuild_sched_domains(); + unlock_device_hotplug(); } static DECLARE_WORK(topology_work, topology_work_fn); -static void topology_schedule_update(void) +void topology_schedule_update(void) { - schedule_work(&topology_work); + if (!topology_update_in_progress) + schedule_work(&topology_work); } static void topology_timer_fn(struct timer_list *unused) { + bool sdo = false; + + if (topology_scans < 1) + bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask), + nr_cpumask_bits); + if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask)) - topology_schedule_update(); - else if (vphn_enabled) { + sdo = true; + if (vphn_enabled) { if (update_cpu_associativity_changes_mask() > 0) - topology_schedule_update(); + sdo = true; reset_topology_timer(); } + if (sdo) + topology_schedule_update(); + topology_scans++; } static struct timer_list topology_timer; @@ -1561,7 +1582,7 @@ void __init shared_proc_topology_init(void) if (lppaca_shared_proc(get_lppaca())) { bitmap_fill(cpumask_bits(&cpu_associativity_changes_mask), nr_cpumask_bits); - numa_update_cpu_topology(false); + topology_schedule_update(); } }