| Message ID | 20191115101039.43386-1-lkml@jv-coder.de |
|---|---|
| State | Superseded |
| Series | cpuset_regression_test: Fix for already existing cpusets |
Hi,

a ping for this patch? Is something like that (workaround environment
issues) not wanted in ltp?

Greets
Jörg

On 11/15/2019 11:10 AM, Joerg Vehlow wrote:
> From: Joerg Vehlow <joerg.vehlow@aox-tech.de>
>
> If there are already cpusets defined on the system, that use cpu 0-1,
> the test fails, because it tries to exclusively use cpu 0-1 for the
> testcase.
>
> The fix sets the cpuset for all cgroups to 0 and disables exclusive
> cpu usage for the duration of the test and restores it on cleanup.
> For the test only cpu 1 is set as exclusive. This is enough to
> trigger the bug this regression test was designed for.
> This was tested by reverting the commit mentioned in the testcase.
>
> Signed-off-by: Joerg Vehlow <joerg.vehlow@aox-tech.de>
> ---
>  .../cpuset/cpuset_regression_test.sh | 58 +++++++++++++++++--
>  1 file changed, 54 insertions(+), 4 deletions(-)
>
> diff --git a/testcases/kernel/controllers/cpuset/cpuset_regression_test.sh b/testcases/kernel/controllers/cpuset/cpuset_regression_test.sh
> index dccfd91cd..ed5e30f2a 100755
> --- a/testcases/kernel/controllers/cpuset/cpuset_regression_test.sh
> +++ b/testcases/kernel/controllers/cpuset/cpuset_regression_test.sh
> @@ -26,6 +26,49 @@ TCID=cpuset_regression_test
>  TST_TOTAL=1
>  . test.sh
>
> +# cpuset_backup_and_update <backup_dir> <what> <value>
> +# Create backup of the values of a specific file (<what>)
> +# in all cpuset groups and set the value to <value>
> +# The backup is written to <backup_dir> in the same structure
> +# as in the cpuset filesystem
> +cpuset_backup_and_update()
> +{
> +	local backup_dir=$1
> +	local what=$2
> +	local value=$3
> +	local old_dir=$(pwd)
> +
> +	cd ${root_cpuset_dir}
> +	find . -mindepth 2 -name ${what} -print0 |
> +	while IFS= read -r -d '' file; do
> +		tst_resm TINFO "Backup ${file} ($(cat "${file}"))"
> +		mkdir -p "$(dirname "${backup_dir}/${file}")"
> +		cat "${file}" > "${backup_dir}/${file}"
> +		echo "${value}" > "${file}"
> +	done
> +
> +	cd ${old_dir}
> +}
> +
> +# cpuset_restore <backup_dir> <what>
> +# Restores the value of a file (<what>) in all cpuset
> +# groups from the backup created by cpuset_backup_and_update
> +cpuset_restore()
> +{
> +	local backup_dir=$1
> +	local what=$2
> +	local old_dir=$(pwd)
> +
> +	cd ${backup_dir}
> +	find . -mindepth 2 -name ${what} -print0 |
> +	while IFS= read -r -d '' file; do
> +		tst_resm TINFO "Restore ${file} ($(cat "${file}"))"
> +		cat "${file}" > "${root_cpuset_dir}/${file}"
> +	done
> +
> +	cd ${old_dir}
> +}
> +
>  setup()
>  {
>  	tst_require_root
> @@ -69,6 +112,10 @@ setup()
>  			"do not exist."
>  	fi
>
> +	mkdir cpuset_backup
> +	cpuset_backup_and_update "$(pwd)/cpuset_backup" ${cpu_exclusive} 0
> +	cpuset_backup_and_update "$(pwd)/cpuset_backup" cpuset.cpus 0
> +
>  	cpu_exclusive_value=$(cat ${root_cpuset_dir}/${cpu_exclusive})
>  	if [ "${cpu_exclusive_value}" != "1" ];then
>  		echo 1 > ${root_cpuset_dir}/${cpu_exclusive}
> @@ -86,6 +133,9 @@ cleanup()
>  		rmdir ${root_cpuset_dir}/testdir
>  	fi
>
> +	cpuset_restore "$(pwd)/cpuset_backup" cpuset.cpus
> +	cpuset_restore "$(pwd)/cpuset_backup" ${cpu_exclusive}
> +
>  	if [ "$cpu_exclusive_value" != 1 ]; then
>  		# Need to flush, or may be output:
>  		# "write error: Device or resource busy"
> @@ -129,15 +179,15 @@ cpuset_test()
>  	fi
>
>  	# ${cpus} is empty at the begin, that maybe make the system *crash*.
> -	echo 0-1 > ${root_cpuset_dir}/testdir/${cpus}
> +	echo 1 > ${root_cpuset_dir}/testdir/${cpus}
>  	if [ $? -ne 0 ]; then
> -		tst_brkm TFAIL "'echo 0-1 >" \
> +		tst_brkm TFAIL "'echo 1 >" \
>  			"${root_cpuset_dir}/testdir/${cpus}' failed"
>  	fi
>
>  	local cpus_value=$(cat ${root_cpuset_dir}/testdir/${cpus})
> -	if [ "${cpus_value}" != "0-1" ]; then
> -		tst_brkm TFAIL "${cpus} is '${cpus_value}', expected '0-1'"
> +	if [ "${cpus_value}" != "1" ]; then
> +		tst_brkm TFAIL "${cpus} is '${cpus_value}', expected '1'"
>  	fi
>
>  	tst_resm TPASS "Bug is not reproduced"
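To make the trigger concrete: the sequence the regression test exercises boils down to creating an exclusive child cpuset whose cpu list is still empty and then writing a cpu into it. The following is only a rough sketch, not the test itself; it assumes a cgroup v1 cpuset hierarchy mounted at /sys/fs/cgroup/cpuset with the "cpuset." file prefix, whereas the real script detects the mount point and file names in setup().

```sh
#!/bin/sh
# Sketch of the bug-trigger sequence, under the assumptions stated above.
cpuset=/sys/fs/cgroup/cpuset

echo 1 > "$cpuset/cpuset.cpu_exclusive"          # root group gets exclusive cpus
mkdir "$cpuset/testdir"                          # child group, cpu list still empty
echo 1 > "$cpuset/testdir/cpuset.cpu_exclusive"  # make the empty child exclusive too

# With the patch only cpu 1 is written here (instead of 0-1), which the
# commit message says is already enough to hit the bug on unfixed kernels.
echo 1 > "$cpuset/testdir/cpuset.cpus"
```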
Hello,

Joerg Vehlow <lkml@jv-coder.de> writes:

> Hi,
>
> a ping for this patch? Is something like that (workaround environment
> issues) not wanted in ltp?

Generally speaking, yes, you are right to bump it again and these tests
are in need of more attention. However, I have a couple of concerns about
this.

> Greets
> Jörg
>
> On 11/15/2019 11:10 AM, Joerg Vehlow wrote:
>> From: Joerg Vehlow <joerg.vehlow@aox-tech.de>
>>
>> If there are already cpusets defined on the system, that use cpu 0-1,
>> the test fails, because it tries to exclusively use cpu 0-1 for the
>> testcase.
>>
>> The fix sets the cpuset for all cgroups to 0 and disables exclusive
>> cpu usage for the duration of the test and restores it on cleanup.
>> For the test only cpu 1 is set as exclusive. This is enough to
>> trigger the bug this regression test was designed for.
>> This was tested by reverting the commit mentioned in the testcase.

If the system has already set exclusive cpus, then it is unlikely this
regression affects it. Either the kernel has been patched or the system
manager configures the cpus first before setting the exclusive knob.

Normally I would say the test should try to run anyway, but you are
having to make some intrusive changes to the cgroup setup which could
lead to other problems.

So why not just call 'tst_brk TCONF' if the system already has exclusive
cpus configured?
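For comparison, the TCONF variant suggested above would be a small check in setup() instead of the backup/restore machinery. The following is a minimal sketch, not a tested change; it uses the legacy test.sh API this script is still written against (tst_brkm) and reuses the ${root_cpuset_dir} and ${cpu_exclusive} variables the script already resolves.

```sh
# Sketch only: skip the test instead of rewriting existing cpuset groups.
exclusive_groups=$(find ${root_cpuset_dir} -mindepth 2 -name ${cpu_exclusive} \
	-exec grep -l '^1$' {} + 2>/dev/null)
if [ -n "$exclusive_groups" ]; then
	tst_brkm TCONF "exclusive cpusets already configured: ${exclusive_groups}"
fi
```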
Hi,

On 11/16/2020 3:46 PM, Richard Palethorpe wrote:
> If the system has already set exclusive cpus, then it is unlikely this
> regression affects it. Either the kernel has been patched or the system
> manager configures the cpus first before setting the exclusive knob.

Yes, "either or". If the system manager or whatever configured the
cgroups did it in the "right" order, that cannot trigger the bug, so we
do not know whether the bug still exists.

> Normally I would say the test should try to run anyway, but you are
> having to make some intrusive changes to the cgroup setup which could
> lead to other problems.
>
> So why not just call 'tst_brk TCONF' if the system already has exclusive
> cpus configured?

The question is whether ltp should try hard to run a test or not. You may
be right that this could have other effects, but ltp tests can crash a
system anyway, so I wouldn't worry about that. Of course TCONF would be
simpler, but it would also skip the test...

Do you have a scenario in mind where changing the cpusets could
potentially cause problems? It would require a system where something
meaningful is running that needs specific cpu time or a specific cpu. But
if that were the case, all ltp tests could interfere with it, right?

Jörg
Hello Joerg,

Joerg Vehlow <lkml@jv-coder.de> writes:

> Hi,
> On 11/16/2020 3:46 PM, Richard Palethorpe wrote:
>> If the system has already set exclusive cpus, then it is unlikely this
>> regression affects it. Either the kernel has been patched or the system
>> manager configures the cpus first before setting the exclusive knob.
> Yes, "either or". If the system manager or whatever configured the
> cgroups did it in the "right" order, that cannot trigger the bug, so we
> do not know whether the bug still exists.

Yes, and this is why I would normally say we should still try to find
the bug.

>> Normally I would say the test should try to run anyway, but you are
>> having to make some intrusive changes to the cgroup setup which could
>> lead to other problems.
>>
>> So why not just call 'tst_brk TCONF' if the system already has exclusive
>> cpus configured?
> The question is whether ltp should try hard to run a test or not. You may
> be right that this could have other effects, but ltp tests can crash a
> system anyway, so I wouldn't worry about that. Of course TCONF would be
> simpler, but it would also skip the test...

In general we have the rule that tests should try to leave the system in
a working state. Sometimes that is not possible, but usually only when a
test triggers a serious issue.

> Do you have a scenario in mind where changing the cpusets could
> potentially cause problems? It would require a system where something
> meaningful is running that needs specific cpu time or a specific cpu.
> But if that were the case, all ltp tests could interfere with it, right?
>
> Jörg

If we assume there is a good reason for having exclusive cpusets, even if
we don't know that reason, then we can't just remove them and expect the
system to continue working. Possibly it will even cause errors in later,
unrelated tests, and it will take some time for somebody to figure out
that it is due to a process running on the wrong CPU.

I assume that if a particular CGroup has exclusive CPU access, then
processes in the root CGroup will not run in it. However, if they do,
then the user may run LTP tests in a leaf CGroup. So you can't assume all
tests would break such a system.

OTOH TCONF is often ignored, but this seems like quite a small and tricky
corner case that we are adding complexity for.
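As a quick, illustrative check of the leaf-CGroup point (not part of the patch): the cpuset a process is confined to is visible in procfs, assuming the v1 cpuset controller is in use.

```sh
# "/" means the root cpuset; any other path means the process already
# runs inside a child (leaf) cpuset group.
cat /proc/self/cpuset
```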
diff --git a/testcases/kernel/controllers/cpuset/cpuset_regression_test.sh b/testcases/kernel/controllers/cpuset/cpuset_regression_test.sh
index dccfd91cd..ed5e30f2a 100755
--- a/testcases/kernel/controllers/cpuset/cpuset_regression_test.sh
+++ b/testcases/kernel/controllers/cpuset/cpuset_regression_test.sh
@@ -26,6 +26,49 @@ TCID=cpuset_regression_test
 TST_TOTAL=1
 . test.sh
 
+# cpuset_backup_and_update <backup_dir> <what> <value>
+# Create backup of the values of a specific file (<what>)
+# in all cpuset groups and set the value to <value>
+# The backup is written to <backup_dir> in the same structure
+# as in the cpuset filesystem
+cpuset_backup_and_update()
+{
+	local backup_dir=$1
+	local what=$2
+	local value=$3
+	local old_dir=$(pwd)
+
+	cd ${root_cpuset_dir}
+	find . -mindepth 2 -name ${what} -print0 |
+	while IFS= read -r -d '' file; do
+		tst_resm TINFO "Backup ${file} ($(cat "${file}"))"
+		mkdir -p "$(dirname "${backup_dir}/${file}")"
+		cat "${file}" > "${backup_dir}/${file}"
+		echo "${value}" > "${file}"
+	done
+
+	cd ${old_dir}
+}
+
+# cpuset_restore <backup_dir> <what>
+# Restores the value of a file (<what>) in all cpuset
+# groups from the backup created by cpuset_backup_and_update
+cpuset_restore()
+{
+	local backup_dir=$1
+	local what=$2
+	local old_dir=$(pwd)
+
+	cd ${backup_dir}
+	find . -mindepth 2 -name ${what} -print0 |
+	while IFS= read -r -d '' file; do
+		tst_resm TINFO "Restore ${file} ($(cat "${file}"))"
+		cat "${file}" > "${root_cpuset_dir}/${file}"
+	done
+
+	cd ${old_dir}
+}
+
 setup()
 {
 	tst_require_root
@@ -69,6 +112,10 @@ setup()
 			"do not exist."
 	fi
 
+	mkdir cpuset_backup
+	cpuset_backup_and_update "$(pwd)/cpuset_backup" ${cpu_exclusive} 0
+	cpuset_backup_and_update "$(pwd)/cpuset_backup" cpuset.cpus 0
+
 	cpu_exclusive_value=$(cat ${root_cpuset_dir}/${cpu_exclusive})
 	if [ "${cpu_exclusive_value}" != "1" ];then
 		echo 1 > ${root_cpuset_dir}/${cpu_exclusive}
@@ -86,6 +133,9 @@ cleanup()
 		rmdir ${root_cpuset_dir}/testdir
 	fi
 
+	cpuset_restore "$(pwd)/cpuset_backup" cpuset.cpus
+	cpuset_restore "$(pwd)/cpuset_backup" ${cpu_exclusive}
+
 	if [ "$cpu_exclusive_value" != 1 ]; then
 		# Need to flush, or may be output:
 		# "write error: Device or resource busy"
@@ -129,15 +179,15 @@ cpuset_test()
 	fi
 
 	# ${cpus} is empty at the begin, that maybe make the system *crash*.
-	echo 0-1 > ${root_cpuset_dir}/testdir/${cpus}
+	echo 1 > ${root_cpuset_dir}/testdir/${cpus}
 	if [ $? -ne 0 ]; then
-		tst_brkm TFAIL "'echo 0-1 >" \
+		tst_brkm TFAIL "'echo 1 >" \
 			"${root_cpuset_dir}/testdir/${cpus}' failed"
 	fi
 
 	local cpus_value=$(cat ${root_cpuset_dir}/testdir/${cpus})
-	if [ "${cpus_value}" != "0-1" ]; then
-		tst_brkm TFAIL "${cpus} is '${cpus_value}', expected '0-1'"
+	if [ "${cpus_value}" != "1" ]; then
+		tst_brkm TFAIL "${cpus} is '${cpus_value}', expected '1'"
 	fi
 
 	tst_resm TPASS "Bug is not reproduced"
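A possible way to sanity-check that cleanup() really restored everything is to compare the backed-up values against the live cgroup files afterwards. This is only a rough sketch, assuming the cpuset_backup directory the patch creates in the test's temporary directory and the ${root_cpuset_dir} variable from setup(); note the kernel may reformat CPU lists on write, so a byte-wise compare is only an approximation.

```sh
# Sketch only: report any value that differs from its backup after cleanup.
cd cpuset_backup || exit 1
find . -mindepth 2 -type f | while IFS= read -r f; do
	cmp -s "$f" "${root_cpuset_dir}/$f" || echo "not restored: $f"
done
```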