diff mbox series

[3/3] memcontrol04: Copy from kselftest

Message ID 20220208140322.6842-4-rpalethorpe@suse.com
State Superseded
Headers show
Series memcontrol04 and s/cgroup/cg/ | expand

Commit Message

Richard Palethorpe Feb. 8, 2022, 2:03 p.m. UTC
At first glance this test looks the same as memcontrol03. However
there are some significant changes which complicate combining the two.

Signed-off-by: Richard Palethorpe <rpalethorpe@suse.com>
---
 runtest/controllers                           |   1 +
 testcases/kernel/controllers/memcg/.gitignore |   1 +
 .../kernel/controllers/memcg/memcontrol04.c   | 228 ++++++++++++++++++
 3 files changed, 230 insertions(+)
 create mode 100644 testcases/kernel/controllers/memcg/memcontrol04.c

Comments

Li Wang Feb. 9, 2022, 10:47 a.m. UTC | #1
On Tue, Feb 8, 2022 at 10:04 PM Richard Palethorpe via ltp <
ltp@lists.linux.it> wrote:

> At first glance this test looks the same as memcontrol03. However
> there are some significant changes which complicate combining the two.
>
> Signed-off-by: Richard Palethorpe <rpalethorpe@suse.com>
> ---
>  runtest/controllers                           |   1 +
>  testcases/kernel/controllers/memcg/.gitignore |   1 +
>  .../kernel/controllers/memcg/memcontrol04.c   | 228 ++++++++++++++++++
>  3 files changed, 230 insertions(+)
>  create mode 100644 testcases/kernel/controllers/memcg/memcontrol04.c
>
> diff --git a/runtest/controllers b/runtest/controllers
> index 4a6f919af..3108a2561 100644
> --- a/runtest/controllers
> +++ b/runtest/controllers
> @@ -20,6 +20,7 @@ memcg_control         memcg_control_test.sh
>  memcontrol01 memcontrol01
>  memcontrol02 memcontrol02
>  memcontrol03 memcontrol03
> +memcontrol04 memcontrol04
>
>  cgroup_fj_function_debug cgroup_fj_function.sh debug
>  cgroup_fj_function_cpuset cgroup_fj_function.sh cpuset
> diff --git a/testcases/kernel/controllers/memcg/.gitignore
> b/testcases/kernel/controllers/memcg/.gitignore
> index 49df1582c..3883cede6 100644
> --- a/testcases/kernel/controllers/memcg/.gitignore
> +++ b/testcases/kernel/controllers/memcg/.gitignore
> @@ -8,3 +8,4 @@
>  memcontrol01
>  memcontrol02
>  memcontrol03
> +memcontrol04
> diff --git a/testcases/kernel/controllers/memcg/memcontrol04.c
> b/testcases/kernel/controllers/memcg/memcontrol04.c
> new file mode 100644
> index 000000000..cdfeff4a4
> --- /dev/null
> +++ b/testcases/kernel/controllers/memcg/memcontrol04.c
> @@ -0,0 +1,228 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*\
> + *
> + * [Description]
> + *
> + * Conversion of the fourth kselftest in cgroup/test_memcontrol.c.
> + *
> + * Original description:
> + * "First, this test creates the following hierarchy:
> + * A       memory.low = 50M,  memory.max = 200M
> + * A/B     memory.low = 50M,  memory.current = 50M
> + * A/B/C   memory.low = 75M,  memory.current = 50M
> + * A/B/D   memory.low = 25M,  memory.current = 50M
> + * A/B/E   memory.low = 500M, memory.current = 0
> + * A/B/F   memory.low = 0,    memory.current = 50M
>

This test largely repeats memcontrol03. I'm just wondering whether
it would be possible to merge the two by defining a
struct tcase?

static struct tcase {
       char *ctrl_file;
} tcases[] = {
       {"memory.min"},
       {"memory.low"}
};
Li Wang Feb. 9, 2022, 11:05 a.m. UTC | #2
On Tue, Feb 8, 2022 at 10:04 PM Richard Palethorpe via ltp <
ltp@lists.linux.it> wrote:

> At first glance this test looks the same as memcontrol03. However
> there are some significant changes which complicate combining the two.
>
> Signed-off-by: Richard Palethorpe <rpalethorpe@suse.com>
> ---
>  runtest/controllers                           |   1 +
>  testcases/kernel/controllers/memcg/.gitignore |   1 +
>  .../kernel/controllers/memcg/memcontrol04.c   | 228 ++++++++++++++++++
>  3 files changed, 230 insertions(+)
>  create mode 100644 testcases/kernel/controllers/memcg/memcontrol04.c
>
> diff --git a/runtest/controllers b/runtest/controllers
> index 4a6f919af..3108a2561 100644
> --- a/runtest/controllers
> +++ b/runtest/controllers
> @@ -20,6 +20,7 @@ memcg_control         memcg_control_test.sh
>  memcontrol01 memcontrol01
>  memcontrol02 memcontrol02
>  memcontrol03 memcontrol03
> +memcontrol04 memcontrol04
>
>  cgroup_fj_function_debug cgroup_fj_function.sh debug
>  cgroup_fj_function_cpuset cgroup_fj_function.sh cpuset
> diff --git a/testcases/kernel/controllers/memcg/.gitignore
> b/testcases/kernel/controllers/memcg/.gitignore
> index 49df1582c..3883cede6 100644
> --- a/testcases/kernel/controllers/memcg/.gitignore
> +++ b/testcases/kernel/controllers/memcg/.gitignore
> @@ -8,3 +8,4 @@
>  memcontrol01
>  memcontrol02
>  memcontrol03
> +memcontrol04
> diff --git a/testcases/kernel/controllers/memcg/memcontrol04.c
> b/testcases/kernel/controllers/memcg/memcontrol04.c
> new file mode 100644
> index 000000000..cdfeff4a4
> --- /dev/null
> +++ b/testcases/kernel/controllers/memcg/memcontrol04.c
> @@ -0,0 +1,228 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*\
> + *
> + * [Description]
> + *
> + * Conversion of the fourth kselftest in cgroup/test_memcontrol.c.
> + *
> + * Original description:
> + * "First, this test creates the following hierarchy:
> + * A       memory.low = 50M,  memory.max = 200M
> + * A/B     memory.low = 50M,  memory.current = 50M
> + * A/B/C   memory.low = 75M,  memory.current = 50M
> + * A/B/D   memory.low = 25M,  memory.current = 50M
> + * A/B/E   memory.low = 500M, memory.current = 0
> + * A/B/F   memory.low = 0,    memory.current = 50M
> + *
> + * Usages are pagecache
> + * Then it creates A/G and creates a significant
> + * memory pressure in it.
> + *
> + * A/B    memory.current ~= 50M
> + * A/B/C  memory.current ~= 33M
> + * A/B/D  memory.current ~= 17M
> + * A/B/E  memory.current ~= 0
> + *
> + * After that it tries to allocate more than there is unprotected
> + * memory in A available, and checks that memory.low protects
> + * pagecache even in this case."
> + *
> + * The closest thing to memory.low on V1 is soft_limit_in_bytes which
> + * uses a different mechanism and has different semantics. So we only
> + * test on V2 like the selftest. We do test on more file systems, but
> + * not tmpfs because it can't evict the page cache without swap. Also
> + * we avoid filesystems which allocate extra memory for buffer heads.
> + *
> + * The tolerances have been increased from the self tests.
> + */
> +
> +#define _GNU_SOURCE
> +
> +#include <inttypes.h>
> +
> +#include "memcontrol_common.h"
> +
> +#define TMPDIR "mntdir"
> +
> +static struct tst_cg_group *trunk_cg[3];
> +static struct tst_cg_group *leaf_cg[4];
> +static int fd = -1;
> +
> +enum checkpoints {
> +       CHILD_IDLE
> +};
> +
> +enum trunk_cg {
> +       A,
> +       B,
> +       G
> +};
> +
> +enum leaf_cg {
> +       C,
> +       D,
> +       E,
> +       F
> +};
> +
> +static void cleanup_sub_groups(void)
> +{
> +       size_t i;
> +
> +       for (i = ARRAY_SIZE(leaf_cg); i > 0; i--) {
> +               if (!leaf_cg[i - 1])
> +                       continue;
> +
> +               leaf_cg[i - 1] = tst_cg_group_rm(leaf_cg[i - 1]);
> +       }
> +
> +       for (i = ARRAY_SIZE(trunk_cg); i > 0; i--) {
> +               if (!trunk_cg[i - 1])
> +                       continue;
> +
> +               trunk_cg[i - 1] = tst_cg_group_rm(trunk_cg[i - 1]);
> +       }
> +}
> +
> +static void alloc_anon_in_child(const struct tst_cg_group *const cg,
> +                               const size_t size)
> +{
> +       const pid_t pid = SAFE_FORK();
> +
> +       if (pid) {
> +               tst_reap_children();
> +               return;
> +       }
> +
> +       SAFE_CG_PRINTF(cg, "cgroup.procs", "%d", getpid());
> +
> +       tst_res(TINFO, "Child %d in %s: Allocating anon: %"PRIdPTR,
> +               getpid(), tst_cg_group_name(cg), size);
> +       alloc_anon(size);
> +
> +       exit(0);
> +}
> +
> +static void alloc_pagecache_in_child(const struct tst_cg_group *const cg,
> +                                    const size_t size)
> +{
> +       const pid_t pid = SAFE_FORK();
> +
> +       if (pid) {
> +               tst_reap_children();
> +               return;
> +       }
> +
> +       SAFE_CG_PRINTF(cg, "cgroup.procs", "%d", getpid());
> +
> +       tst_res(TINFO, "Child %d in %s: Allocating pagecache: %"PRIdPTR,
> +               getpid(), tst_cg_group_name(cg), size);
> +       alloc_pagecache(fd, size);
> +
> +       exit(0);
> +}
> +
> +static void test_memcg_min(void)
>

test_memcg_low() ^



> +{
> +       long c[4];
> +       unsigned int i;
> +
> +       fd = SAFE_OPEN(TMPDIR"/tmpfile", O_RDWR | O_CREAT, 0600);
> +       trunk_cg[A] = tst_cg_group_mk(tst_cg, "trunk_A");
> +
> +       SAFE_CG_SCANF(trunk_cg[A], "memory.low", "%ld", c);
> +       if (c[0]) {
> +               tst_brk(TCONF,
> +                       "memory.low already set to %ld on parent group",
> c[0]);
> +       }
> +
> +       SAFE_CG_PRINT(trunk_cg[A], "cgroup.subtree_control", "+memory");
> +
> +       SAFE_CG_PRINT(trunk_cg[A], "memory.max", "200M");
> +       SAFE_CG_PRINT(trunk_cg[A], "memory.swap.max", "0");
> +
> +       trunk_cg[B] = tst_cg_group_mk(trunk_cg[A], "trunk_B");
> +
> +       SAFE_CG_PRINT(trunk_cg[B], "cgroup.subtree_control", "+memory");
> +
> +       trunk_cg[G] = tst_cg_group_mk(trunk_cg[A], "trunk_G");
> +
> +       for (i = 0; i < ARRAY_SIZE(leaf_cg); i++) {
> +               leaf_cg[i] = tst_cg_group_mk(trunk_cg[B],
> +                                                "leaf_%c", 'C' + i);
> +
> +               if (i == E)
> +                       continue;
> +
> +               alloc_pagecache_in_child(leaf_cg[i], MB(50));
> +       }
> +
> +       SAFE_CG_PRINT(trunk_cg[A], "memory.low", "50M");
> +       SAFE_CG_PRINT(trunk_cg[B], "memory.low", "50M");
> +       SAFE_CG_PRINT(leaf_cg[C], "memory.low", "75M");
> +       SAFE_CG_PRINT(leaf_cg[D], "memory.low", "25M");
> +       SAFE_CG_PRINT(leaf_cg[E], "memory.low", "500M");
> +       SAFE_CG_PRINT(leaf_cg[F], "memory.low", "0");
> +
> +       alloc_anon_in_child(trunk_cg[G], MB(148));
> +
> +       SAFE_CG_SCANF(trunk_cg[B], "memory.current", "%ld", c);
> +       TST_EXP_EXPR(values_close(c[0], MB(50), 5),
> +                    "(A/B memory.current=%ld) ~= %d", c[0], MB(50));
> +
> +       for (i = 0; i < ARRAY_SIZE(leaf_cg); i++)
> +               SAFE_CG_SCANF(leaf_cg[i], "memory.current", "%ld", c + i);
> +
> +       TST_EXP_EXPR(values_close(c[0], MB(33), 20),
> +                    "(A/B/C memory.current=%ld) ~= %d", c[0], MB(33));
> +       TST_EXP_EXPR(values_close(c[1], MB(17), 20),
> +                    "(A/B/D memory.current=%ld) ~= %d", c[1], MB(17));
> +       TST_EXP_EXPR(values_close(c[2], 0, 1),
> +                    "(A/B/E memory.current=%ld) ~= 0", c[2]);
> +
> +       alloc_anon_in_child(trunk_cg[G], MB(166));
> +
> +       for (i = 0; i < ARRAY_SIZE(leaf_cg); i++) {
> +               long low, oom;
> +
> +               SAFE_CG_LINES_SCANF(leaf_cg[i], "memory.events",
> +                                       "low %ld", &low);
> +               SAFE_CG_LINES_SCANF(leaf_cg[i], "memory.events",
> +                                       "oom %ld", &oom);
> +
> +               TST_EXP_EXPR(oom == 0, "(oom events=%ld) == 0", oom);
> +
> +               if (i < E)
> +                       TST_EXP_EXPR(low > 0, "(low events=%ld) > 0", low)
> +               else
> +                       TST_EXP_EXPR(low == 0, "(low events=%ld) == 0",
> low);
> +       }
> +
> +       cleanup_sub_groups();
> +       SAFE_CLOSE(fd);
> +       SAFE_UNLINK(TMPDIR"/tmpfile");
> +}
> +
> +static void cleanup(void)
> +{
> +       cleanup_sub_groups();
> +       if (fd > -1)
> +               SAFE_CLOSE(fd);
> +}
> +
> +static struct tst_test test = {
> +       .cleanup = cleanup,
> +       .test_all = test_memcg_min,
>

test_memcg_low  ^



> +       .mount_device = 1,
> +       .dev_min_size = 256,
> +       .mntpoint = TMPDIR,
> +       .all_filesystems = 1,
> +       .skip_filesystems = (const char *const[]){
> +               "exfat", "vfat", "fuse", "ntfs", "tmpfs", NULL
> +       },
> +       .forks_child = 1,
> +       .needs_root = 1,
> +       .needs_checkpoints = 1,
> +       .needs_cgroup_ver = TST_CG_V2,
> +       .needs_cgroup_ctrls = (const char *const[]){ "memory", NULL },
> +};
>


Apart from the minor issues and the disagreement over the renaming in
patch 2/3, the test itself looks good if we decide to keep it separate
from memcontrol03.c.

Btw, there are some TFAILs from my manual run.
(I will look into that and try to figure it out tomorrow)

tst_test.c:1521: TINFO: Testing on ext4
tst_test.c:996: TINFO: Formatting /dev/loop0 with ext4 opts='' extra opts=''
mke2fs 1.46.5 (30-Dec-2021)
tst_test.c:1452: TINFO: Timeout per run is 0h 05m 00s
memcontrol04.c:118: TINFO: Child 242775 in leaf_C: Allocating pagecache:
52428800
memcontrol04.c:118: TINFO: Child 242776 in leaf_D: Allocating pagecache:
52428800
memcontrol04.c:118: TINFO: Child 242777 in leaf_F: Allocating pagecache:
52428800
memcontrol04.c:99: TINFO: Child 242778 in trunk_G: Allocating anon:
155189248
memcontrol04.c:170: TPASS: Expect: (A/B memory.current=54181888) ~= 52428800
memcontrol04.c:176: TPASS: Expect: (A/B/C memory.current=30957568) ~=
34603008
memcontrol04.c:178: TPASS: Expect: (A/B/D memory.current=22282240) ~=
17825792
memcontrol04.c:180: TPASS: Expect: (A/B/E memory.current=0) ~= 0
memcontrol04.c:99: TINFO: Child 242779 in trunk_G: Allocating anon:
174063616
memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
memcontrol04.c:196: TPASS: Expect: (low events=373) > 0
memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
memcontrol04.c:196: TPASS: Expect: (low events=373) > 0
memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
memcontrol04.c:198: TPASS: Expect: (low events=0) == 0
memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
memcontrol04.c:198: TFAIL: Expect: (low events=370) == 0
Li Wang Feb. 10, 2022, 6:23 a.m. UTC | #3
On Wed, Feb 9, 2022 at 7:05 PM Li Wang <liwang@redhat.com> wrote:


> Btw, there are some TFAILs from my manual run.
> (I will look into that try to figure it out tomorrow)
>
> tst_test.c:1521: TINFO: Testing on ext4
> tst_test.c:996: TINFO: Formatting /dev/loop0 with ext4 opts='' extra
> opts=''
> mke2fs 1.46.5 (30-Dec-2021)
> tst_test.c:1452: TINFO: Timeout per run is 0h 05m 00s
> memcontrol04.c:118: TINFO: Child 242775 in leaf_C: Allocating pagecache:
> 52428800
> memcontrol04.c:118: TINFO: Child 242776 in leaf_D: Allocating pagecache:
> 52428800
> memcontrol04.c:118: TINFO: Child 242777 in leaf_F: Allocating pagecache:
> 52428800
> memcontrol04.c:99: TINFO: Child 242778 in trunk_G: Allocating anon:
> 155189248
> memcontrol04.c:170: TPASS: Expect: (A/B memory.current=54181888) ~=
> 52428800
> memcontrol04.c:176: TPASS: Expect: (A/B/C memory.current=30957568) ~=
> 34603008
> memcontrol04.c:178: TPASS: Expect: (A/B/D memory.current=22282240) ~=
> 17825792
> memcontrol04.c:180: TPASS: Expect: (A/B/E memory.current=0) ~= 0
> memcontrol04.c:99: TINFO: Child 242779 in trunk_G: Allocating anon:
> 174063616
> memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
> memcontrol04.c:196: TPASS: Expect: (low events=373) > 0
> memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
> memcontrol04.c:196: TPASS: Expect: (low events=373) > 0
> memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
> memcontrol04.c:198: TPASS: Expect: (low events=0) == 0
> memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
> memcontrol04.c:198: TFAIL: Expect: (low events=370) == 0
>

It looks like a logic issue here, as we alloc_pagecache 50MB in each
of leaf_cg[C, D, E, F] and only the 'memory.low' of leaf_cg[E] is
large enough (500MB) to avoid triggering low events. The remaining
cgroups should all have low events, so the kernel behavior
is correct.

This failure should be fixed with:

--- a/testcases/kernel/controllers/memcg/memcontrol04.c
+++ b/testcases/kernel/controllers/memcg/memcontrol04.c
@@ -192,7 +192,7 @@ static void test_memcg_low(void)

                TST_EXP_EXPR(oom == 0, "(oom events=%ld) == 0", oom);

-               if (i < E)
+               if (i != E)
                        TST_EXP_EXPR(low > 0, "(low events=%ld) > 0", low)
                else
                        TST_EXP_EXPR(low == 0, "(low events=%ld) == 0",
low);


And better to add "leaf_cg%d" print in the output:
(to show the events from which cgroup)

    TST_EXP_EXPR(oom == 0, "(leaf_cg%d: oom events=%ld) == 0", i, oom);

    if (i != E)
            TST_EXP_EXPR(low > 0, "(leaf_cg%d: low events=%ld) > 0", i,
 low)
    else
             TST_EXP_EXPR(low == 0, "(leaf_cg%d: low events=%ld) == 0", i,
low);
Li Wang Feb. 10, 2022, 7:12 a.m. UTC | #4
On Thu, Feb 10, 2022 at 2:23 PM Li Wang <liwang@redhat.com> wrote:

>
>
> On Wed, Feb 9, 2022 at 7:05 PM Li Wang <liwang@redhat.com> wrote:
>
>
>> Btw, there are some TFAILs from my manual run.
>> (I will look into that try to figure it out tomorrow)
>>
>> tst_test.c:1521: TINFO: Testing on ext4
>> tst_test.c:996: TINFO: Formatting /dev/loop0 with ext4 opts='' extra
>> opts=''
>> mke2fs 1.46.5 (30-Dec-2021)
>> tst_test.c:1452: TINFO: Timeout per run is 0h 05m 00s
>> memcontrol04.c:118: TINFO: Child 242775 in leaf_C: Allocating pagecache:
>> 52428800
>> memcontrol04.c:118: TINFO: Child 242776 in leaf_D: Allocating pagecache:
>> 52428800
>> memcontrol04.c:118: TINFO: Child 242777 in leaf_F: Allocating pagecache:
>> 52428800
>> memcontrol04.c:99: TINFO: Child 242778 in trunk_G: Allocating anon:
>> 155189248
>> memcontrol04.c:170: TPASS: Expect: (A/B memory.current=54181888) ~=
>> 52428800
>> memcontrol04.c:176: TPASS: Expect: (A/B/C memory.current=30957568) ~=
>> 34603008
>> memcontrol04.c:178: TPASS: Expect: (A/B/D memory.current=22282240) ~=
>> 17825792
>> memcontrol04.c:180: TPASS: Expect: (A/B/E memory.current=0) ~= 0
>> memcontrol04.c:99: TINFO: Child 242779 in trunk_G: Allocating anon:
>> 174063616
>> memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
>> memcontrol04.c:196: TPASS: Expect: (low events=373) > 0
>> memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
>> memcontrol04.c:196: TPASS: Expect: (low events=373) > 0
>> memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
>> memcontrol04.c:198: TPASS: Expect: (low events=0) == 0
>> memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
>> memcontrol04.c:198: TFAIL: Expect: (low events=370) == 0
>>
>
> It looks like a logic issue here, as we do alloc_pagecache 50MB
> respectively in the leaf_cg[C, D, E, F] and only the 'memory.low'
>

My apologies, leaf_cg[E] does not have 50MB of page cache allocated;
that is the main reason no memory reclaim happens there. The '500MB'
low boundary is obviously an overcommitment, so leaf_cg[E] will get
the part of the parent’s protection proportional to its actual memory
usage. However, that doesn't matter in this case: leaf_cg[E] sees no
growth in events either, because it has no memory consumption at all.

The fix is still effective unless I misread something again:).



> of leaf_cg[E] is large enough (500MB) to avoid triggering low event.
> The rest cgroups should all have low events, that kernel behavior
> is correct.
>
> This failure should be fix with:
>
> --- a/testcases/kernel/controllers/memcg/memcontrol04.c
> +++ b/testcases/kernel/controllers/memcg/memcontrol04.c
> @@ -192,7 +192,7 @@ static void test_memcg_low(void)
>
>                 TST_EXP_EXPR(oom == 0, "(oom events=%ld) == 0", oom);
>
> -               if (i < E)
> +               if (i != E)
>                         TST_EXP_EXPR(low > 0, "(low events=%ld) > 0", low)
>                 else
>                         TST_EXP_EXPR(low == 0, "(low events=%ld) == 0",
> low);
>
>
> And better to add "leaf_cg%d" print in the output:
> (to show the events from which cgroup)
>
>     TST_EXP_EXPR(oom == 0, "(leaf_cg%d: oom events=%ld) == 0", i, oom);
>
>     if (i != E)
>             TST_EXP_EXPR(low > 0, "(leaf_cg%d: low events=%ld) > 0", i,
>  low)
>     else
>              TST_EXP_EXPR(low == 0, "(leaf_cg%d: low events=%ld) == 0",
> i, low);
>
> --
> Regards,
> Li Wang
>
Richard Palethorpe Feb. 10, 2022, 7:34 a.m. UTC | #5
Hello Li,

Li Wang <liwang@redhat.com> writes:

> This test has a high repetition with memcontrol03, I'm just
> wondering if any possibility to merge together by defining a 
> struct tcase?
>
> static struct tcase {
>        char *ctrl_file;
> } tcases[] = {
>        {"memory.min"},
>        {"memory.low"}
> };

There are a number of places where the logic is different. We would end
up with a test full of branches. There is some boilerplate which is
the same for now, but that could also diverge between tests.
Richard Palethorpe Feb. 14, 2022, 5:40 a.m. UTC | #6
Hello Li,

Li Wang <liwang@redhat.com> writes:

> On Thu, Feb 10, 2022 at 2:23 PM Li Wang <liwang@redhat.com> wrote:
>
>  On Wed, Feb 9, 2022 at 7:05 PM Li Wang <liwang@redhat.com> wrote:
>   
>  Btw, there are some TFAILs from my manual run. 
>  (I will look into that try to figure it out tomorrow)
>
>  tst_test.c:1521: TINFO: Testing on ext4
>  tst_test.c:996: TINFO: Formatting /dev/loop0 with ext4 opts='' extra opts=''
>  mke2fs 1.46.5 (30-Dec-2021)
>  tst_test.c:1452: TINFO: Timeout per run is 0h 05m 00s
>  memcontrol04.c:118: TINFO: Child 242775 in leaf_C: Allocating pagecache: 52428800
>  memcontrol04.c:118: TINFO: Child 242776 in leaf_D: Allocating pagecache: 52428800
>  memcontrol04.c:118: TINFO: Child 242777 in leaf_F: Allocating pagecache: 52428800
>  memcontrol04.c:99: TINFO: Child 242778 in trunk_G: Allocating anon: 155189248
>  memcontrol04.c:170: TPASS: Expect: (A/B memory.current=54181888) ~= 52428800
>  memcontrol04.c:176: TPASS: Expect: (A/B/C memory.current=30957568) ~= 34603008
>  memcontrol04.c:178: TPASS: Expect: (A/B/D memory.current=22282240) ~= 17825792
>  memcontrol04.c:180: TPASS: Expect: (A/B/E memory.current=0) ~= 0
>  memcontrol04.c:99: TINFO: Child 242779 in trunk_G: Allocating anon: 174063616
>  memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
>  memcontrol04.c:196: TPASS: Expect: (low events=373) > 0
>  memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
>  memcontrol04.c:196: TPASS: Expect: (low events=373) > 0
>  memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
>  memcontrol04.c:198: TPASS: Expect: (low events=0) == 0
>  memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
>  memcontrol04.c:198: TFAIL: Expect: (low events=370) == 0
>
>  It looks like a logic issue here, as we do alloc_pagecache 50MB
>  respectively in the leaf_cg[C, D, E, F] and only the 'memory.low'
>
> My apologies, the leaf_cg[E] does not have 50MB page cache
> allocating, that is the main reason with no memory reclaims happening.
> '500MB' low boundary obviously overcommitment, so leaf_cg[E] will
> get the part of parent’s protection proportional to its actual memory
> usage. However, it doesn't matter for this case, leaf_cg[E] can get
> no event grows as well. Because it has no memory consumption at all.

leaf_cg[F]'s memory.low == 0. So how can it have reclaim events where
memory.current < memory.low?

Testing on the upstream kernel there are no low events in F.

>
> The fix is still effective unless I misread something again:).
>
>  
>  of leaf_cg[E] is large enough (500MB) to avoid triggering low event.
>  The rest cgroups should all have low events, that kernel behavior
>  is correct.
>
>  This failure should be fix with:
>
>  --- a/testcases/kernel/controllers/memcg/memcontrol04.c
>  +++ b/testcases/kernel/controllers/memcg/memcontrol04.c
>  @@ -192,7 +192,7 @@ static void test_memcg_low(void)
>   
>                  TST_EXP_EXPR(oom == 0, "(oom events=%ld) == 0", oom);
>   
>  -               if (i < E)
>  +               if (i != E)
>                          TST_EXP_EXPR(low > 0, "(low events=%ld) > 0", low)
>                  else
>                          TST_EXP_EXPR(low == 0, "(low events=%ld) ==
>  0", low);

Note that I am just copying the original test. AFAICT the original did
not expect there to be low events in F.

>
>  And better to add "leaf_cg%d" print in the output:
>  (to show the events from which cgroup)
>
>      TST_EXP_EXPR(oom == 0, "(leaf_cg%d: oom events=%ld) == 0", i, oom);
>
>      if (i != E)
>              TST_EXP_EXPR(low > 0, "(leaf_cg%d: low events=%ld) > 0", i,  low)
>      else
>               TST_EXP_EXPR(low == 0, "(leaf_cg%d: low events=%ld) ==
>      0", i, low);

+1, thanks!

>
>  -- 
>  Regards,
>  Li Wang
Li Wang Feb. 16, 2022, 10:13 a.m. UTC | #7
Hi Richard,

On Mon, Feb 14, 2022 at 2:07 PM Richard Palethorpe <rpalethorpe@suse.de>
wrote:

> Hello Li,
>
> Li Wang <liwang@redhat.com> writes:
>
> > On Thu, Feb 10, 2022 at 2:23 PM Li Wang <liwang@redhat.com> wrote:
> >
> >  On Wed, Feb 9, 2022 at 7:05 PM Li Wang <liwang@redhat.com> wrote:
> >
> >  Btw, there are some TFAILs from my manual run.
> >  (I will look into that try to figure it out tomorrow)
> >
> >  tst_test.c:1521: TINFO: Testing on ext4
> >  tst_test.c:996: TINFO: Formatting /dev/loop0 with ext4 opts='' extra
> opts=''
> >  mke2fs 1.46.5 (30-Dec-2021)
> >  tst_test.c:1452: TINFO: Timeout per run is 0h 05m 00s
> >  memcontrol04.c:118: TINFO: Child 242775 in leaf_C: Allocating
> pagecache: 52428800
> >  memcontrol04.c:118: TINFO: Child 242776 in leaf_D: Allocating
> pagecache: 52428800
> >  memcontrol04.c:118: TINFO: Child 242777 in leaf_F: Allocating
> pagecache: 52428800
> >  memcontrol04.c:99: TINFO: Child 242778 in trunk_G: Allocating anon:
> 155189248
> >  memcontrol04.c:170: TPASS: Expect: (A/B memory.current=54181888) ~=
> 52428800
> >  memcontrol04.c:176: TPASS: Expect: (A/B/C memory.current=30957568) ~=
> 34603008
> >  memcontrol04.c:178: TPASS: Expect: (A/B/D memory.current=22282240) ~=
> 17825792
> >  memcontrol04.c:180: TPASS: Expect: (A/B/E memory.current=0) ~= 0
> >  memcontrol04.c:99: TINFO: Child 242779 in trunk_G: Allocating anon:
> 174063616
> >  memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
> >  memcontrol04.c:196: TPASS: Expect: (low events=373) > 0
> >  memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
> >  memcontrol04.c:196: TPASS: Expect: (low events=373) > 0
> >  memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
> >  memcontrol04.c:198: TPASS: Expect: (low events=0) == 0
> >  memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
> >  memcontrol04.c:198: TFAIL: Expect: (low events=370) == 0
> >
> >  It looks like a logic issue here, as we do alloc_pagecache 50MB
> >  respectively in the leaf_cg[C, D, E, F] and only the 'memory.low'
> >
> > My apologies, the leaf_cg[E] does not have 50MB page cache
> > allocating, that is the main reason with no memory reclaims happening.
> > '500MB' low boundary obviously overcommitment, so leaf_cg[E] will
> > get the part of parent’s protection proportional to its actual memory
> > usage. However, it doesn't matter for this case, leaf_cg[E] can get
> > no event grows as well. Because it has no memory consumption at all.
>
> leaf_cg[F]'s memory.low == 0. So how can it have reclaim events where
> memory.current < memory.low?
>
> Testing on the upstream kernel there are no low events in F.
>

A bit weird...

In my test, the low events do indeed occur
(even the original selftest fails in the same way),
and leaf_cg[F]'s memory.current is almost 1MB.

# uname -r
5.17.0-rc4.liwang
...
tst_test.c:1521: TINFO: Testing on xfs
tst_test.c:996: TINFO: Formatting /dev/loop0 with xfs opts='' extra opts=''
tst_test.c:1452: TINFO: Timeout per run is 0h 05m 00s
memcontrol04.c:118: TINFO: Child 21200 in leaf_C: Allocating pagecache:
52428800
memcontrol04.c:118: TINFO: Child 21201 in leaf_D: Allocating pagecache:
52428800
memcontrol04.c:118: TINFO: Child 21202 in leaf_F: Allocating pagecache:
52428800
memcontrol04.c:99: TINFO: Child 21203 in trunk_G: Allocating anon: 155189248
memcontrol04.c:170: TPASS: Expect: (A/B memory.current=54083584) ~= 52428800
memcontrol04.c:176: TPASS: Expect: (A/B/C memory.current=29822976) ~=
34603008
memcontrol04.c:178: TPASS: Expect: (A/B/D memory.current=22061056) ~=
17825792
memcontrol04.c:180: TPASS: Expect: (A/B/E memory.current=0) ~= 0
memcontrol04.c:99: TINFO: Child 21204 in trunk_G: Allocating anon: 174063616
memcontrol04.c:193: TPASS: Expect: (leaf_cg0: oom events=0) == 0
memcontrol04.c:196: TPASS: Expect: (leaf_cg0: low events=427) > 0
memcontrol04.c:193: TPASS: Expect: (leaf_cg1: oom events=0) == 0
memcontrol04.c:196: TPASS: Expect: (leaf_cg1: low events=427) > 0
memcontrol04.c:193: TPASS: Expect: (leaf_cg2: oom events=0) == 0
memcontrol04.c:198: TPASS: Expect: (leaf_cg2: low events=0) == 0
memcontrol04.c:193: TPASS: Expect: (leaf_cg3: oom events=0) == 0
memcontrol04.c:198: TFAIL: Expect: (leaf_cg3: low events=415) == 0

Summary:
passed   44
failed   4
broken   0
skipped  0
warnings 0


# pwd
/root/linux-5.17-rc4/tools/testing/selftests/cgroup

# ./test_memcontrol
ok 1 test_memcg_subtree_control
ok 2 test_memcg_current
ok 3 test_memcg_min
not ok 4 test_memcg_low
Richard Palethorpe Feb. 17, 2022, 4:35 a.m. UTC | #8
Hello Li,

Li Wang <liwang@redhat.com> writes:

> Hi Richard,
>
> On Mon, Feb 14, 2022 at 2:07 PM Richard Palethorpe <rpalethorpe@suse.de> wrote:
>
>  Hello Li,
>
>  Li Wang <liwang@redhat.com> writes:
>
>  > On Thu, Feb 10, 2022 at 2:23 PM Li Wang <liwang@redhat.com> wrote:
>  >
>  >  On Wed, Feb 9, 2022 at 7:05 PM Li Wang <liwang@redhat.com> wrote:
>  >   
>  >  Btw, there are some TFAILs from my manual run. 
>  >  (I will look into that try to figure it out tomorrow)
>  >
>  >  tst_test.c:1521: TINFO: Testing on ext4
>  >  tst_test.c:996: TINFO: Formatting /dev/loop0 with ext4 opts='' extra opts=''
>  >  mke2fs 1.46.5 (30-Dec-2021)
>  >  tst_test.c:1452: TINFO: Timeout per run is 0h 05m 00s
>  >  memcontrol04.c:118: TINFO: Child 242775 in leaf_C: Allocating pagecache: 52428800
>  >  memcontrol04.c:118: TINFO: Child 242776 in leaf_D: Allocating pagecache: 52428800
>  >  memcontrol04.c:118: TINFO: Child 242777 in leaf_F: Allocating pagecache: 52428800
>  >  memcontrol04.c:99: TINFO: Child 242778 in trunk_G: Allocating anon: 155189248
>  >  memcontrol04.c:170: TPASS: Expect: (A/B memory.current=54181888) ~= 52428800
>  >  memcontrol04.c:176: TPASS: Expect: (A/B/C memory.current=30957568) ~= 34603008
>  >  memcontrol04.c:178: TPASS: Expect: (A/B/D memory.current=22282240) ~= 17825792
>  >  memcontrol04.c:180: TPASS: Expect: (A/B/E memory.current=0) ~= 0
>  >  memcontrol04.c:99: TINFO: Child 242779 in trunk_G: Allocating anon: 174063616
>  >  memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
>  >  memcontrol04.c:196: TPASS: Expect: (low events=373) > 0
>  >  memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
>  >  memcontrol04.c:196: TPASS: Expect: (low events=373) > 0
>  >  memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
>  >  memcontrol04.c:198: TPASS: Expect: (low events=0) == 0
>  >  memcontrol04.c:193: TPASS: Expect: (oom events=0) == 0
>  >  memcontrol04.c:198: TFAIL: Expect: (low events=370) == 0
>  >
>  >  It looks like a logic issue here, as we do alloc_pagecache 50MB
>  >  respectively in the leaf_cg[C, D, E, F] and only the 'memory.low'
>  >
>  > My apologies, the leaf_cg[E] does not have 50MB page cache
>  > allocating, that is the main reason with no memory reclaims happening.
>  > '500MB' low boundary obviously overcommitment, so leaf_cg[E] will
>  > get the part of parent’s protection proportional to its actual memory
>  > usage. However, it doesn't matter for this case, leaf_cg[E] can get
>  > no event grows as well. Because it has no memory consumption at all.
>
>  leaf_cg[F]'s memory.low == 0. So how can it have reclaim events where
>  memory.current < memory.low?
>
>  Testing on the upstream kernel there are no low events in F.
>
> A bit weird...
>
> From my test, it indeed has the events that occur.
> (even the original test fail as the same)
> And the leaf_cg[F]'s memory.current is almost 1MB.
>
> # uname -r
> 5.17.0-rc4.liwang
> ...
> tst_test.c:1521: TINFO: Testing on xfs
> tst_test.c:996: TINFO: Formatting /dev/loop0 with xfs opts='' extra opts=''
> tst_test.c:1452: TINFO: Timeout per run is 0h 05m 00s
> memcontrol04.c:118: TINFO: Child 21200 in leaf_C: Allocating pagecache: 52428800
> memcontrol04.c:118: TINFO: Child 21201 in leaf_D: Allocating pagecache: 52428800
> memcontrol04.c:118: TINFO: Child 21202 in leaf_F: Allocating pagecache: 52428800
> memcontrol04.c:99: TINFO: Child 21203 in trunk_G: Allocating anon: 155189248
> memcontrol04.c:170: TPASS: Expect: (A/B memory.current=54083584) ~= 52428800
> memcontrol04.c:176: TPASS: Expect: (A/B/C memory.current=29822976) ~= 34603008
> memcontrol04.c:178: TPASS: Expect: (A/B/D memory.current=22061056) ~= 17825792
> memcontrol04.c:180: TPASS: Expect: (A/B/E memory.current=0) ~= 0
> memcontrol04.c:99: TINFO: Child 21204 in trunk_G: Allocating anon: 174063616
> memcontrol04.c:193: TPASS: Expect: (leaf_cg0: oom events=0) == 0
> memcontrol04.c:196: TPASS: Expect: (leaf_cg0: low events=427) > 0
> memcontrol04.c:193: TPASS: Expect: (leaf_cg1: oom events=0) == 0
> memcontrol04.c:196: TPASS: Expect: (leaf_cg1: low events=427) > 0
> memcontrol04.c:193: TPASS: Expect: (leaf_cg2: oom events=0) == 0
> memcontrol04.c:198: TPASS: Expect: (leaf_cg2: low events=0) == 0
> memcontrol04.c:193: TPASS: Expect: (leaf_cg3: oom events=0) == 0
> memcontrol04.c:198: TFAIL: Expect: (leaf_cg3: low events=415) == 0
>
> Summary:
> passed   44
> failed   4
> broken   0
> skipped  0
> warnings 0
>
> # pwd
> /root/linux-5.17-rc4/tools/testing/selftests/cgroup
>
> # ./test_memcontrol 
> ok 1 test_memcg_subtree_control
> ok 2 test_memcg_current
> ok 3 test_memcg_min
> not ok 4 test_memcg_low

I tested on 5.17-rc4 and it still passed. I also printed some more info

EXT4:

memcontrol04.c:118: TINFO: Child 507 in leaf_C: Allocating pagecache: 52428800
memcontrol04.c:118: TINFO: Child 508 in leaf_D: Allocating pagecache: 52428800
memcontrol04.c:118: TINFO: Child 509 in leaf_F: Allocating pagecache: 52428800
memcontrol04.c:99: TINFO: Child 510 in trunk_G: Allocating anon: 155189248
memcontrol04.c:170: TPASS: Expect: (A/B memory.current=54157312) ~= 52428800
memcontrol04.c:176: TPASS: Expect: (A/B/C memory.current=31477760) ~= 34603008
memcontrol04.c:178: TPASS: Expect: (A/B/D memory.current=21635072) ~= 17825792
memcontrol04.c:180: TPASS: Expect: (A/B/E memory.current=0) ~= 0
memcontrol04.c:182: TINFO: A/B/F memory.current=970752
memcontrol04.c:99: TINFO: Child 511 in trunk_G: Allocating anon: 174063616
memcontrol04.c:195: TINFO: A: low events=1229, oom events=0
memcontrol04.c:195: TINFO: B: low events=1229, oom events=0
memcontrol04.c:195: TINFO: G: low events=0, oom events=0
memcontrol04.c:208: TPASS: Expect: (C oom events=0) == 0
memcontrol04.c:211: TPASS: Expect: (C low events=412) > 0
memcontrol04.c:208: TPASS: Expect: (D oom events=0) == 0
memcontrol04.c:211: TPASS: Expect: (D low events=412) > 0
memcontrol04.c:208: TPASS: Expect: (E oom events=0) == 0
memcontrol04.c:213: TPASS: Expect: (E low events=0) == 0
memcontrol04.c:208: TPASS: Expect: (F oom events=0) == 0
memcontrol04.c:213: TPASS: Expect: (F low events=0) == 0

XFS:

memcontrol04.c:118: TINFO: Child 523 in leaf_C: Allocating pagecache: 52428800
memcontrol04.c:118: TINFO: Child 524 in leaf_D: Allocating pagecache: 52428800
memcontrol04.c:118: TINFO: Child 525 in leaf_F: Allocating pagecache: 52428800
memcontrol04.c:99: TINFO: Child 526 in trunk_G: Allocating anon: 155189248
memcontrol04.c:170: TPASS: Expect: (A/B memory.current=54038528) ~= 52428800
memcontrol04.c:176: TPASS: Expect: (A/B/C memory.current=30371840) ~= 34603008
memcontrol04.c:178: TPASS: Expect: (A/B/D memory.current=22536192) ~= 17825792
memcontrol04.c:180: TPASS: Expect: (A/B/E memory.current=0) ~= 0
memcontrol04.c:182: TINFO: A/B/F memory.current=942080
memcontrol04.c:99: TINFO: Child 527 in trunk_G: Allocating anon: 174063616
memcontrol04.c:195: TINFO: A: low events=1288, oom events=0
memcontrol04.c:195: TINFO: B: low events=1288, oom events=0
memcontrol04.c:195: TINFO: G: low events=0, oom events=0
memcontrol04.c:208: TPASS: Expect: (C oom events=0) == 0
memcontrol04.c:211: TPASS: Expect: (C low events=432) > 0
memcontrol04.c:208: TPASS: Expect: (D oom events=0) == 0
memcontrol04.c:211: TPASS: Expect: (D low events=432) > 0
memcontrol04.c:208: TPASS: Expect: (E oom events=0) == 0
memcontrol04.c:213: TPASS: Expect: (E low events=0) == 0
memcontrol04.c:208: TPASS: Expect: (F oom events=0) == 0
memcontrol04.c:213: TPASS: Expect: (F low events=0) == 0

BTRFS:

memcontrol04.c:118: TINFO: Child 548 in leaf_C: Allocating pagecache: 52428800
memcontrol04.c:118: TINFO: Child 549 in leaf_D: Allocating pagecache: 52428800
memcontrol04.c:118: TINFO: Child 550 in leaf_F: Allocating pagecache: 52428800
memcontrol04.c:99: TINFO: Child 551 in trunk_G: Allocating anon: 155189248
memcontrol04.c:170: TPASS: Expect: (A/B memory.current=54140928) ~= 52428800
memcontrol04.c:176: TPASS: Expect: (A/B/C memory.current=29540352) ~= 34603008
memcontrol04.c:178: TPASS: Expect: (A/B/D memory.current=21815296) ~= 17825792
memcontrol04.c:180: TPASS: Expect: (A/B/E memory.current=0) ~= 0
memcontrol04.c:182: TINFO: A/B/F memory.current=2711552
memcontrol04.c:99: TINFO: Child 552 in trunk_G: Allocating anon: 174063616
memcontrol04.c:195: TINFO: A: low events=1245, oom events=0
memcontrol04.c:195: TINFO: B: low events=1245, oom events=0
memcontrol04.c:195: TINFO: G: low events=0, oom events=0
memcontrol04.c:208: TPASS: Expect: (C oom events=0) == 0
memcontrol04.c:211: TPASS: Expect: (C low events=423) > 0
memcontrol04.c:208: TPASS: Expect: (D oom events=0) == 0
memcontrol04.c:211: TPASS: Expect: (D low events=423) > 0
memcontrol04.c:208: TPASS: Expect: (E oom events=0) == 0
memcontrol04.c:213: TPASS: Expect: (E low events=0) == 0
memcontrol04.c:208: TPASS: Expect: (F oom events=0) == 0
memcontrol04.c:213: TPASS: Expect: (F low events=0) == 0

I will post another version of the patch-set with the extra debug
info. I wonder if the low events in F are from the parent (B)?
diff mbox series

Patch

diff --git a/runtest/controllers b/runtest/controllers
index 4a6f919af..3108a2561 100644
--- a/runtest/controllers
+++ b/runtest/controllers
@@ -20,6 +20,7 @@  memcg_control		memcg_control_test.sh
 memcontrol01 memcontrol01
 memcontrol02 memcontrol02
 memcontrol03 memcontrol03
+memcontrol04 memcontrol04
 
 cgroup_fj_function_debug cgroup_fj_function.sh debug
 cgroup_fj_function_cpuset cgroup_fj_function.sh cpuset
diff --git a/testcases/kernel/controllers/memcg/.gitignore b/testcases/kernel/controllers/memcg/.gitignore
index 49df1582c..3883cede6 100644
--- a/testcases/kernel/controllers/memcg/.gitignore
+++ b/testcases/kernel/controllers/memcg/.gitignore
@@ -8,3 +8,4 @@ 
 memcontrol01
 memcontrol02
 memcontrol03
+memcontrol04
diff --git a/testcases/kernel/controllers/memcg/memcontrol04.c b/testcases/kernel/controllers/memcg/memcontrol04.c
new file mode 100644
index 000000000..cdfeff4a4
--- /dev/null
+++ b/testcases/kernel/controllers/memcg/memcontrol04.c
@@ -0,0 +1,228 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*\
+ *
+ * [Description]
+ *
+ * Conversion of the fourth kselftest in cgroup/test_memcontrol.c.
+ *
+ * Original description:
+ * "First, this test creates the following hierarchy:
+ * A       memory.low = 50M,  memory.max = 200M
+ * A/B     memory.low = 50M,  memory.current = 50M
+ * A/B/C   memory.low = 75M,  memory.current = 50M
+ * A/B/D   memory.low = 25M,  memory.current = 50M
+ * A/B/E   memory.low = 500M, memory.current = 0
+ * A/B/F   memory.low = 0,    memory.current = 50M
+ *
+ * Usages are pagecache
+ * Then it creates A/G and creates a significant
+ * memory pressure in it.
+ *
+ * A/B    memory.current ~= 50M
+ * A/B/C  memory.current ~= 33M
+ * A/B/D  memory.current ~= 17M
+ * A/B/E  memory.current ~= 0
+ *
+ * After that it tries to allocate more than there is unprotected
+ * memory in A available, and checks that memory.low protects
+ * pagecache even in this case."
+ *
+ * The closest thing to memory.low on V1 is soft_limit_in_bytes which
+ * uses a different mechanism and has different semantics. So we only
+ * test on V2 like the selftest. We do test on more file systems, but
+ * not tmpfs because it can't evict the page cache without swap. Also
+ * we avoid filesystems which allocate extra memory for buffer heads.
+ *
+ * The tolerances have been increased from the self tests.
+ */
+
+#define _GNU_SOURCE
+
+#include <inttypes.h>
+
+#include "memcontrol_common.h"
+
+#define TMPDIR "mntdir"
+
+static struct tst_cg_group *trunk_cg[3]; /* indexed by enum trunk_cg: A, B, G */
+static struct tst_cg_group *leaf_cg[4]; /* indexed by enum leaf_cg: C, D, E, F */
+static int fd = -1; /* tmpfile under TMPDIR, handed to alloc_pagecache() */
+
+enum checkpoints {
+	CHILD_IDLE /* NOTE(review): not referenced anywhere in this file */
+};
+
+enum trunk_cg { /* non-leaf groups: trunk_A, and trunk_B / trunk_G below it */
+	A,
+	B,
+	G
+};
+
+enum leaf_cg { /* children of trunk_B, created as leaf_C .. leaf_F */
+	C,
+	D,
+	E,
+	F
+};
+
+/*
+ * Remove every leaf group and then every trunk group, highest index first.
+ * NULL slots are skipped; others take tst_cg_group_rm()'s return value.
+ */
+static void cleanup_sub_groups(void)
+{
+	size_t n;
+
+	for (n = ARRAY_SIZE(leaf_cg); n-- > 0;) {
+		if (leaf_cg[n])
+			leaf_cg[n] = tst_cg_group_rm(leaf_cg[n]);
+	}
+
+	for (n = ARRAY_SIZE(trunk_cg); n-- > 0;) {
+		if (trunk_cg[n])
+			trunk_cg[n] = tst_cg_group_rm(trunk_cg[n]);
+	}
+}
+
+/*
+ * Fork a child that writes its PID to cg's cgroup.procs, calls
+ * alloc_anon(size) and exits; the parent calls tst_reap_children() and returns.
+ */
+static void alloc_anon_in_child(const struct tst_cg_group *const cg,
+				const size_t size)
+{
+	if (SAFE_FORK()) {
+		tst_reap_children();
+		return;
+	}
+
+	SAFE_CG_PRINTF(cg, "cgroup.procs", "%d", getpid());
+	tst_res(TINFO, "Child %d in %s: Allocating anon: %zu",
+		getpid(), tst_cg_group_name(cg), size);
+	alloc_anon(size);
+	exit(0);
+}
+
+/*
+ * Fork a child that writes its PID to cg's cgroup.procs, calls
+ * alloc_pagecache(fd, size) and exits; the parent reaps it before returning.
+ */
+static void alloc_pagecache_in_child(const struct tst_cg_group *const cg,
+				     const size_t size)
+{
+	if (SAFE_FORK()) {
+		tst_reap_children();
+		return;
+	}
+
+	SAFE_CG_PRINTF(cg, "cgroup.procs", "%d", getpid());
+	tst_res(TINFO, "Child %d in %s: Allocating pagecache: %zu",
+		getpid(), tst_cg_group_name(cg), size);
+	alloc_pagecache(fd, size);
+	exit(0);
+}
+
+/*
+ * Despite its name this test exercises memory.low, not memory.min; the cgroup
+ * tree and the expected usages are laid out in [Description] above.
+ */
+static void test_memcg_min(void)
+{
+	long c[4];
+	unsigned int i;
+
+	fd = SAFE_OPEN(TMPDIR"/tmpfile", O_RDWR | O_CREAT, 0600);
+	trunk_cg[A] = tst_cg_group_mk(tst_cg, "trunk_A");
+
+	SAFE_CG_SCANF(trunk_cg[A], "memory.low", "%ld", c);
+	if (c[0]) {
+		tst_brk(TCONF,
+			"memory.low already set to %ld on parent group", c[0]);
+	}
+
+	SAFE_CG_PRINT(trunk_cg[A], "cgroup.subtree_control", "+memory");
+	SAFE_CG_PRINT(trunk_cg[A], "memory.max", "200M");
+	SAFE_CG_PRINT(trunk_cg[A], "memory.swap.max", "0");
+
+	trunk_cg[B] = tst_cg_group_mk(trunk_cg[A], "trunk_B");
+	SAFE_CG_PRINT(trunk_cg[B], "cgroup.subtree_control", "+memory");
+	trunk_cg[G] = tst_cg_group_mk(trunk_cg[A], "trunk_G");
+
+	for (i = 0; i < ARRAY_SIZE(leaf_cg); i++) {
+		leaf_cg[i] = tst_cg_group_mk(trunk_cg[B],
+						 "leaf_%c", 'C' + i);
+		if (i != E)	/* E is created but gets no page cache */
+			alloc_pagecache_in_child(leaf_cg[i], MB(50));
+	}
+
+	SAFE_CG_PRINT(trunk_cg[A], "memory.low", "50M");
+	SAFE_CG_PRINT(trunk_cg[B], "memory.low", "50M");
+	SAFE_CG_PRINT(leaf_cg[C], "memory.low", "75M");
+	SAFE_CG_PRINT(leaf_cg[D], "memory.low", "25M");
+	SAFE_CG_PRINT(leaf_cg[E], "memory.low", "500M");
+	SAFE_CG_PRINT(leaf_cg[F], "memory.low", "0");
+
+	alloc_anon_in_child(trunk_cg[G], MB(148));
+	SAFE_CG_SCANF(trunk_cg[B], "memory.current", "%ld", c);
+	TST_EXP_EXPR(values_close(c[0], MB(50), 5),
+		     "(A/B memory.current=%ld) ~= %d", c[0], MB(50));
+
+	for (i = 0; i < ARRAY_SIZE(leaf_cg); i++)
+		SAFE_CG_SCANF(leaf_cg[i], "memory.current", "%ld", c + i);
+
+	TST_EXP_EXPR(values_close(c[0], MB(33), 20),
+		     "(A/B/C memory.current=%ld) ~= %d", c[0], MB(33));
+	TST_EXP_EXPR(values_close(c[1], MB(17), 20),
+		     "(A/B/D memory.current=%ld) ~= %d", c[1], MB(17));
+	TST_EXP_EXPR(values_close(c[2], 0, 1),
+		     "(A/B/E memory.current=%ld) ~= 0", c[2]);
+
+	alloc_anon_in_child(trunk_cg[G], MB(166));
+
+	for (i = 0; i < ARRAY_SIZE(leaf_cg); i++) {
+		const char id = 'C' + i;	/* tag results with the leaf letter */
+		long low, oom;
+
+		SAFE_CG_LINES_SCANF(leaf_cg[i], "memory.events",
+					"low %ld", &low);
+		SAFE_CG_LINES_SCANF(leaf_cg[i], "memory.events",
+					"oom %ld", &oom);
+
+		TST_EXP_EXPR(oom == 0, "(%c oom events=%ld) == 0", id, oom);
+		if (i < E) {	/* braces: valid whatever the macro expands to */
+			TST_EXP_EXPR(low > 0,
+				     "(%c low events=%ld) > 0", id, low);
+		} else {
+			TST_EXP_EXPR(low == 0,
+				     "(%c low events=%ld) == 0", id, low);
+		}
+	}
+
+	cleanup_sub_groups();
+	SAFE_CLOSE(fd);
+	SAFE_UNLINK(TMPDIR"/tmpfile");
+}
+
+static void cleanup(void) /* installed as .cleanup in struct tst_test below */
+{
+	cleanup_sub_groups(); /* harmless if already run: NULL slots are skipped */
+	if (fd > -1) /* NOTE(review): assumes SAFE_CLOSE() resets fd to -1; confirm */
+		SAFE_CLOSE(fd);
+}
+
+static struct tst_test test = { /* test definition for memcontrol04 */
+	.cleanup = cleanup,
+	.test_all = test_memcg_min, /* name aside, this is the memory.low test */
+	.mount_device = 1,
+	.dev_min_size = 256, /* presumably MB; unit not visible here, confirm */
+	.mntpoint = TMPDIR, /* test_memcg_min() creates tmpfile under this path */
+	.all_filesystems = 1,
+	.skip_filesystems = (const char *const[]){ /* see [Description] for why */
+		"exfat", "vfat", "fuse", "ntfs", "tmpfs", NULL
+	},
+	.forks_child = 1, /* the alloc_*_in_child() helpers call SAFE_FORK() */
+	.needs_root = 1,
+	.needs_checkpoints = 1, /* NOTE(review): no checkpoint use visible; confirm */
+	.needs_cgroup_ver = TST_CG_V2, /* [Description]: only tested on V2 */
+	.needs_cgroup_ctrls = (const char *const[]){ "memory", NULL },
+};