diff mbox series

[6/6] memcontrol03: Copy from kselftest

Message ID 20220127061225.23368-7-rpalethorpe@suse.com
State Accepted
Headers show
Series Add memcontrol03 and declarative CG API | expand

Commit Message

Richard Palethorpe Jan. 27, 2022, 6:12 a.m. UTC
Note that the tolerances had to be increased slightly otherwise the
test only passed on ext4 in upstream 5.16 on x86_64. In all cases it
seems more memory is evicted from C than expected and not enough from
D. This may indicate some tuning is possible, but does not look like a
serious regression.

Signed-off-by: Richard Palethorpe <rpalethorpe@suse.com>
---
 .../kernel/controllers/memcg/memcontrol03.c   | 231 ++++++++++++++++++
 1 file changed, 231 insertions(+)
 create mode 100644 testcases/kernel/controllers/memcg/memcontrol03.c

Comments

Richard Palethorpe Jan. 27, 2022, 8:26 a.m. UTC | #1
Hello,

Richard Palethorpe <rpalethorpe@suse.com> writes:

> Note that the tolerances had to be increased slightly otherwise the
> test only passed on ext4 in upstream 5.16 on x86_64. In all cases it
> seems more memory is evicted from C than expected and not enough from
> D. This may indicate some tuning is possible, but does not look like a
> serious regression.
>
> Signed-off-by: Richard Palethorpe <rpalethorpe@suse.com>
> ---
>  .../kernel/controllers/memcg/memcontrol03.c   | 231 ++++++++++++++++++
>  1 file changed, 231 insertions(+)
>  create mode 100644 testcases/kernel/controllers/memcg/memcontrol03.c

Oops, I forgot to add the gitignore and runtest entries!
Cyril Hrubis Jan. 28, 2022, 10:57 a.m. UTC | #2
Hi!
> +// SPDX-License-Identifier: GPL-2.0
> +/*\
> + *
> + * [Description]
> + *
> + * Conversion of the third kself test in cgroup/test_memcontrol.c.
> + *
> + * Original description:
> + * "First, this test creates the following hierarchy:
> + * A       memory.min = 50M,  memory.max = 200M
> + * A/B     memory.min = 50M,  memory.current = 50M
> + * A/B/C   memory.min = 75M,  memory.current = 50M
> + * A/B/D   memory.min = 25M,  memory.current = 50M
> + * A/B/E   memory.min = 500M, memory.current = 0
> + * A/B/F   memory.min = 0,    memory.current = 50M
> + *
> + * Usages are pagecache, but the test keeps a running
> + * process in every leaf cgroup.
> + * Then it creates A/G and creates a significant
> + * memory pressure in it.
> + *
> + * A/B    memory.current ~= 50M
> + * A/B/C  memory.current ~= 33M
> + * A/B/D  memory.current ~= 17M
> + * A/B/E  memory.current ~= 0
> + *
> + * After that it tries to allocate more than there is unprotected
> + * memory in A available, and checks that memory.min protects
> + * pagecache even in this case."
> + *
> + * memory.min doesn't appear to exist on V1 so we only test on V2 like
> + * the selftest. We do test on more file systems, but not tempfs
> + * becaue it can't evict the page cache without swap. Also we avoid
> + * filesystems which allocate extra memory for buffer heads.
> + *
> + * The tolerances have been increased from the self tests.
> + *
> + */
> +
> +#define _GNU_SOURCE
> +
> +#include <inttypes.h>
> +
> +#include "memcontrol_common.h"
> +
> +#define TMPDIR "mntdir"
> +
> +static const struct tst_cgroup_group *cg_test;
> +static struct tst_cgroup_group *parent[3];
> +static struct tst_cgroup_group *children[4];
> +static int fd;
> +
> +enum checkpoints {
> +	CHILD_IDLE,
> +	TEST_DONE,
> +};
> +
> +static void cleanup_sub_groups(void)
> +{
> +	size_t i;
> +
> +	for (i = ARRAY_SIZE(children); i > 0; i--) {
> +		if (!children[i - 1])
> +			continue;
> +
> +		TST_CHECKPOINT_WAKE2(TEST_DONE,
> +				     ARRAY_SIZE(children) - 1);
> +		tst_reap_children();
> +		break;
> +	}
> +
> +	for (i = ARRAY_SIZE(children); i > 0; i--) {
> +		if (!children[i - 1])
> +			continue;
> +
> +		children[i - 1] = tst_cgroup_group_rm(children[i - 1]);
> +	}
> +
> +	for (i = ARRAY_SIZE(parent); i > 0; i--) {
> +		if (!parent[i - 1])
> +			continue;
> +
> +		parent[i - 1] = tst_cgroup_group_rm(parent[i - 1]);
> +	}
> +}
> +
> +static void alloc_anon_in_child(const struct tst_cgroup_group *const cg,
> +				const size_t size, const int expect_oom)
> +{
> +	int status;
> +	const pid_t pid = SAFE_FORK();
> +
> +	if (!pid) {
> +		SAFE_CGROUP_PRINTF(cg, "cgroup.procs", "%d", getpid());
> +
> +		tst_res(TINFO, "%d in %s: Allocating anon: %"PRIdPTR,
> +		getpid(), tst_cgroup_group_name(cg), size);
> +		alloc_anon(size);
> +		exit(0);
> +	}
> +
> +	if (expect_oom)
> +		SAFE_WAITPID(pid, &status, 0);
> +	else
> +		tst_reap_child(pid);

I guess that we can do the SAFE_WAITPID() here in both cases but just
adjust expectations:

	SAFE_WAITPID(pid, &status, 0);

	if (WIFEXITTED(status) && WEXITSTATUS(status) == 0)
		return;

	if (expect_oom && WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL)
		return;

	tst_res(TFAIL, "Child %s", tst_strstatus(status);

> +}
> +
> +static void alloc_pagecache_in_child(const struct tst_cgroup_group *const cg,
> +				     const size_t size)
> +{
> +	const pid_t pid = SAFE_FORK();
> +
> +	if (pid) {
> +		TST_CHECKPOINT_WAIT(CHILD_IDLE);
> +		return;
> +	}
> +
> +	SAFE_CGROUP_PRINTF(cg, "cgroup.procs", "%d", getpid());
> +
> +	tst_res(TINFO, "%d in %s: Allocating pagecache: %"PRIdPTR,
> +		getpid(), tst_cgroup_group_name(cg), size);
> +	alloc_pagecache(fd, size);
> +
> +	TST_CHECKPOINT_WAKE(CHILD_IDLE);
> +	TST_CHECKPOINT_WAIT(TEST_DONE);
> +	exit(0);
> +}
> +
> +/*
> + */

Empty comment.

> +static void test_memcg_min(void)
> +{
> +	long c[4];
> +	size_t i, attempts;
> +	char child_name[64];
> +
> +	fd = SAFE_OPEN(TMPDIR"/tmpfile", O_RDWR | O_CREAT, 0600);
> +	parent[0] = tst_cgroup_group_mk(cg_test, "memcg_test_0");
> +
> +	SAFE_CGROUP_SCANF(parent[0], "memory.min", "%ld", c);
> +	if (c[0]) {
> +		tst_brk(TCONF,
> +			"memory.min already set to %ld on parent group", c[0]);
> +	}
> +
> +	if (!TST_CGROUP_VER_IS_V1(parent[0], "memory")) {
> +		SAFE_CGROUP_PRINT(parent[0], "cgroup.subtree_control",
> +				  "+memory");
> +	}
> +	SAFE_CGROUP_PRINT(parent[0], "memory.max", "200M");
> +	SAFE_CGROUP_PRINT(parent[0], "memory.swap.max", "0");
> +
> +	parent[1] = tst_cgroup_group_mk(parent[0], "memcg_test_1");
> +	if (!TST_CGROUP_VER_IS_V1(parent[0], "memory")) {
> +		SAFE_CGROUP_PRINT(parent[1], "cgroup.subtree_control",
> +				  "+memory");

Don't we enable the memory controller automatically in the LTP library?

> +	}
> +
> +	parent[2] = tst_cgroup_group_mk(parent[0], "memcg_test_2");
> +
> +	for (i = 0; i < ARRAY_SIZE(children); i++) {
> +		sprintf(child_name, "child_memcg_%"PRIdPTR, i);
> +
> +		children[i] = tst_cgroup_group_mk(parent[1], child_name);

I guess that it would be a bit more elegant if the tst_cgroup_group_mk()
would have been printf-like function.

> +		if (i == 2)
> +			continue;
> +
> +		alloc_pagecache_in_child(children[i], MB(50));
> +	}
> +
> +	SAFE_CGROUP_PRINT(parent[0], "memory.min", "50M");
> +	SAFE_CGROUP_PRINT(parent[1], "memory.min", "50M");
> +	SAFE_CGROUP_PRINT(children[0], "memory.min", "75M");
> +	SAFE_CGROUP_PRINT(children[1], "memory.min", "25M");
> +	SAFE_CGROUP_PRINT(children[2], "memory.min", "500M");
> +	SAFE_CGROUP_PRINT(children[3], "memory.min", "0");
> +
> +	for (attempts = 0; attempts < 5; attempts++) {
> +		SAFE_CGROUP_SCANF(parent[1], "memory.current", "%ld", c);
> +		if (values_close(c[0], MB(150), 3))
> +			break;
> +
> +		sleep(1);
> +	}
> +
> +	alloc_anon_in_child(parent[2], MB(148), 0);

I find the usage of parent and child in the code a bit confusing, the
parent[2] is actually a child of parent[0].

Maybe we should call them "generation" such as gen1, gen2, etc.

So that we would have:

	gen1 = tst_cgroup_group_mk(cg_test, "memcg_test_gen1");
	gen2[0] = tst_cgroup_group_mk(gen1, "memcg_test_gen2_0");
	gen2[1] = tst_cgroup_group_mk(gen2, "memcg_test_get2_1");

	for (...)
		gen3[i] = tst_cgroup_group_mk(gen2[0], "memcg_test_gen3_%i", i);

> +	SAFE_CGROUP_SCANF(parent[1], "memory.current", "%ld", c);
> +	TST_EXP_EXPR(values_close(c[0], MB(50), 5), "(A/B memory.current=%ld) ~= %d", c[0], MB(50));
> +
> +	for (i = 0; i < ARRAY_SIZE(children); i++)
> +		SAFE_CGROUP_SCANF(children[i], "memory.current", "%ld", c + i);
> +
> +	TST_EXP_EXPR(values_close(c[0], MB(33), 20), "(A/B/C memory.current=%ld) ~= %d", c[0], MB(33));
> +	TST_EXP_EXPR(values_close(c[1], MB(17), 20), "(A/B/D memory.current=%ld) ~= %d", c[1], MB(17));
> +	TST_EXP_EXPR(values_close(c[2], 0, 1), "(A/B/E memory.current=%ld) ~= 0", c[2]);
> +
> +	alloc_anon_in_child(parent[2], MB(170), 1);
> +
> +	SAFE_CGROUP_SCANF(parent[1], "memory.current", "%ld", c);
> +	TST_EXP_EXPR(values_close(c[0], MB(50), 5), "(A/B memory.current=%ld) ~= %d", c[0], MB(50));
> +
> +	cleanup_sub_groups();
> +	SAFE_CLOSE(fd);
> +}
> +
> +static void cleanup(void)
> +{
> +	cleanup_sub_groups();
> +	if (fd > -1)
> +		SAFE_CLOSE(fd);

Technically this should either be fd > 0 or the global should be
initialized to -1.

> +}
> +
> +static struct tst_test test = {
> +	.cleanup = cleanup,
> +	.test_all = test_memcg_min,
> +	.mount_device = 1,
> +	.dev_min_size = 256,
> +	.mntpoint = TMPDIR,
> +	.all_filesystems = 1,
> +	.skip_filesystems = (const char *const[]){
> +		"exfat", "vfat", "fuse", "ntfs", "tmpfs", NULL
> +	},
> +	.forks_child = 1,
> +	.needs_root = 1,
> +	.needs_checkpoints = 1,
> +	.needs_cgroup_ver = TST_CGROUP_V2,
> +	.needs_cgroup_controllers = (const char *const[]){ "memory", NULL },
> +	.test_cgroup = &cg_test,
> +};
> -- 
> 2.34.1
> 
> 
> -- 
> Mailing list info: https://lists.linux.it/listinfo/ltp
diff mbox series

Patch

diff --git a/testcases/kernel/controllers/memcg/memcontrol03.c b/testcases/kernel/controllers/memcg/memcontrol03.c
new file mode 100644
index 000000000..4d6ca9761
--- /dev/null
+++ b/testcases/kernel/controllers/memcg/memcontrol03.c
@@ -0,0 +1,231 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*\
+ *
+ * [Description]
+ *
+ * Conversion of the third kself test in cgroup/test_memcontrol.c.
+ *
+ * Original description:
+ * "First, this test creates the following hierarchy:
+ * A       memory.min = 50M,  memory.max = 200M
+ * A/B     memory.min = 50M,  memory.current = 50M
+ * A/B/C   memory.min = 75M,  memory.current = 50M
+ * A/B/D   memory.min = 25M,  memory.current = 50M
+ * A/B/E   memory.min = 500M, memory.current = 0
+ * A/B/F   memory.min = 0,    memory.current = 50M
+ *
+ * Usages are pagecache, but the test keeps a running
+ * process in every leaf cgroup.
+ * Then it creates A/G and creates a significant
+ * memory pressure in it.
+ *
+ * A/B    memory.current ~= 50M
+ * A/B/C  memory.current ~= 33M
+ * A/B/D  memory.current ~= 17M
+ * A/B/E  memory.current ~= 0
+ *
+ * After that it tries to allocate more than there is unprotected
+ * memory in A available, and checks that memory.min protects
+ * pagecache even in this case."
+ *
+ * memory.min doesn't appear to exist on V1 so we only test on V2 like
+ * the selftest. We do test on more file systems, but not tempfs
+ * becaue it can't evict the page cache without swap. Also we avoid
+ * filesystems which allocate extra memory for buffer heads.
+ *
+ * The tolerances have been increased from the self tests.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <inttypes.h>
+
+#include "memcontrol_common.h"
+
+#define TMPDIR "mntdir"
+
+static const struct tst_cgroup_group *cg_test;
+static struct tst_cgroup_group *parent[3];
+static struct tst_cgroup_group *children[4];
+static int fd;
+
+enum checkpoints {
+	CHILD_IDLE,
+	TEST_DONE,
+};
+
+static void cleanup_sub_groups(void)
+{
+	size_t i;
+
+	for (i = ARRAY_SIZE(children); i > 0; i--) {
+		if (!children[i - 1])
+			continue;
+
+		TST_CHECKPOINT_WAKE2(TEST_DONE,
+				     ARRAY_SIZE(children) - 1);
+		tst_reap_children();
+		break;
+	}
+
+	for (i = ARRAY_SIZE(children); i > 0; i--) {
+		if (!children[i - 1])
+			continue;
+
+		children[i - 1] = tst_cgroup_group_rm(children[i - 1]);
+	}
+
+	for (i = ARRAY_SIZE(parent); i > 0; i--) {
+		if (!parent[i - 1])
+			continue;
+
+		parent[i - 1] = tst_cgroup_group_rm(parent[i - 1]);
+	}
+}
+
+static void alloc_anon_in_child(const struct tst_cgroup_group *const cg,
+				const size_t size, const int expect_oom)
+{
+	int status;
+	const pid_t pid = SAFE_FORK();
+
+	if (!pid) {
+		SAFE_CGROUP_PRINTF(cg, "cgroup.procs", "%d", getpid());
+
+		tst_res(TINFO, "%d in %s: Allocating anon: %"PRIdPTR,
+		getpid(), tst_cgroup_group_name(cg), size);
+		alloc_anon(size);
+		exit(0);
+	}
+
+	if (expect_oom)
+		SAFE_WAITPID(pid, &status, 0);
+	else
+		tst_reap_child(pid);
+}
+
+static void alloc_pagecache_in_child(const struct tst_cgroup_group *const cg,
+				     const size_t size)
+{
+	const pid_t pid = SAFE_FORK();
+
+	if (pid) {
+		TST_CHECKPOINT_WAIT(CHILD_IDLE);
+		return;
+	}
+
+	SAFE_CGROUP_PRINTF(cg, "cgroup.procs", "%d", getpid());
+
+	tst_res(TINFO, "%d in %s: Allocating pagecache: %"PRIdPTR,
+		getpid(), tst_cgroup_group_name(cg), size);
+	alloc_pagecache(fd, size);
+
+	TST_CHECKPOINT_WAKE(CHILD_IDLE);
+	TST_CHECKPOINT_WAIT(TEST_DONE);
+	exit(0);
+}
+
+/*
+ */
+static void test_memcg_min(void)
+{
+	long c[4];
+	size_t i, attempts;
+	char child_name[64];
+
+	fd = SAFE_OPEN(TMPDIR"/tmpfile", O_RDWR | O_CREAT, 0600);
+	parent[0] = tst_cgroup_group_mk(cg_test, "memcg_test_0");
+
+	SAFE_CGROUP_SCANF(parent[0], "memory.min", "%ld", c);
+	if (c[0]) {
+		tst_brk(TCONF,
+			"memory.min already set to %ld on parent group", c[0]);
+	}
+
+	if (!TST_CGROUP_VER_IS_V1(parent[0], "memory")) {
+		SAFE_CGROUP_PRINT(parent[0], "cgroup.subtree_control",
+				  "+memory");
+	}
+	SAFE_CGROUP_PRINT(parent[0], "memory.max", "200M");
+	SAFE_CGROUP_PRINT(parent[0], "memory.swap.max", "0");
+
+	parent[1] = tst_cgroup_group_mk(parent[0], "memcg_test_1");
+	if (!TST_CGROUP_VER_IS_V1(parent[0], "memory")) {
+		SAFE_CGROUP_PRINT(parent[1], "cgroup.subtree_control",
+				  "+memory");
+	}
+
+	parent[2] = tst_cgroup_group_mk(parent[0], "memcg_test_2");
+
+	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		sprintf(child_name, "child_memcg_%"PRIdPTR, i);
+
+		children[i] = tst_cgroup_group_mk(parent[1], child_name);
+
+		if (i == 2)
+			continue;
+
+		alloc_pagecache_in_child(children[i], MB(50));
+	}
+
+	SAFE_CGROUP_PRINT(parent[0], "memory.min", "50M");
+	SAFE_CGROUP_PRINT(parent[1], "memory.min", "50M");
+	SAFE_CGROUP_PRINT(children[0], "memory.min", "75M");
+	SAFE_CGROUP_PRINT(children[1], "memory.min", "25M");
+	SAFE_CGROUP_PRINT(children[2], "memory.min", "500M");
+	SAFE_CGROUP_PRINT(children[3], "memory.min", "0");
+
+	for (attempts = 0; attempts < 5; attempts++) {
+		SAFE_CGROUP_SCANF(parent[1], "memory.current", "%ld", c);
+		if (values_close(c[0], MB(150), 3))
+			break;
+
+		sleep(1);
+	}
+
+	alloc_anon_in_child(parent[2], MB(148), 0);
+
+	SAFE_CGROUP_SCANF(parent[1], "memory.current", "%ld", c);
+	TST_EXP_EXPR(values_close(c[0], MB(50), 5), "(A/B memory.current=%ld) ~= %d", c[0], MB(50));
+
+	for (i = 0; i < ARRAY_SIZE(children); i++)
+		SAFE_CGROUP_SCANF(children[i], "memory.current", "%ld", c + i);
+
+	TST_EXP_EXPR(values_close(c[0], MB(33), 20), "(A/B/C memory.current=%ld) ~= %d", c[0], MB(33));
+	TST_EXP_EXPR(values_close(c[1], MB(17), 20), "(A/B/D memory.current=%ld) ~= %d", c[1], MB(17));
+	TST_EXP_EXPR(values_close(c[2], 0, 1), "(A/B/E memory.current=%ld) ~= 0", c[2]);
+
+	alloc_anon_in_child(parent[2], MB(170), 1);
+
+	SAFE_CGROUP_SCANF(parent[1], "memory.current", "%ld", c);
+	TST_EXP_EXPR(values_close(c[0], MB(50), 5), "(A/B memory.current=%ld) ~= %d", c[0], MB(50));
+
+	cleanup_sub_groups();
+	SAFE_CLOSE(fd);
+}
+
+static void cleanup(void)
+{
+	cleanup_sub_groups();
+	if (fd > -1)
+		SAFE_CLOSE(fd);
+}
+
+static struct tst_test test = {
+	.cleanup = cleanup,
+	.test_all = test_memcg_min,
+	.mount_device = 1,
+	.dev_min_size = 256,
+	.mntpoint = TMPDIR,
+	.all_filesystems = 1,
+	.skip_filesystems = (const char *const[]){
+		"exfat", "vfat", "fuse", "ntfs", "tmpfs", NULL
+	},
+	.forks_child = 1,
+	.needs_root = 1,
+	.needs_checkpoints = 1,
+	.needs_cgroup_ver = TST_CGROUP_V2,
+	.needs_cgroup_controllers = (const char *const[]){ "memory", NULL },
+	.test_cgroup = &cg_test,
+};