Hugetlb: Test to detect bug with freeing gigantic hugetlb pages

Message ID 20230413090753.883953-1-tsahu@linux.ibm.com
State Changes Requested
Series Hugetlb: Test to detect bug with freeing gigantic hugetlb pages

Commit Message

Tarun Sahu April 13, 2023, 9:07 a.m. UTC
Before kernel version 5.10-rc7, there was a bug that resulted in a
"Bad Page State" error when freeing gigantic hugepages: the struct page
field compound_nr, which overlaps page->mapping in the first tail page,
was not cleared before the pages were freed. To ensure that this issue
does not reoccur as struct page keeps changing and some fields move
under the folio API, this test checks that freeing gigantic hugepages
does not produce the above-mentioned error.
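
For readers unfamiliar with the overlap, a simplified sketch of the
relevant struct page union around v5.9/v5.10 (abbreviated from the
kernel's include/linux/mm_types.h; illustrative, not the full
definition):

	struct page {
		unsigned long flags;
		union {
			struct {	/* Page cache and anonymous pages */
				struct list_head lru;		/* words 1-2 */
				struct address_space *mapping;	/* word 3 */
				pgoff_t index;
				unsigned long private;
			};
			struct {	/* First tail page of compound page */
				unsigned long compound_head;	/* word 1 */
				unsigned char compound_dtor;
				unsigned char compound_order;
				atomic_t compound_mapcount;	/* word 2 */
				unsigned int compound_nr;	/* word 3, overlaps mapping */
			};
			/* ... other variants elided ... */
		};
		/* ... */
	};

A compound_nr left set on the first tail page therefore shows through as
a non-NULL page->mapping when that page is freed, which the page
allocator reports as a bad page state.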

Signed-off-by: Tarun Sahu <tsahu@linux.ibm.com>
---
 .../kernel/mem/hugetlb/hugemmap/hugemmap32.c  | 82 +++++++++++++++++++
 testcases/kernel/mem/hugetlb/lib/hugetlb.h    |  6 ++
 2 files changed, 88 insertions(+)
 create mode 100644 testcases/kernel/mem/hugetlb/hugemmap/hugemmap32.c

Comments

Cyril Hrubis April 13, 2023, 9:22 a.m. UTC | #1
Hi!
> +/*\
> + * [Description]
> + *
> + * Before kernel version 5.10-rc7, there was a bug that resulted in a "Bad Page
> + * State" error when freeing gigantic hugepages. This happened because the
> + * struct page entry compound_nr, which overlapped with page->mapping in the
> + * first tail page, was not cleared, causing the error. To ensure that this
> + * issue does not reoccur as struct page keeps changing and some fields are
> + * managed by folio, this test checks that freeing gigantic hugepages does not
> + * produce the above-mentioned error.
> + */
> +
> +#define _GNU_SOURCE
> +#include <dirent.h>
> +
> +#include <stdio.h>
> +
> +#include "hugetlb.h"
> +
> +#define PATH_GIGANTIC_HUGEPAGE "/sys/kernel/mm/hugepages"
> +#define GIGANTIC_MIN_ORDER 10
> +
> +static int org_g_hpages;
> +static char g_hpage_path[4096];
> +
> +static void run_test(void)
> +{
> +	SAFE_FILE_PRINTF(g_hpage_path, "%d", 1);

I suppose this may still fail if there is not enough memory or the
memory is fragmented, right? I suppose that SAFE_FILE_PRINTF() will
cause TBROK here, right?

Maybe we should just use FILE_PRINTF() and ignore the errors.
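
A minimal sketch of that variant, assuming FILE_PRINTF() returns
non-zero on failure (whether to silently ignore the failure or to exit
with TCONF is settled in the follow-up below):

	static void run_test(void)
	{
		/* Allocation can legitimately fail on low or fragmented memory. */
		if (FILE_PRINTF(g_hpage_path, "%d", 1))
			tst_brk(TCONF, "Could not allocate a gigantic hugepage");

		SAFE_FILE_PRINTF(g_hpage_path, "%d", 0);

		if (tst_taint_check())
			tst_res(TFAIL, "Freeing gigantic pages resulted in Bad Page State bug");
		else
			tst_res(TPASS, "Successfully freed the gigantic hugepages");
	}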

> +	SAFE_FILE_PRINTF(g_hpage_path, "%d", 0);


> +	if (tst_taint_check())
> +		tst_res(TFAIL, "Freeing Gigantic pages resulted in Bad Page State bug.");
> +	else
> +		tst_res(TPASS, "Successfully freed the gigantic hugepages");
> +}
> +
> +static void setup(void)
> +{
> +	DIR *dir;
> +	struct dirent *ent;
> +	char *hpage_size_str;
> +	unsigned long hpage_size;
> +
> +	dir = SAFE_OPENDIR(PATH_GIGANTIC_HUGEPAGE);
> +	while ((ent = SAFE_READDIR(dir)) != NULL) {
                                             ^
					     The != NULL is redundant

> +		if (strstr(ent->d_name, "hugepages-") != NULL) {
> +			hpage_size_str = ent->d_name + strlen("hugepages-");
> +			hpage_size = atoi(hpage_size_str) * 1024;

Can we just do:

	if ((sscanf(ent->d_name, "hugepages-%lu", &hpage_size) == 1) &&
	    is_hugetlb_gigantic(hpage_size * 1024)) {
		sprintf(...);
	}
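
Filled in with the arguments from the original code, the whole loop
body would collapse to roughly:

	while ((ent = SAFE_READDIR(dir))) {
		if ((sscanf(ent->d_name, "hugepages-%lu", &hpage_size) == 1) &&
		    is_hugetlb_gigantic(hpage_size * 1024)) {
			sprintf(g_hpage_path, "%s/%s/nr_hugepages",
				PATH_GIGANTIC_HUGEPAGE, ent->d_name);
			break;
		}
	}

which also drops the strstr()/atoi() pair and the hpage_size_str
variable.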

> +			if (is_hugetlb_gigantic(hpage_size)) {
> +				sprintf(g_hpage_path, "%s/%s/%s",
> +						PATH_GIGANTIC_HUGEPAGE, ent->d_name, "nr_hugepages");
> +				break;
> +			}
> +		}
> +	}
> +	SAFE_CLOSEDIR(dir);

We should handle the case that there were no large hugepages found on
the system. On one of my machines:

# ls /sys/kernel/mm/hugepages/
hugepages-2048kB
#

I suppose:

	if (!g_hpage_path[0])
		tst_brk(TCONF, "Gigantic hugepages not supported");


> +	SAFE_FILE_LINES_SCANF(g_hpage_path, "%d", &org_g_hpages);
> +}
> +
> +static void cleanup(void)
> +{
> +	SAFE_FILE_PRINTF(g_hpage_path, "%d", org_g_hpages);
> +}
> +
> +static struct tst_test test = {
> +	.tags = (struct tst_tag[]) {
> +	    {"linux-git", "ba9c1201beaa"},
> +	    {"linux-git", "7118fc2906e9"},
> +	    {}
> +	},
> +	.needs_root = 1,
> +	.setup = setup,
> +	.cleanup = cleanup,
> +	.test_all = run_test,
> +	.taint_check = TST_TAINT_B,
> +};
> diff --git a/testcases/kernel/mem/hugetlb/lib/hugetlb.h b/testcases/kernel/mem/hugetlb/lib/hugetlb.h
> index 241dab708..34fe08c24 100644
> --- a/testcases/kernel/mem/hugetlb/lib/hugetlb.h
> +++ b/testcases/kernel/mem/hugetlb/lib/hugetlb.h
> @@ -39,6 +39,12 @@
>  # endif
>  #endif
>  
> +/* Check if hugetlb page is gigantic */
> +static inline int is_hugetlb_gigantic(unsigned long hpage_size)
> +{
> +	return (hpage_size / getpagesize()) >> 11;
> +}
> +
>  /*
>   * to get the lower nine permission bits
>   * from shmid_ds.ipc_perm.mode
> -- 
> 2.31.1
>
Tarun Sahu April 13, 2023, 10:49 a.m. UTC | #2
Hi Cyril,
Thanks for reviewing. I posted my comments inline.
Will soon send v2.

Cyril Hrubis <chrubis@suse.cz> writes:

> Hi!
>> +/*\
>> + * [Description]
>> + *
>> + * Before kernel version 5.10-rc7, there was a bug that resulted in a "Bad Page
>> + * State" error when freeing gigantic hugepages. This happened because the
>> + * struct page entry compound_nr, which overlapped with page->mapping in the
>> + * first tail page, was not cleared, causing the error. To ensure that this
>> + * issue does not reoccur as struct page keeps changing and some fields are
>> + * managed by folio, this test checks that freeing gigantic hugepages does not
>> + * produce the above-mentioned error.
>> + */
>> +
>> +#define _GNU_SOURCE
>> +#include <dirent.h>
>> +
>> +#include <stdio.h>
>> +
>> +#include "hugetlb.h"
>> +
>> +#define PATH_GIGANTIC_HUGEPAGE "/sys/kernel/mm/hugepages"
>> +#define GIGANTIC_MIN_ORDER 10
>> +
>> +static int org_g_hpages;
>> +static char g_hpage_path[4096];
>> +
>> +static void run_test(void)
>> +{
>> +	SAFE_FILE_PRINTF(g_hpage_path, "%d", 1);
>
> I suppose this may still fail if there is not enough memory or the
> memory is fragmented, right? I suppose that SAFE_FILE_PRINTF() will
> cause TBROK here, right?
>
> Maybe we should just use FILE_PRINTF() and ignore the errors.

 I understand that it doesn't look good for the test to report TBROK
 when the system is simply unable to allocate gigantic pages; I will
 use tst_brk(TCONF) instead.

I agree with the rest of your comments and will address them in v2.


~ Tarun

Patch

diff --git a/testcases/kernel/mem/hugetlb/hugemmap/hugemmap32.c b/testcases/kernel/mem/hugetlb/hugemmap/hugemmap32.c
new file mode 100644
index 000000000..3dbb42b1c
--- /dev/null
+++ b/testcases/kernel/mem/hugetlb/hugemmap/hugemmap32.c
@@ -0,0 +1,82 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023, IBM Corporation.
+ * Author: Tarun Sahu
+ */
+
+/*\
+ * [Description]
+ *
+ * Before kernel version 5.10-rc7, there was a bug that resulted in a "Bad Page
+ * State" error when freeing gigantic hugepages. This happened because the
+ * struct page entry compound_nr, which overlapped with page->mapping in the
+ * first tail page, was not cleared, causing the error. To ensure that this
+ * issue does not reoccur as struct page keeps changing and some fields are
+ * managed by folio, this test checks that freeing gigantic hugepages does not
+ * produce the above-mentioned error.
+ */
+
+#define _GNU_SOURCE
+#include <dirent.h>
+
+#include <stdio.h>
+
+#include "hugetlb.h"
+
+#define PATH_GIGANTIC_HUGEPAGE "/sys/kernel/mm/hugepages"
+#define GIGANTIC_MIN_ORDER 10
+
+static int org_g_hpages;
+static char g_hpage_path[4096];
+
+static void run_test(void)
+{
+	SAFE_FILE_PRINTF(g_hpage_path, "%d", 1);
+	SAFE_FILE_PRINTF(g_hpage_path, "%d", 0);
+
+	if (tst_taint_check())
+		tst_res(TFAIL, "Freeing Gigantic pages resulted in Bad Page State bug.");
+	else
+		tst_res(TPASS, "Successfully freed the gigantic hugepages");
+}
+
+static void setup(void)
+{
+	DIR *dir;
+	struct dirent *ent;
+	char *hpage_size_str;
+	unsigned long hpage_size;
+
+	dir = SAFE_OPENDIR(PATH_GIGANTIC_HUGEPAGE);
+	while ((ent = SAFE_READDIR(dir)) != NULL) {
+		if (strstr(ent->d_name, "hugepages-") != NULL) {
+			hpage_size_str = ent->d_name + strlen("hugepages-");
+			hpage_size = atoi(hpage_size_str) * 1024;
+			if (is_hugetlb_gigantic(hpage_size)) {
+				sprintf(g_hpage_path, "%s/%s/%s",
+						PATH_GIGANTIC_HUGEPAGE, ent->d_name, "nr_hugepages");
+				break;
+			}
+		}
+	}
+	SAFE_CLOSEDIR(dir);
+	SAFE_FILE_LINES_SCANF(g_hpage_path, "%d", &org_g_hpages);
+}
+
+static void cleanup(void)
+{
+	SAFE_FILE_PRINTF(g_hpage_path, "%d", org_g_hpages);
+}
+
+static struct tst_test test = {
+	.tags = (struct tst_tag[]) {
+	    {"linux-git", "ba9c1201beaa"},
+	    {"linux-git", "7118fc2906e9"},
+	    {}
+	},
+	.needs_root = 1,
+	.setup = setup,
+	.cleanup = cleanup,
+	.test_all = run_test,
+	.taint_check = TST_TAINT_B,
+};
diff --git a/testcases/kernel/mem/hugetlb/lib/hugetlb.h b/testcases/kernel/mem/hugetlb/lib/hugetlb.h
index 241dab708..34fe08c24 100644
--- a/testcases/kernel/mem/hugetlb/lib/hugetlb.h
+++ b/testcases/kernel/mem/hugetlb/lib/hugetlb.h
@@ -39,6 +39,12 @@ 
 # endif
 #endif
 
+/* Check if hugetlb page is gigantic */
+static inline int is_hugetlb_gigantic(unsigned long hpage_size)
+{
+	return (hpage_size / getpagesize()) >> 11;
+}
+
 /*
  * to get the lower nine permission bits
  * from shmid_ds.ipc_perm.mode
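
For reference, the arithmetic in is_hugetlb_gigantic() with 4 KiB base
pages: a 2 MiB hugepage spans 512 base pages and 512 >> 11 == 0, so it
is not gigantic, while a 1 GiB hugepage spans 262144 base pages and
262144 >> 11 == 128, so it is. The shift is nonzero exactly when the
hugepage order reaches 11, i.e. MAX_ORDER on common configurations.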