diff mbox series

[v2] io:nftw/ftw:fix stack overflow when large nopenfd [BZ #26353]

Message ID 20200815070851.46403-1-nixiaoming@huawei.com
State New
Headers show
Series [v2] io:nftw/ftw:fix stack overflow when large nopenfd [BZ #26353] | expand

Commit Message

Xiaoming Ni Aug. 15, 2020, 7:08 a.m. UTC
In ftw_startup(), call alloca to apply for a large amount of stack space.
When descriptors is very large, stack overflow is triggered. BZ #26353

To fix the problem:
1. Set the upper limit of descriptors to getdtablesize().
2. Replace alloca() in ftw_startup() with malloc().

v2:
  not set errno after malloc fails.
  add check ftw return value on testcase
  add more testcase
v1: https://public-inbox.org/libc-alpha/20200808084640.49174-1-nixiaoming@huawei.com/
---
 io/Makefile      |  3 ++-
 io/ftw.c         | 15 +++++++++++++--
 io/tst-bz26353.c | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 49 insertions(+), 3 deletions(-)
 create mode 100644 io/tst-bz26353.c

Comments

Xiaoming Ni Aug. 22, 2020, 2:23 a.m. UTC | #1
ping

On 2020/8/15 15:08, Xiaoming Ni wrote:
> In ftw_startup(), call alloca to apply for a large amount of stack space.
> When descriptors is very large, stack overflow is triggered. BZ #26353
> 
> To fix the problem:
> 1. Set the upper limit of descriptors to getdtablesize().
> 2. Replace alloca() in ftw_startup() with malloc().
> 
> v2:
>    not set errno after malloc fails.
>    add check ftw return value on testcase
>    add more testcase
> v1: https://public-inbox.org/libc-alpha/20200808084640.49174-1-nixiaoming@huawei.com/
> ---
>   io/Makefile      |  3 ++-
>   io/ftw.c         | 15 +++++++++++++--
>   io/tst-bz26353.c | 34 ++++++++++++++++++++++++++++++++++
>   3 files changed, 49 insertions(+), 3 deletions(-)
>   create mode 100644 io/tst-bz26353.c
> 
> diff --git a/io/Makefile b/io/Makefile
> index cf380f3516..0f674c317f 100644
> --- a/io/Makefile
> +++ b/io/Makefile
> @@ -74,7 +74,8 @@ tests		:= test-utime test-stat test-stat2 test-lfs tst-getcwd \
>   		   tst-posix_fallocate tst-posix_fallocate64 \
>   		   tst-fts tst-fts-lfs tst-open-tmpfile \
>   		   tst-copy_file_range tst-getcwd-abspath tst-lockf \
> -		   tst-ftw-lnk tst-file_change_detection tst-lchmod
> +		   tst-ftw-lnk tst-file_change_detection tst-lchmod \
> +		   tst-bz26353
>   
>   # Likewise for statx, but we do not need static linking here.
>   tests-internal += tst-statx
> diff --git a/io/ftw.c b/io/ftw.c
> index 8c79d29a9e..f8771a257b 100644
> --- a/io/ftw.c
> +++ b/io/ftw.c
> @@ -643,18 +643,28 @@ ftw_startup (const char *dir, int is_nftw, void *func, int descriptors,
>         __set_errno (ENOENT);
>         return -1;
>       }
> +  if (descriptors > getdtablesize())
> +    {
> +      __set_errno (EINVAL);
> +      return -1;
> +    }
>   
>     data.maxdir = descriptors < 1 ? 1 : descriptors;
>     data.actdir = 0;
> -  data.dirstreams = (struct dir_data **) alloca (data.maxdir
> +  data.dirstreams = (struct dir_data **) malloc (data.maxdir
>   						 * sizeof (struct dir_data *));
> +  if (data.dirstreams == NULL)
> +    return -1;
>     memset (data.dirstreams, '\0', data.maxdir * sizeof (struct dir_data *));
>   
>     /* PATH_MAX is always defined when we get here.  */
>     data.dirbufsize = MAX (2 * strlen (dir), PATH_MAX);
>     data.dirbuf = (char *) malloc (data.dirbufsize);
>     if (data.dirbuf == NULL)
> -    return -1;
> +    {
> +      free (data.dirstreams);
> +      return -1;
> +    }
>     cp = __stpcpy (data.dirbuf, dir);
>     /* Strip trailing slashes.  */
>     while (cp > data.dirbuf + 1 && cp[-1] == '/')
> @@ -805,6 +815,7 @@ ftw_startup (const char *dir, int is_nftw, void *func, int descriptors,
>     __tdestroy (data.known_objects, free);
>     free (data.dirbuf);
>     __set_errno (save_err);
> +  free (data.dirstreams);
>   
>     return result;
>   }
> diff --git a/io/tst-bz26353.c b/io/tst-bz26353.c
> new file mode 100644
> index 0000000000..53e71bfe5e
> --- /dev/null
> +++ b/io/tst-bz26353.c
> @@ -0,0 +1,34 @@
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <ftw.h>
> +#include <sys/stat.h>
> +#include <errno.h>
> +#include <limits.h>
> +
> +int my_func(const char *file, const struct stat *sb ,int flag)
> +{
> +  printf ("%s\n", file);
> +  return 0;
> +}
> +
> +/*Check whether stack overflow occurs*/
> +int do_test(int large_nopenfd)
> +{
> +  int ret = ftw("./tst-bz26353", my_func, large_nopenfd);
> +  printf ("test big num %d, ret=%d errno=%d\n", large_nopenfd, ret, errno);
> +  if (ret != -1 || errno != EINVAL)
> +    return 1;
> +  return 0;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +  int ret = 0;
> +  mkdir ("./tst-bz26353", 0755);
> +  ret += do_test(getdtablesize() + 1);
> +  ret += do_test(8192 * 1024);
> +  ret += do_test(INT_MAX);
> +  rmdir ("./tst-bz26353");
> +  return ret;
> +}
>
Paul Eggert Aug. 22, 2020, 2:51 a.m. UTC | #2
On 8/21/20 7:23 PM, Xiaoming Ni wrote:
> -  data.dirstreams = (struct dir_data **) alloca (data.maxdir
> +  data.dirstreams = (struct dir_data **) malloc (data.maxdir
>                             * sizeof (struct dir_data *));

Surely this should call alloca unless data.maxdir is too large, as malloc puts 
pressure on the heap. That's what's done elsewhere in glibc, anyway.
Xiaoming Ni Aug. 22, 2020, 3:27 a.m. UTC | #3
On 2020/8/22 10:51, Paul Eggert wrote:
> On 8/21/20 7:23 PM, Xiaoming Ni wrote:
>> -  data.dirstreams = (struct dir_data **) alloca (data.maxdir
>> +  data.dirstreams = (struct dir_data **) malloc (data.maxdir
>>                             * sizeof (struct dir_data *));
> 
> Surely this should call alloca unless data.maxdir is too large, as 
> malloc puts pressure on the heap. That's what's done elsewhere in glibc, 
> anyway.

The user may run the setrlimit() or unlimit -s command to modify the 
stack space limit.
When the user runs the ftw() command, the remaining stack space cannot 
be determined.
How do I determine whether data.maxdir is too large for alloca?

malloc puts pressure on the heap, cause ENOMEM
but alloca puts  pressure on the stack, cause stack overflow, Worse.

Thanks
Xiaoming Ni
Paul Eggert Aug. 22, 2020, 6:09 p.m. UTC | #4
On 8/21/20 8:27 PM, Xiaoming Ni wrote:
> How do I determine whether data.maxdir is too large for alloca?

__libc_use_alloca. Also see include/scratch_buffer.h, which is designed for this 
sort of situation.
Xiaoming Ni Aug. 24, 2020, 8:31 a.m. UTC | #5
On 2020/8/23 2:09, Paul Eggert wrote:
> On 8/21/20 8:27 PM, Xiaoming Ni wrote:
>> How do I determine whether data.maxdir is too large for alloca?
> 
> __libc_use_alloca. Also see include/scratch_buffer.h, which is designed 
> for this sort of situation.
> 
> .

is that ?

--- a/io/ftw.c
+++ b/io/ftw.c
@@ -645,6 +645,13 @@ ftw_startup (const char *dir, int is_nftw, void 
*func, int descriptors,
      }

    data.maxdir = descriptors < 1 ? 1 : descriptors;
+  if ((__glibc_unlikely (data.maxdir > SIZE_MAX / sizeof (struct 
dir_data *)))
+      || (! __libc_use_alloca (data.maxdir * sizeof (struct dir_data *))))
+    {
+      __set_errno (EINVAL);
+      return -1;
+    }
+
    data.actdir = 0;
    data.dirstreams = (struct dir_data **) alloca (data.maxdir
                                                  * sizeof (struct 
dir_data *));


Whether to use malloc or return an error message when the input is too 
large?

I still don't understand why libc uses alloca so much.
Is it for performance?

Thanks
Xiaoming Ni
Adhemerval Zanella Aug. 24, 2020, 2:32 p.m. UTC | #6
On 24/08/2020 05:31, Xiaoming Ni wrote:
> On 2020/8/23 2:09, Paul Eggert wrote:
>> On 8/21/20 8:27 PM, Xiaoming Ni wrote:
>>> How do I determine whether data.maxdir is too large for alloca?
>>
>> __libc_use_alloca. Also see include/scratch_buffer.h, which is designed for this sort of situation.
>>
>> .
> 
> is that ?
> 
> --- a/io/ftw.c
> +++ b/io/ftw.c
> @@ -645,6 +645,13 @@ ftw_startup (const char *dir, int is_nftw, void *func, int descriptors,
>      }
> 
>    data.maxdir = descriptors < 1 ? 1 : descriptors;
> +  if ((__glibc_unlikely (data.maxdir > SIZE_MAX / sizeof (struct dir_data *)))
> +      || (! __libc_use_alloca (data.maxdir * sizeof (struct dir_data *))))
> +    {
> +      __set_errno (EINVAL);
> +      return -1;
> +    }
> +
>    data.actdir = 0;
>    data.dirstreams = (struct dir_data **) alloca (data.maxdir
>                                                  * sizeof (struct dir_data *));
> 

I would prefer to just remove alloca altogether and either move to dynamic 
allocation or use hybrid approach and use a scratch_buffer or a dynarray.  
In this specific case, it already does a unconditional dynamic memory 
allocation for the 'data.dirbuf', so why not just allocate the 'dirstream' 
buffer on the same block as:

  data.maxdir = descriptors < 1 ? 1 : descriptors;
  data.actdir = 0;
  data.dirbufsize = MAX (2 * strlen (dir), PATH_MAX);

  data.dirstreams = malloc (data.maxdir * sizeof (struct dir_data *)
			    + data.dirbufsize);
  if (data.dirstreams == NULL)
    return -1;

  memset (data.dirstreams, '\0', data.maxdir * sizeof (struct dir_data *));

  data.dirbuf = (char *) data.dirstreams
		+ data.maxdir * sizeof (struct dir_data *);

It would require adjust the size of 'process_entry' for the realloc (to take
in consideration the 'dirstreams' size).

> 
> Whether to use malloc or return an error message when the input is too large?
> 
> I still don't understand why libc uses alloca so much.
> Is it for performance?

Most of the code that uses alloca is pre-C99 that provides VLA or are code where
there is no prior way to know how much stack would be allocated (that's why the
__libc_use_alloca which tries to put a high bound in stack usage).  However, 
dynamic stack allocation either with alloca or VLA tend to produce bad code gen
(as the Linux kernel developers has observed) and increases the possibility of
multiple issues (unbounded stack allocation, performance issues, etc.). There
were multiple issues with alloca usage over the years, so recently we were 
moving the code to either scratch_buffer or dynarray.
diff mbox series

Patch

diff --git a/io/Makefile b/io/Makefile
index cf380f3516..0f674c317f 100644
--- a/io/Makefile
+++ b/io/Makefile
@@ -74,7 +74,8 @@  tests		:= test-utime test-stat test-stat2 test-lfs tst-getcwd \
 		   tst-posix_fallocate tst-posix_fallocate64 \
 		   tst-fts tst-fts-lfs tst-open-tmpfile \
 		   tst-copy_file_range tst-getcwd-abspath tst-lockf \
-		   tst-ftw-lnk tst-file_change_detection tst-lchmod
+		   tst-ftw-lnk tst-file_change_detection tst-lchmod \
+		   tst-bz26353
 
 # Likewise for statx, but we do not need static linking here.
 tests-internal += tst-statx
diff --git a/io/ftw.c b/io/ftw.c
index 8c79d29a9e..f8771a257b 100644
--- a/io/ftw.c
+++ b/io/ftw.c
@@ -643,18 +643,28 @@  ftw_startup (const char *dir, int is_nftw, void *func, int descriptors,
       __set_errno (ENOENT);
       return -1;
     }
+  if (descriptors > getdtablesize())
+    {
+      __set_errno (EINVAL);
+      return -1;
+    }
 
   data.maxdir = descriptors < 1 ? 1 : descriptors;
   data.actdir = 0;
-  data.dirstreams = (struct dir_data **) alloca (data.maxdir
+  data.dirstreams = (struct dir_data **) malloc (data.maxdir
 						 * sizeof (struct dir_data *));
+  if (data.dirstreams == NULL)
+    return -1;
   memset (data.dirstreams, '\0', data.maxdir * sizeof (struct dir_data *));
 
   /* PATH_MAX is always defined when we get here.  */
   data.dirbufsize = MAX (2 * strlen (dir), PATH_MAX);
   data.dirbuf = (char *) malloc (data.dirbufsize);
   if (data.dirbuf == NULL)
-    return -1;
+    {
+      free (data.dirstreams);
+      return -1;
+    }
   cp = __stpcpy (data.dirbuf, dir);
   /* Strip trailing slashes.  */
   while (cp > data.dirbuf + 1 && cp[-1] == '/')
@@ -805,6 +815,7 @@  ftw_startup (const char *dir, int is_nftw, void *func, int descriptors,
   __tdestroy (data.known_objects, free);
   free (data.dirbuf);
   __set_errno (save_err);
+  free (data.dirstreams);
 
   return result;
 }
diff --git a/io/tst-bz26353.c b/io/tst-bz26353.c
new file mode 100644
index 0000000000..53e71bfe5e
--- /dev/null
+++ b/io/tst-bz26353.c
@@ -0,0 +1,34 @@ 
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <ftw.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <limits.h>
+
+int my_func(const char *file, const struct stat *sb ,int flag)
+{
+  printf ("%s\n", file);
+  return 0;
+}
+
+/*Check whether stack overflow occurs*/
+int do_test(int large_nopenfd)
+{
+  int ret = ftw("./tst-bz26353", my_func, large_nopenfd);
+  printf ("test big num %d, ret=%d errno=%d\n", large_nopenfd, ret, errno);
+  if (ret != -1 || errno != EINVAL)
+    return 1;
+  return 0;
+}
+
+int main(int argc, char *argv[])
+{
+  int ret = 0;
+  mkdir ("./tst-bz26353", 0755);
+  ret += do_test(getdtablesize() + 1);
+  ret += do_test(8192 * 1024);
+  ret += do_test(INT_MAX);
+  rmdir ("./tst-bz26353");
+  return ret;
+}