diff mbox series

[v2] sysdeps/posix/posix_fallocate*: Make emulated posix_fallocate() work properly

Message ID 20200115013151.905-1-yangx.jy@cn.fujitsu.com
State New
Headers show
Series [v2] sysdeps/posix/posix_fallocate*: Make emulated posix_fallocate() work properly | expand

Commit Message

Xiao Yang Jan. 15, 2020, 1:31 a.m. UTC
Emulated posix_fallocate() only writes data in one block if block
size is 4096, offset is 4095 and len is 2.  The emulated code should
write data in two blocks in the case because it actually crosses two
blocks.

Signed-off-by: Xiao Yang <yangx.jy@cn.fujitsu.com>
---
 sysdeps/posix/posix_fallocate.c   | 17 +++++++++++++++++
 sysdeps/posix/posix_fallocate64.c | 17 +++++++++++++++++
 2 files changed, 34 insertions(+)

Comments

Adhemerval Zanella Netto Jan. 30, 2020, 4:34 p.m. UTC | #1
On 14/01/2020 22:31, Xiao Yang wrote:
> Emulated posix_fallocate() only writes data in one block if block
> size is 4096, offset is 4095 and len is 2.  The emulated code should
> write data in two blocks in the case because it actually crosses two
> blocks.

Thanks for catching it, comments below.

> 
> Signed-off-by: Xiao Yang <yangx.jy@cn.fujitsu.com>

We do not use Signed-off-by tags; we rely on copyright assignments instead.
I am not sure this particular fix can be considered trivial.

> ---
>  sysdeps/posix/posix_fallocate.c   | 17 +++++++++++++++++
>  sysdeps/posix/posix_fallocate64.c | 17 +++++++++++++++++
>  2 files changed, 34 insertions(+)
> 
> diff --git a/sysdeps/posix/posix_fallocate.c b/sysdeps/posix/posix_fallocate.c
> index e7fccfc1c8..22e5fea091 100644
> --- a/sysdeps/posix/posix_fallocate.c
> +++ b/sysdeps/posix/posix_fallocate.c
> @@ -93,6 +93,23 @@ posix_fallocate (int fd, __off_t offset, __off_t len)
>        increment = 4096;
>    }
>  
> +  if (offset % increment + len % increment > increment)
> +    {
> +      if (offset < st.st_size)
> +        {
> +          unsigned char b;
> +          ssize_t rdsize = __pread (fd, &b, 1, offset);
> +          if (rdsize < 0)
> +            return errno;
> +          if (rdsize == 1 && b != 0)
> +            goto next;
> +        }
> +
> +      if (__pwrite (fd, "", 1, offset) != 1)
> +        return errno;
> +    }
> +
> +next:


The patch looks ok, although I would prefer that we factor the logic of whether
or not to write the byte into its own function. Something like:

diff --git a/sysdeps/posix/posix_fallocate.c b/sysdeps/posix/posix_fallocate.c
index e7fccfc1c8..855439bfcb 100644
--- a/sysdeps/posix/posix_fallocate.c
+++ b/sysdeps/posix/posix_fallocate.c
@@ -23,6 +23,26 @@
 #include <sys/stat.h>
 #include <sys/statfs.h>
 
+static inline int
+write_byte (int fd, off_t offset, off_t st_size)
+{
+  if (offset < st_size)
+    {
+      unsigned char b;
+      ssize_t rdsize = __libc_pread64 (fd, &b, 1, offset);
+      if (rdsize < 0)
+	return errno;
+      /* If there is a non-zero byte, the block must have been
+	 allocated already.  */
+      if (rdsize == 1 && b != 0)
+	return 0;
+    }
+
+  if (__libc_pwrite64 (fd, "", 1, offset) != 1)
+    return errno;
+  return 0;
+}
+
 /* Reserve storage for the data of the file associated with FD.  This
    emulation is far from perfect, but the kernel cannot do not much
    better for network file systems, either.  */
@@ -31,6 +51,7 @@ int
 posix_fallocate (int fd, __off_t offset, __off_t len)
 {
   struct stat64 st;
+  int r;
 
   if (offset < 0 || len < 0)
     return EINVAL;
@@ -97,25 +118,21 @@ posix_fallocate (int fd, __off_t offset, __off_t len)
      lack a better option.  Compare-and-swap against a file mapping
      might additional local races, but requires interposition of a
      signal handler to catch SIGBUS.  */
+
+  if (offset % increment + len % increment > increment)
+    {
+      r = write_byte (fd, offset, st.st_size);
+      if (r != 0)
+	return r;
+    }
+
   for (offset += (len - 1) % increment; len > 0; offset += increment)
     {
       len -= increment;
 
-      if (offset < st.st_size)
-	{
-	  unsigned char c;
-	  ssize_t rsize = __pread (fd, &c, 1, offset);
-
-	  if (rsize < 0)
-	    return errno;
-	  /* If there is a non-zero byte, the block must have been
-	     allocated already.  */
-	  else if (rsize == 1 && c != 0)
-	    continue;
-	}
-
-      if (__pwrite (fd, "", 1, offset) != 1)
-	return errno;
+      r = write_byte (fd, offset, st.st_size);
+      if (r != 0)
+	return r;
     }
 
   return 0;

>    /* Write a null byte to every block.  This is racy; we currently
>       lack a better option.  Compare-and-swap against a file mapping
>       might additional local races, but requires interposition of a
> diff --git a/sysdeps/posix/posix_fallocate64.c b/sysdeps/posix/posix_fallocate64.c
> index f9d4fe5ca3..1c46b186b6 100644
> --- a/sysdeps/posix/posix_fallocate64.c
> +++ b/sysdeps/posix/posix_fallocate64.c
> @@ -93,6 +93,23 @@ __posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len)
>        increment = 4096;
>    }
>  
> +  if (offset % increment + len % increment > increment)
> +    {
> +      if (offset < st.st_size)
> +        {
> +          unsigned char b;
> +          ssize_t rdsize = __libc_pread64 (fd, &b, 1, offset);
> +          if (rdsize < 0)
> +            return errno;
> +          if (rdsize == 1 && b != 0)
> +            goto next;
> +        }
> +
> +      if (__libc_pwrite64 (fd, "", 1, offset) != 1)
> +        return errno;
> +    }
> +
> +next:
>    /* Write a null byte to every block.  This is racy; we currently
>       lack a better option.  Compare-and-swap against a file mapping
>       might address local races, but requires interposition of a signal
>
Siddhesh Poyarekar Dec. 21, 2020, 4:13 a.m. UTC | #2
On 1/15/20 7:01 AM, Xiao Yang wrote:
> Emulated posix_fallocate() only writes data in one block if block
> size is 4096, offset is 4095 and len is 2.  The emulated code should
> write data in two blocks in the case because it actually crosses two
> blocks.
> 
> Signed-off-by: Xiao Yang <yangx.jy@cn.fujitsu.com>

Could you please clarify your copyright assignment status?  I suppose 
you ought to be covered by any agreement Fujitsu may have with the FSF 
but I'm not a steward and hence do not have a way to check.  I'll review 
anyway given that there are patches from @cn.fujitsu.com in the repo.

> ---
>   sysdeps/posix/posix_fallocate.c   | 17 +++++++++++++++++
>   sysdeps/posix/posix_fallocate64.c | 17 +++++++++++++++++
>   2 files changed, 34 insertions(+)

Please file a bug report describing the issue and also add a test case 
to protect from regressions.

> diff --git a/sysdeps/posix/posix_fallocate.c b/sysdeps/posix/posix_fallocate.c
> index e7fccfc1c8..22e5fea091 100644
> --- a/sysdeps/posix/posix_fallocate.c
> +++ b/sysdeps/posix/posix_fallocate.c
> @@ -93,6 +93,23 @@ posix_fallocate (int fd, __off_t offset, __off_t len)
>         increment = 4096;
>     }
>   
> +  if (offset % increment + len % increment > increment)
> +    {
> +      if (offset < st.st_size)
> +        {
> +          unsigned char b;
> +          ssize_t rdsize = __pread (fd, &b, 1, offset);
> +          if (rdsize < 0)
> +            return errno;
> +          if (rdsize == 1 && b != 0)
> +            goto next;
> +        }
> +
> +      if (__pwrite (fd, "", 1, offset) != 1)
> +        return errno;
> +    }

A better fix may be to fix the loop conditions to fold this corner case in.

Thanks,
Siddhesh
Carlos O'Donell Dec. 21, 2020, 4:23 a.m. UTC | #3
On 12/20/20 11:13 PM, Siddhesh Poyarekar wrote:
> On 1/15/20 7:01 AM, Xiao Yang wrote:
>> Emulated posix_fallocate() only writes data in one block if block 
>> size is 4096, offset is 4095 and len is 2.  The emulated code
>> should write data in two blocks in the case because it actually
>> crosses two blocks.
>> 
>> Signed-off-by: Xiao Yang <yangx.jy@cn.fujitsu.com>
> 
> Could you please clarify your copyright assignment status?  I suppose
> you ought to be covered by any agreement Fujitsu may have with the
> FSF but I'm not a steward and hence do not have a way to check.  I'll
> review anyway given that there are patches from @cn.fujitsu.com in
> the repo.

There is no assignment on file with the FSF for Fujitsu regarding glibc.
Siddhesh Poyarekar Dec. 21, 2020, 4:24 a.m. UTC | #4
On 12/21/20 9:53 AM, Carlos O'Donell wrote:
> On 12/20/20 11:13 PM, Siddhesh Poyarekar wrote:
>> On 1/15/20 7:01 AM, Xiao Yang wrote:
>>> Emulated posix_fallocate() only writes data in one block if block
>>> size is 4096, offset is 4095 and len is 2.  The emulated code
>>> should write data in two blocks in the case because it actually
>>> crosses two blocks.
>>>
>>> Signed-off-by: Xiao Yang <yangx.jy@cn.fujitsu.com>
>>
>> Could you please clarify your copyright assignment status?  I suppose
>> you ought to be covered by any agreement Fujitsu may have with the
>> FSF but I'm not a steward and hence do not have a way to check.  I'll
>> review anyway given that there are patches from @cn.fujitsu.com in
>> the repo.
> 
> There is no assignment on file with the FSF for Fujitsu regarding glibc.
> 

Thanks for confirming, Carlos; I've marked the patch as such in patchwork.

Siddhesh
diff mbox series

Patch

diff --git a/sysdeps/posix/posix_fallocate.c b/sysdeps/posix/posix_fallocate.c
index e7fccfc1c8..22e5fea091 100644
--- a/sysdeps/posix/posix_fallocate.c
+++ b/sysdeps/posix/posix_fallocate.c
@@ -93,6 +93,23 @@  posix_fallocate (int fd, __off_t offset, __off_t len)
       increment = 4096;
   }
 
+  if (offset % increment + len % increment > increment)
+    {
+      if (offset < st.st_size)
+        {
+          unsigned char b;
+          ssize_t rdsize = __pread (fd, &b, 1, offset);
+          if (rdsize < 0)
+            return errno;
+          if (rdsize == 1 && b != 0)
+            goto next;
+        }
+
+      if (__pwrite (fd, "", 1, offset) != 1)
+        return errno;
+    }
+
+next:
   /* Write a null byte to every block.  This is racy; we currently
      lack a better option.  Compare-and-swap against a file mapping
      might additional local races, but requires interposition of a
diff --git a/sysdeps/posix/posix_fallocate64.c b/sysdeps/posix/posix_fallocate64.c
index f9d4fe5ca3..1c46b186b6 100644
--- a/sysdeps/posix/posix_fallocate64.c
+++ b/sysdeps/posix/posix_fallocate64.c
@@ -93,6 +93,23 @@  __posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len)
       increment = 4096;
   }
 
+  if (offset % increment + len % increment > increment)
+    {
+      if (offset < st.st_size)
+        {
+          unsigned char b;
+          ssize_t rdsize = __libc_pread64 (fd, &b, 1, offset);
+          if (rdsize < 0)
+            return errno;
+          if (rdsize == 1 && b != 0)
+            goto next;
+        }
+
+      if (__libc_pwrite64 (fd, "", 1, offset) != 1)
+        return errno;
+    }
+
+next:
   /* Write a null byte to every block.  This is racy; we currently
      lack a better option.  Compare-and-swap against a file mapping
      might address local races, but requires interposition of a signal