Message ID | 20200115013151.905-1-yangx.jy@cn.fujitsu.com |
---|---|
State | New |
Headers | show |
Series | [v2] sysdeps/posix/posix_fallocate*: Make emulated posix_fallocate() work properly | expand |
On 14/01/2020 22:31, Xiao Yang wrote: > Emulated posix_fallocate() only writes data in one block if block > size is 4096, offset is 4095 and len is 2. The emulated code should > write data in two blocks in the case because it actually crosses two > blocks. Thanks for catching it, comments below. > > Signed-off-by: Xiao Yang <yangx.jy@cn.fujitsu.com> We do not use signed-off, but Copyright assignments. I am not sure this particular fix can be considered trivial. > --- > sysdeps/posix/posix_fallocate.c | 17 +++++++++++++++++ > sysdeps/posix/posix_fallocate64.c | 17 +++++++++++++++++ > 2 files changed, 34 insertions(+) > > diff --git a/sysdeps/posix/posix_fallocate.c b/sysdeps/posix/posix_fallocate.c > index e7fccfc1c8..22e5fea091 100644 > --- a/sysdeps/posix/posix_fallocate.c > +++ b/sysdeps/posix/posix_fallocate.c > @@ -93,6 +93,23 @@ posix_fallocate (int fd, __off_t offset, __off_t len) > increment = 4096; > } > > + if (offset % increment + len % increment > increment) > + { > + if (offset < st.st_size) > + { > + unsigned char b; > + ssize_t rdsize = __pread (fd, &b, 1, offset); > + if (rdsize < 0) > + return errno; > + if (rdsize == 1 && b != 0) > + goto next; > + } > + > + if (__pwrite (fd, "", 1, offset) != 1) > + return errno; > + } > + > +next: The patch looks ok, although I would prefer if we factor the logic where or not to write the byte on its own function. Something like: diff --git a/sysdeps/posix/posix_fallocate.c b/sysdeps/posix/posix_fallocate.c index e7fccfc1c8..855439bfcb 100644 --- a/sysdeps/posix/posix_fallocate.c +++ b/sysdeps/posix/posix_fallocate.c @@ -23,6 +23,26 @@ #include <sys/stat.h> #include <sys/statfs.h> +static inline int +write_byte (int fd, off_t offset, off_t st_size) +{ + if (offset < st_size) + { + unsigned char b; + ssize_t rdsize = __libc_pread64 (fd, &b, 1, offset); + if (rdsize < 0) + return errno; + /* If there is a non-zero byte, the block must have been + allocated already. */ + if (rdsize == 1 && b != 0) + return 0; + } + + if (__libc_pwrite64 (fd, "", 1, offset) != 1) + return errno; + return 0; +} + /* Reserve storage for the data of the file associated with FD. This emulation is far from perfect, but the kernel cannot do not much better for network file systems, either. */ @@ -31,6 +51,7 @@ int posix_fallocate (int fd, __off_t offset, __off_t len) { struct stat64 st; + int r; if (offset < 0 || len < 0) return EINVAL; @@ -97,25 +118,21 @@ posix_fallocate (int fd, __off_t offset, __off_t len) lack a better option. Compare-and-swap against a file mapping might additional local races, but requires interposition of a signal handler to catch SIGBUS. */ + + if (offset % increment + len % increment > increment) + { + r = write_byte (fd, offset, st.st_size); + if (r != 0) + return r; + } + for (offset += (len - 1) % increment; len > 0; offset += increment) { len -= increment; - if (offset < st.st_size) - { - unsigned char c; - ssize_t rsize = __pread (fd, &c, 1, offset); - - if (rsize < 0) - return errno; - /* If there is a non-zero byte, the block must have been - allocated already. */ - else if (rsize == 1 && c != 0) - continue; - } - - if (__pwrite (fd, "", 1, offset) != 1) - return errno; + r = write_byte (fd, offset, st.st_size); + if (r != 0) + return r; } return 0; > /* Write a null byte to every block. This is racy; we currently > lack a better option. Compare-and-swap against a file mapping > might additional local races, but requires interposition of a > diff --git a/sysdeps/posix/posix_fallocate64.c b/sysdeps/posix/posix_fallocate64.c > index f9d4fe5ca3..1c46b186b6 100644 > --- a/sysdeps/posix/posix_fallocate64.c > +++ b/sysdeps/posix/posix_fallocate64.c > @@ -93,6 +93,23 @@ __posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len) > increment = 4096; > } > > + if (offset % increment + len % increment > increment) > + { > + if (offset < st.st_size) > + { > + unsigned char b; > + ssize_t rdsize = __libc_pread64 (fd, &b, 1, offset); > + if (rdsize < 0) > + return errno; > + if (rdsize == 1 && b != 0) > + goto next; > + } > + > + if (__libc_pwrite64 (fd, "", 1, offset) != 1) > + return errno; > + } > + > +next: > /* Write a null byte to every block. This is racy; we currently > lack a better option. Compare-and-swap against a file mapping > might address local races, but requires interposition of a signal >
On 1/15/20 7:01 AM, Xiao Yang wrote: > Emulated posix_fallocate() only writes data in one block if block > size is 4096, offset is 4095 and len is 2. The emulated code should > write data in two blocks in the case because it actually crosses two > blocks. > > Signed-off-by: Xiao Yang <yangx.jy@cn.fujitsu.com> Could you please clarify your copyright assignment status? I suppose you ought to be covered by any agreement Fujitsu may have with the FSF but I'm not a steward and hence do not have a way to check. I'll review anyway given that there are patches from @cn.fujitsu.com in the repo. > --- > sysdeps/posix/posix_fallocate.c | 17 +++++++++++++++++ > sysdeps/posix/posix_fallocate64.c | 17 +++++++++++++++++ > 2 files changed, 34 insertions(+) Please file a bug report describing the issue and also add a test case to protect from regressions. > diff --git a/sysdeps/posix/posix_fallocate.c b/sysdeps/posix/posix_fallocate.c > index e7fccfc1c8..22e5fea091 100644 > --- a/sysdeps/posix/posix_fallocate.c > +++ b/sysdeps/posix/posix_fallocate.c > @@ -93,6 +93,23 @@ posix_fallocate (int fd, __off_t offset, __off_t len) > increment = 4096; > } > > + if (offset % increment + len % increment > increment) > + { > + if (offset < st.st_size) > + { > + unsigned char b; > + ssize_t rdsize = __pread (fd, &b, 1, offset); > + if (rdsize < 0) > + return errno; > + if (rdsize == 1 && b != 0) > + goto next; > + } > + > + if (__pwrite (fd, "", 1, offset) != 1) > + return errno; > + } A better fix may be to fix the loop conditions to fold this corner case in. Thanks, Siddhesh
On 12/20/20 11:13 PM, Siddhesh Poyarekar wrote: > On 1/15/20 7:01 AM, Xiao Yang wrote: >> Emulated posix_fallocate() only writes data in one block if block >> size is 4096, offset is 4095 and len is 2. The emulated code >> should write data in two blocks in the case because it actually >> crosses two blocks. >> >> Signed-off-by: Xiao Yang <yangx.jy@cn.fujitsu.com> > > Could you please clarify your copyright assignment status? I suppose > you ought to be covered by any agreement Fujitsu may have with the > FSF but I'm not a steward and hence do not have a way to check. I'll > review anyway given that there are patches from @cn.fujitsu.com in > the repo. There is no assignment on file with the FSF for Fujitsu regarding glibc.
On 12/21/20 9:53 AM, Carlos O'Donell wrote: > On 12/20/20 11:13 PM, Siddhesh Poyarekar wrote: >> On 1/15/20 7:01 AM, Xiao Yang wrote: >>> Emulated posix_fallocate() only writes data in one block if block >>> size is 4096, offset is 4095 and len is 2. The emulated code >>> should write data in two blocks in the case because it actually >>> crosses two blocks. >>> >>> Signed-off-by: Xiao Yang <yangx.jy@cn.fujitsu.com> >> >> Could you please clarify your copyright assignment status? I suppose >> you ought to be covered by any agreement Fujitsu may have with the >> FSF but I'm not a steward and hence do not have a way to check. I'll >> review anyway given that there are patches from @cn.fujitsu.com in >> the repo. > > There is no assignment on file with the FSF for Fujitsu regarding glibc. > Thanks for confirming Carlos, I've marked the patch as such in patchwork. Siddhesh
diff --git a/sysdeps/posix/posix_fallocate.c b/sysdeps/posix/posix_fallocate.c index e7fccfc1c8..22e5fea091 100644 --- a/sysdeps/posix/posix_fallocate.c +++ b/sysdeps/posix/posix_fallocate.c @@ -93,6 +93,23 @@ posix_fallocate (int fd, __off_t offset, __off_t len) increment = 4096; } + if (offset % increment + len % increment > increment) + { + if (offset < st.st_size) + { + unsigned char b; + ssize_t rdsize = __pread (fd, &b, 1, offset); + if (rdsize < 0) + return errno; + if (rdsize == 1 && b != 0) + goto next; + } + + if (__pwrite (fd, "", 1, offset) != 1) + return errno; + } + +next: /* Write a null byte to every block. This is racy; we currently lack a better option. Compare-and-swap against a file mapping might additional local races, but requires interposition of a diff --git a/sysdeps/posix/posix_fallocate64.c b/sysdeps/posix/posix_fallocate64.c index f9d4fe5ca3..1c46b186b6 100644 --- a/sysdeps/posix/posix_fallocate64.c +++ b/sysdeps/posix/posix_fallocate64.c @@ -93,6 +93,23 @@ __posix_fallocate64_l64 (int fd, __off64_t offset, __off64_t len) increment = 4096; } + if (offset % increment + len % increment > increment) + { + if (offset < st.st_size) + { + unsigned char b; + ssize_t rdsize = __libc_pread64 (fd, &b, 1, offset); + if (rdsize < 0) + return errno; + if (rdsize == 1 && b != 0) + goto next; + } + + if (__libc_pwrite64 (fd, "", 1, offset) != 1) + return errno; + } + +next: /* Write a null byte to every block. This is racy; we currently lack a better option. Compare-and-swap against a file mapping might address local races, but requires interposition of a signal
Emulated posix_fallocate() only writes data in one block if block size is 4096, offset is 4095 and len is 2. The emulated code should write data in two blocks in the case because it actually crosses two blocks. Signed-off-by: Xiao Yang <yangx.jy@cn.fujitsu.com> --- sysdeps/posix/posix_fallocate.c | 17 +++++++++++++++++ sysdeps/posix/posix_fallocate64.c | 17 +++++++++++++++++ 2 files changed, 34 insertions(+)