Message ID | 1335983451-30160-1-git-send-email-tytso@mit.edu |
---|---|
State | Accepted, archived |
Headers | show |
On 2012-05-02, at 12:30 PM, Theodore Ts'o wrote: > MacOS 10.5 doesn't have posix_memalign() nor memalign(), but it does > have valloc(). The Android SDK would like to be built on MacOS 10.5, > so I've added support for a good-enough emulation of memalign()'s > functionality using valloc(), with an explicit test to make sure > valloc() is returning a pointer which is sufficiently aligned given > the requested alignment. This won't work if you try to operate on a > file system with a 16k blocksize using an e2fsprogs built on MacOS > 10.5 system, but it is good enough for the common case of 4k > blocksize file systems, and we will let the memory allocation fail in > the alignment is not good enough. Won't this cause e.g. the 16kB/64kB blocksize regression tests to fail on MacOS? I've been assuming that the memalign functionality is only necessary for O_DIRECT, which isn't even working on MacOS, so it is enough to just return an unaligned memory chunk and it will work for normal buffered IO on MacOS. > I've also added a unit test for ext2fs_get_memalign() so we can be > sure it's working as expected. I've tested the code paths with > HAVE_POSIX_MEMALIGN defined, HAVE_POSIX_MEMALIGN undefined, and > HAVE_POSIX_MEMALIGN and HAVE_MEMALIGN undefined on an x86 Linux > system, and so I know the valloc() code path works OK. The simplistic > (and less safe) patch at: > > https://trac.macports.org/attachment/ticket/33692/patch-lib-ext2fs-inline.c.diff > > Shows that using valloc() apparently works OK for MacOS 10.5 (but if > it doesn't the unit test will catch a problem). > > Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> > --- > lib/ext2fs/Makefile.in | 9 +++++++- > lib/ext2fs/inline.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++-- > 2 files changed, 62 insertions(+), 3 deletions(-) > > diff --git a/lib/ext2fs/Makefile.in b/lib/ext2fs/Makefile.in > index 507a459..f9200fa 100644 > --- a/lib/ext2fs/Makefile.in > +++ b/lib/ext2fs/Makefile.in > @@ -369,6 +369,11 @@ tst_extents: $(srcdir)/extent.c extent_dbg.c $(DEBUG_OBJS) $(DEPLIBSS) \ > $(STATIC_LIBEXT2FS) $(LIBBLKID) $(LIBUUID) $(LIBCOM_ERR) \ > -I $(top_srcdir)/debugfs > > +tst_inline: $(srcdir)/inline.c $(STATIC_LIBEXT2FS) $(DEPLIBCOM_ERR) > + $(E) " LD $@" > + $(Q) $(CC) -o tst_inline $(srcdir)/inline.c $(ALL_CFLAGS) -DDEBUG \ > + $(STATIC_LIBEXT2FS) $(LIBCOM_ERR) > + > tst_csum: csum.c $(STATIC_LIBEXT2FS) $(DEPLIBCOM_ERR) \ > $(top_srcdir)/lib/e2p/e2p.h > $(E) " LD $@" > @@ -384,7 +389,8 @@ mkjournal: mkjournal.c $(STATIC_LIBEXT2FS) $(DEPLIBCOM_ERR) > $(Q) $(CC) -o mkjournal $(srcdir)/mkjournal.c -DDEBUG $(STATIC_LIBEXT2FS) $(LIBCOM_ERR) $(ALL_CFLAGS) > > check:: tst_bitops tst_badblocks tst_iscan tst_types tst_icount \ > - tst_super_size tst_types tst_inode_size tst_csum tst_crc32c tst_bitmaps > + tst_super_size tst_types tst_inode_size tst_csum tst_crc32c tst_bitmaps \ > + tst_inline > LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_bitops > LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_badblocks > LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_iscan > @@ -393,6 +399,7 @@ check:: tst_bitops tst_badblocks tst_iscan tst_types tst_icount \ > LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_super_size > LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_inode_size > LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_csum > + LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_inline > LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_crc32c > LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) \ > ./tst_bitmaps -f $(srcdir)/tst_bitmaps_cmds > tst_bitmaps_out > diff --git a/lib/ext2fs/inline.c b/lib/ext2fs/inline.c > index ad0c368..335db55 100644 > --- a/lib/ext2fs/inline.c > +++ b/lib/ext2fs/inline.c > @@ -45,7 +45,7 @@ errcode_t ext2fs_get_memalign(unsigned long size, > errcode_t retval; > void **p = ptr; > > - if (align == 0) > + if (align < 8) > align = 8; > #ifdef HAVE_POSIX_MEMALIGN > retval = posix_memalign(p, align, size); > @@ -64,9 +64,61 @@ errcode_t ext2fs_get_memalign(unsigned long size, > return EXT2_ET_NO_MEMORY; > } > #else > -#error memalign or posix_memalign must be defined! > +#ifdef HAVE_VALLOC > + if (align > sizeof(long long)) > + *p = valloc(size); > + else > +#endif > + *p = malloc(size); > + if ((unsigned long) *p & (align - 1)) { > + free(*p); > + *p = 0; > + } > + if (*p == 0) > + return EXT2_ET_NO_MEMORY; > #endif > #endif > return 0; > } > > +#ifdef DEBUG > +static int isaligned(void *ptr, unsigned long align) > +{ > + return (((unsigned long) ptr & (align - 1)) == 0); > +} > + > +static errcode_t test_memalign(unsigned long align) > +{ > + void *ptr = 0; > + errcode_t retval; > + > + retval = ext2fs_get_memalign(32, align, &ptr); > + if (!retval && !isaligned(ptr, align)) > + retval = EINVAL; > + free(ptr); > + printf("tst_memliagn(%lu): %s\n", align, > + retval ? error_message(retval) : "OK"); > + return retval; > +} > + > +int main(int argc, char **argv) > +{ > + int err = 0; > + > + if (test_memalign(4)) > + err++; > + if (test_memalign(32)) > + err++; > + if (test_memalign(1024)) > + err++; > + if (test_memalign(4096)) > + err++; > +#if defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN) > + if (test_memalign(16384)) > + err++; > + if (test_memalign(32768)) > + err++; > +#endif > + return err; > +} > +#endif > -- > 1.7.10.rc3 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html Cheers, Andreas -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, May 02, 2012 at 12:52:06PM -0600, Andreas Dilger wrote: > > Won't this cause e.g. the 16kB/64kB blocksize regression tests to > fail on MacOS? I've been assuming that the memalign functionality > is only necessary for O_DIRECT, which isn't even working on MacOS, > so it is enough to just return an unaligned memory chunk and it > will work for normal buffered IO on MacOS. Hmm, good point. What we should probably do is fix things so we only try to use ext2fs_get_memalign() if we are doing O_DIRECT in the first place, since can be be overhead using ext2fs_memalign if it's not needed, especially if we are using valloc(). I don't want to leave ext2fs_get_memalign doing something which isn't as the function is documented, even if it's good enough for our current needs and it will just work today. That leaves behind an accident waiting to happen later on --- for example if some progam uses ext2fs_get_memalign() for some use other than what we currently assume that it will be used for... - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
I thought this patch enabled O_DIRECT functionality on the Mac: http://sourceforge.net/tracker/index.php?func=detail&aid=3140289&group_id=2406&atid=102406
On Wed, May 02, 2012 at 01:01:35PM -0700, Ken Sumrall wrote: > I thought this patch enabled O_DIRECT functionality on the Mac: > > http://sourceforge.net/tracker/index.php?func=detail&aid=3140289&group_id=2406&atid=102406 Yeah, I was looking at this. It turns out that F_NOCACHE has the effect of O_DIRECT, but it does not require memory alignment. There are some hints that you get really bad performance if the read/write size is not a multiple of 4k, and/or is not well-aligned --- presumably in that case Mac OS uses a bounce buffer. In practice, Linux works the same way --- we'll fall back to using the page cache if the alignment requirements are not met. However, there have historically been bugs that cause this not to work correctly (and in fact caused data corruption): https://bugzilla.redhat.com/show_bug.cgi?id=471613 http://kerneltrap.org/mailarchive/linux-fsdevel/2008/11/14/4099714 ... and the formal documentation of O_DIRECT in the open(2) man page in Linux makes very little guarantees about what the requirements are for O_DIRECT to work correctly: The O_DIRECT flag may impose alignment restrictions on the length and address of userspace buffers and the file offset of I/Os. In Linux alignment restrictions vary by file system and kernel version and might be absent entirely. So there are two possibilities; one is that we change ext2fs_get_memalign() so it gets aligned memory on a best-efforts basis, but does not guarantee that the memory will be aligned. The other is we keep ext2fs_get_memalign() as an interface which returns an error if the request memory alignment can not be honored, and then add code so that we only try using ext2fs_get_memalign() if we are trying to use O_DIRECT (and make an exception for F_NOCACHE on Mac systems that don't have memalign or posix_memalign). The first is tempting, but it just seems a little dirty to me. On the other hand, we've never published a formal API specification for ext2fs_get_memalign(), and it's relatively unlikely there are users of the API outside of e2fsprogs. - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/lib/ext2fs/Makefile.in b/lib/ext2fs/Makefile.in index 507a459..f9200fa 100644 --- a/lib/ext2fs/Makefile.in +++ b/lib/ext2fs/Makefile.in @@ -369,6 +369,11 @@ tst_extents: $(srcdir)/extent.c extent_dbg.c $(DEBUG_OBJS) $(DEPLIBSS) \ $(STATIC_LIBEXT2FS) $(LIBBLKID) $(LIBUUID) $(LIBCOM_ERR) \ -I $(top_srcdir)/debugfs +tst_inline: $(srcdir)/inline.c $(STATIC_LIBEXT2FS) $(DEPLIBCOM_ERR) + $(E) " LD $@" + $(Q) $(CC) -o tst_inline $(srcdir)/inline.c $(ALL_CFLAGS) -DDEBUG \ + $(STATIC_LIBEXT2FS) $(LIBCOM_ERR) + tst_csum: csum.c $(STATIC_LIBEXT2FS) $(DEPLIBCOM_ERR) \ $(top_srcdir)/lib/e2p/e2p.h $(E) " LD $@" @@ -384,7 +389,8 @@ mkjournal: mkjournal.c $(STATIC_LIBEXT2FS) $(DEPLIBCOM_ERR) $(Q) $(CC) -o mkjournal $(srcdir)/mkjournal.c -DDEBUG $(STATIC_LIBEXT2FS) $(LIBCOM_ERR) $(ALL_CFLAGS) check:: tst_bitops tst_badblocks tst_iscan tst_types tst_icount \ - tst_super_size tst_types tst_inode_size tst_csum tst_crc32c tst_bitmaps + tst_super_size tst_types tst_inode_size tst_csum tst_crc32c tst_bitmaps \ + tst_inline LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_bitops LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_badblocks LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_iscan @@ -393,6 +399,7 @@ check:: tst_bitops tst_badblocks tst_iscan tst_types tst_icount \ LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_super_size LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_inode_size LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_csum + LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_inline LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) ./tst_crc32c LD_LIBRARY_PATH=$(LIB) DYLD_LIBRARY_PATH=$(LIB) \ ./tst_bitmaps -f $(srcdir)/tst_bitmaps_cmds > tst_bitmaps_out diff --git a/lib/ext2fs/inline.c b/lib/ext2fs/inline.c index ad0c368..335db55 100644 --- a/lib/ext2fs/inline.c +++ b/lib/ext2fs/inline.c @@ -45,7 +45,7 @@ errcode_t ext2fs_get_memalign(unsigned long size, errcode_t retval; void **p = ptr; - if (align == 0) + if (align < 8) align = 8; #ifdef HAVE_POSIX_MEMALIGN retval = posix_memalign(p, align, size); @@ -64,9 +64,61 @@ errcode_t ext2fs_get_memalign(unsigned long size, return EXT2_ET_NO_MEMORY; } #else -#error memalign or posix_memalign must be defined! +#ifdef HAVE_VALLOC + if (align > sizeof(long long)) + *p = valloc(size); + else +#endif + *p = malloc(size); + if ((unsigned long) *p & (align - 1)) { + free(*p); + *p = 0; + } + if (*p == 0) + return EXT2_ET_NO_MEMORY; #endif #endif return 0; } +#ifdef DEBUG +static int isaligned(void *ptr, unsigned long align) +{ + return (((unsigned long) ptr & (align - 1)) == 0); +} + +static errcode_t test_memalign(unsigned long align) +{ + void *ptr = 0; + errcode_t retval; + + retval = ext2fs_get_memalign(32, align, &ptr); + if (!retval && !isaligned(ptr, align)) + retval = EINVAL; + free(ptr); + printf("tst_memliagn(%lu): %s\n", align, + retval ? error_message(retval) : "OK"); + return retval; +} + +int main(int argc, char **argv) +{ + int err = 0; + + if (test_memalign(4)) + err++; + if (test_memalign(32)) + err++; + if (test_memalign(1024)) + err++; + if (test_memalign(4096)) + err++; +#if defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN) + if (test_memalign(16384)) + err++; + if (test_memalign(32768)) + err++; +#endif + return err; +} +#endif
MacOS 10.5 doesn't have posix_memalign() nor memalign(), but it does have valloc(). The Android SDK would like to be built on MacOS 10.5, so I've added support for a good-enough emulation of memalign()'s functionality using valloc(), with an explicit test to make sure valloc() is returning a pointer which is sufficiently aligned given the requested alignment. This won't work if you try to operate on a file system with a 16k blocksize using an e2fsprogs built on MacOS 10.5 system, but it is good enough for the common case of 4k blocksize file systems, and we will let the memory allocation fail in the alignment is not good enough. I've also added a unit test for ext2fs_get_memalign() so we can be sure it's working as expected. I've tested the code paths with HAVE_POSIX_MEMALIGN defined, HAVE_POSIX_MEMALIGN undefined, and HAVE_POSIX_MEMALIGN and HAVE_MEMALIGN undefined on an x86 Linux system, and so I know the valloc() code path works OK. The simplistic (and less safe) patch at: https://trac.macports.org/attachment/ticket/33692/patch-lib-ext2fs-inline.c.diff Shows that using valloc() apparently works OK for MacOS 10.5 (but if it doesn't the unit test will catch a problem). Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> --- lib/ext2fs/Makefile.in | 9 +++++++- lib/ext2fs/inline.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 3 deletions(-)