Message ID | 4DF8522F.2020304@sx.jp.nec.com |
---|---|
State | Superseded, archived |
Headers | show |
On 2011-06-15, at 12:33 AM, Kazuya Mio <k-mio@sx.jp.nec.com> wrote: > This patch adds get_fragment_score() to libe2p. get_fragment_score() returns > the fragmentation score. It shows the percentage of extents whose size is > smaller than the input argument "threshold". > However, there are some cases that cannot be merged into a next extent. > The following extents are excepted from the calculation of fragmentation score: > - The extent whose initialize status is different from the next extent > - There is a hole between the extent and its next extent > - The extent is a tail This description of the fragmentation score should also go into a comment at the function itself. Otherwise there is no description in the code of what the "score" is or what it's used for. > Signed-off-by: Kazuya Mio <k-mio@sx.jp.nec.com> > --- > lib/e2p/Makefile.in | 6 + > lib/e2p/e2p.h | 2 > lib/e2p/fragment_score.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 148 insertions(+), 2 deletions(-) > diff --git a/lib/e2p/Makefile.in b/lib/e2p/Makefile.in > index 9775a98..c62a81d 100644 > --- a/lib/e2p/Makefile.in > +++ b/lib/e2p/Makefile.in > @@ -19,7 +19,7 @@ all:: e2p.pc > OBJS= feature.o fgetflags.o fsetflags.o fgetversion.o fsetversion.o \ > getflags.o getversion.o hashstr.o iod.o ls.o mntopts.o \ > parse_num.o pe.o pf.o ps.o setflags.o setversion.o uuid.o \ > - ostype.o percent.o > + ostype.o percent.o fragment_score.o > > SRCS= $(srcdir)/feature.c $(srcdir)/fgetflags.c \ > $(srcdir)/fsetflags.c $(srcdir)/fgetversion.c \ > @@ -28,7 +28,7 @@ SRCS= $(srcdir)/feature.c $(srcdir)/fgetflags.c \ > $(srcdir)/ls.c $(srcdir)/mntopts.c $(srcdir)/parse_num.c \ > $(srcdir)/pe.c $(srcdir)/pf.c $(srcdir)/ps.c \ > $(srcdir)/setflags.c $(srcdir)/setversion.c $(srcdir)/uuid.c \ > - $(srcdir)/ostype.c $(srcdir)/percent.c > + $(srcdir)/ostype.c $(srcdir)/percent.c $(srcdir)/fragment_score.c > HFILES= e2p.h > > LIBRARY= libe2p > @@ -162,3 +162,5 @@ ostype.o: $(srcdir)/ostype.c $(srcdir)/e2p.h \ > $(top_srcdir)/lib/ext2fs/ext2_fs.h $(top_builddir)/lib/ext2fs/ext2_types.h > percent.o: $(srcdir)/percent.c $(srcdir)/e2p.h \ > $(top_srcdir)/lib/ext2fs/ext2_fs.h $(top_builddir)/lib/ext2fs/ext2_types.h > +fragment_score.o: $(srcdir)/fragment_score.c $(srcdir)/e2p.h \ > + $(top_srcdir)/lib/ext2fs/ext2_fs.h $(top_builddir)/lib/ext2fs/ext2_types.h > diff --git a/lib/e2p/e2p.h b/lib/e2p/e2p.h > index 4a68dd9..52a8e51 100644 > --- a/lib/e2p/e2p.h > +++ b/lib/e2p/e2p.h > @@ -72,3 +72,5 @@ char *e2p_os2string(int os_type); > int e2p_string2os(char *str); > > unsigned int e2p_percent(int percent, unsigned int base); > + > +int get_fragment_score(int fd, size_t threshold); This function name should be prefixed with e2p_ to avoid name clashes and make it clear that it us part of the e2p library. > diff --git a/lib/e2p/fragment_score.c b/lib/e2p/fragment_score.c > new file mode 100644 > index 0000000..3ad21b9 > --- /dev/null > +++ b/lib/e2p/fragment_score.c > @@ -0,0 +1,142 @@ > +/* > + * fragment_score.c --- Get file fragmentation score on ext4 filesystem. > + * > + * Copyright (C) 2011 Kazuya Mio <k-mio@sx.jp.nec.com> > + * NEC Software Tohoku, Ltd. > + * > + * %Begin-Header% > + * This file may be redistributed under the terms of the GNU Library > + * General Public License, version 2. > + * %End-Header% > + */ > + > +#define _LARGEFILE64_SOURCE > + > +#if HAVE_ERRNO_H > +#include <errno.h> > +#endif > +#include <unistd.h> > +#include <string.h> > +#include <sys/ioctl.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <sys/vfs.h> > + > +#include <ext2fs/ext2_types.h> > +#include <ext2fs/fiemap.h> > + > +#include "e2p.h" > + > +#define EXT3_IOC_GETFLAGS _IOR('f', 1, long) > + > +#ifdef HAVE_FSTAT64 > +#define FSTAT fstat64 > +#define STRUCT_STAT struct stat64 > +#else > +#define FSTAT fstat > +#define STRUCT_STAT struct stat > +#endif > + > +static int is_target_extent(struct fiemap_extent *cur_fm_ext, > + struct fiemap_extent *next_fm_ext) > +{ > + /* Check a hole between the extent */ > + if ((cur_fm_ext->fe_logical + cur_fm_ext->fe_length) > + < next_fm_ext->fe_logical) > + return 0; > + /* Check a defference of unwritten flag */ > + if ((cur_fm_ext->fe_flags & FIEMAP_EXTENT_UNWRITTEN) > + != (next_fm_ext->fe_flags & FIEMAP_EXTENT_UNWRITTEN)) > + return 0; > + > + /* target extent */ > + return 1; > +} > + > +int get_fragment_score(int fd, size_t threshold) > +{ > + unsigned int flags = 0; > + STRUCT_STAT fileinfo; > + struct statfs fsinfo; > + char buf[4096] = ""; > + struct fiemap *fiemap = (struct fiemap *)buf; > + struct fiemap_extent *fm_ext = &fiemap->fm_extents[0]; > + struct fiemap_extent prev_fm_ext; > + int count = (sizeof(buf) - sizeof(*fiemap)) / > + sizeof(struct fiemap_extent); > + int tot_extents = 0; > + int frag_extents = 0; > + unsigned int i; > + int first = 1, last = 0; > + > + if (FSTAT(fd, &fileinfo) < 0 || > + fstatfs(fd, &fsinfo) < 0) > + return -1; > + if (ioctl(fd, EXT3_IOC_GETFLAGS, &flags) < 0) > + flags = 0; > + > + /* > + * Return an error if the target file is not the following cases. > + * - regular file > + * - extent format file on ext4 filesystem > + */ > + if (!S_ISREG(fileinfo.st_mode) || > + fsinfo.f_type != EXT2_SUPER_MAGIC || > + !(flags & EXT4_EXTENTS_FL)) { > + errno = EOPNOTSUPP; > + return -1; > + } I don't think there is a particular reason why this function shouldn't be usable for any filesytem that can return FIEMAP data. The fragmentation score should be independent of the underlying implementation, and it is the job of the caller to decide what to do with this information. In the case of e4defrag it couldn't defrag a directory (ioctl should fail) and it will likely fail on other filesystem types but that is fine also. If other filesystems begin to support this ioctl they may want to begin using the same defrag tool. > + memset(fiemap, 0, sizeof(struct fiemap)); > + fiemap->fm_start = 0; > + fiemap->fm_length = ~0ULL; > + fiemap->fm_extent_count = count; > + > + do { > + fiemap->fm_flags = 0; > + if (ioctl(fd, FS_IOC_FIEMAP, (unsigned long) fiemap) < 0) > + return -1; > + > + /* If 0 extents are returned, then more ioctls are not needed */ > + if (fiemap->fm_mapped_extents == 0) > + break; > + > + if (first != 0) > + first = 0; > + else { > + /* Check the last extent gotten by previous FIEMAP */ > + if (is_target_extent(&prev_fm_ext, &fm_ext[0])) { > + tot_extents++; > + if (prev_fm_ext.fe_length < threshold) > + frag_extents++; > + } > + } > + > + for (i = 0; i < fiemap->fm_mapped_extents; i++) { > + if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST) { > + last = 1; > + continue; > + } > + > + if (fiemap->fm_mapped_extents - 1 == i) { > + memcpy(&prev_fm_ext, &fm_ext[i], > + sizeof(struct fiemap_extent)); > + continue; > + } > + > + /* Check target extent */ > + if (!is_target_extent(&fm_ext[i], &fm_ext[i+1])) > + continue; > + > + tot_extents++; > + > + if (fm_ext[i].fe_length < threshold) > + frag_extents++; > + } > + > + fiemap->fm_start = (fm_ext[i-1].fe_logical + > + fm_ext[i-1].fe_length); > + } while (last == 0); > + > + return tot_extents == 0 ? 0 : (100 * frag_extents) / tot_extents; > +} Cheers, Andreas-- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Andreas, Thanks for your comments. I'm fixing my patchset now. I will send it next week. Regards, Kazuya Mio 2011/06/16 12:06, Andreas Dilger wrote: > On 2011-06-15, at 12:33 AM, Kazuya Mio<k-mio@sx.jp.nec.com> wrote: >> This patch adds get_fragment_score() to libe2p. get_fragment_score() returns >> the fragmentation score. It shows the percentage of extents whose size is >> smaller than the input argument "threshold". > >> However, there are some cases that cannot be merged into a next extent. >> The following extents are excepted from the calculation of fragmentation score: >> - The extent whose initialize status is different from the next extent >> - There is a hole between the extent and its next extent >> - The extent is a tail > > This description of the fragmentation score should also go into a comment at the function itself. Otherwise there is no description in the code of what the "score" is or what it's used for. > >> Signed-off-by: Kazuya Mio<k-mio@sx.jp.nec.com> >> --- >> lib/e2p/Makefile.in | 6 + >> lib/e2p/e2p.h | 2 >> lib/e2p/fragment_score.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++ >> 3 files changed, 148 insertions(+), 2 deletions(-) >> diff --git a/lib/e2p/Makefile.in b/lib/e2p/Makefile.in >> index 9775a98..c62a81d 100644 >> --- a/lib/e2p/Makefile.in >> +++ b/lib/e2p/Makefile.in >> @@ -19,7 +19,7 @@ all:: e2p.pc >> OBJS= feature.o fgetflags.o fsetflags.o fgetversion.o fsetversion.o \ >> getflags.o getversion.o hashstr.o iod.o ls.o mntopts.o \ >> parse_num.o pe.o pf.o ps.o setflags.o setversion.o uuid.o \ >> - ostype.o percent.o >> + ostype.o percent.o fragment_score.o >> >> SRCS= $(srcdir)/feature.c $(srcdir)/fgetflags.c \ >> $(srcdir)/fsetflags.c $(srcdir)/fgetversion.c \ >> @@ -28,7 +28,7 @@ SRCS= $(srcdir)/feature.c $(srcdir)/fgetflags.c \ >> $(srcdir)/ls.c $(srcdir)/mntopts.c $(srcdir)/parse_num.c \ >> $(srcdir)/pe.c $(srcdir)/pf.c $(srcdir)/ps.c \ >> $(srcdir)/setflags.c $(srcdir)/setversion.c $(srcdir)/uuid.c \ >> - $(srcdir)/ostype.c $(srcdir)/percent.c >> + $(srcdir)/ostype.c $(srcdir)/percent.c $(srcdir)/fragment_score.c >> HFILES= e2p.h >> >> LIBRARY= libe2p >> @@ -162,3 +162,5 @@ ostype.o: $(srcdir)/ostype.c $(srcdir)/e2p.h \ >> $(top_srcdir)/lib/ext2fs/ext2_fs.h $(top_builddir)/lib/ext2fs/ext2_types.h >> percent.o: $(srcdir)/percent.c $(srcdir)/e2p.h \ >> $(top_srcdir)/lib/ext2fs/ext2_fs.h $(top_builddir)/lib/ext2fs/ext2_types.h >> +fragment_score.o: $(srcdir)/fragment_score.c $(srcdir)/e2p.h \ >> + $(top_srcdir)/lib/ext2fs/ext2_fs.h $(top_builddir)/lib/ext2fs/ext2_types.h >> diff --git a/lib/e2p/e2p.h b/lib/e2p/e2p.h >> index 4a68dd9..52a8e51 100644 >> --- a/lib/e2p/e2p.h >> +++ b/lib/e2p/e2p.h >> @@ -72,3 +72,5 @@ char *e2p_os2string(int os_type); >> int e2p_string2os(char *str); >> >> unsigned int e2p_percent(int percent, unsigned int base); >> + >> +int get_fragment_score(int fd, size_t threshold); > > This function name should be prefixed with e2p_ to avoid name clashes and make it clear that it us part of the e2p library. > >> diff --git a/lib/e2p/fragment_score.c b/lib/e2p/fragment_score.c >> new file mode 100644 >> index 0000000..3ad21b9 >> --- /dev/null >> +++ b/lib/e2p/fragment_score.c >> @@ -0,0 +1,142 @@ >> +/* >> + * fragment_score.c --- Get file fragmentation score on ext4 filesystem. >> + * >> + * Copyright (C) 2011 Kazuya Mio<k-mio@sx.jp.nec.com> >> + * NEC Software Tohoku, Ltd. >> + * >> + * %Begin-Header% >> + * This file may be redistributed under the terms of the GNU Library >> + * General Public License, version 2. >> + * %End-Header% >> + */ >> + >> +#define _LARGEFILE64_SOURCE >> + >> +#if HAVE_ERRNO_H >> +#include<errno.h> >> +#endif >> +#include<unistd.h> >> +#include<string.h> >> +#include<sys/ioctl.h> >> +#include<sys/types.h> >> +#include<sys/stat.h> >> +#include<sys/vfs.h> >> + >> +#include<ext2fs/ext2_types.h> >> +#include<ext2fs/fiemap.h> >> + >> +#include "e2p.h" >> + >> +#define EXT3_IOC_GETFLAGS _IOR('f', 1, long) >> + >> +#ifdef HAVE_FSTAT64 >> +#define FSTAT fstat64 >> +#define STRUCT_STAT struct stat64 >> +#else >> +#define FSTAT fstat >> +#define STRUCT_STAT struct stat >> +#endif >> + >> +static int is_target_extent(struct fiemap_extent *cur_fm_ext, >> + struct fiemap_extent *next_fm_ext) >> +{ >> + /* Check a hole between the extent */ >> + if ((cur_fm_ext->fe_logical + cur_fm_ext->fe_length) >> +< next_fm_ext->fe_logical) >> + return 0; >> + /* Check a defference of unwritten flag */ >> + if ((cur_fm_ext->fe_flags& FIEMAP_EXTENT_UNWRITTEN) >> + != (next_fm_ext->fe_flags& FIEMAP_EXTENT_UNWRITTEN)) >> + return 0; >> + >> + /* target extent */ >> + return 1; >> +} >> + >> +int get_fragment_score(int fd, size_t threshold) >> +{ >> + unsigned int flags = 0; >> + STRUCT_STAT fileinfo; >> + struct statfs fsinfo; >> + char buf[4096] = ""; >> + struct fiemap *fiemap = (struct fiemap *)buf; >> + struct fiemap_extent *fm_ext =&fiemap->fm_extents[0]; >> + struct fiemap_extent prev_fm_ext; >> + int count = (sizeof(buf) - sizeof(*fiemap)) / >> + sizeof(struct fiemap_extent); >> + int tot_extents = 0; >> + int frag_extents = 0; >> + unsigned int i; >> + int first = 1, last = 0; >> + >> + if (FSTAT(fd,&fileinfo)< 0 || >> + fstatfs(fd,&fsinfo)< 0) >> + return -1; >> + if (ioctl(fd, EXT3_IOC_GETFLAGS,&flags)< 0) >> + flags = 0; >> + >> + /* >> + * Return an error if the target file is not the following cases. >> + * - regular file >> + * - extent format file on ext4 filesystem >> + */ >> + if (!S_ISREG(fileinfo.st_mode) || >> + fsinfo.f_type != EXT2_SUPER_MAGIC || >> + !(flags& EXT4_EXTENTS_FL)) { >> + errno = EOPNOTSUPP; >> + return -1; >> + } > > I don't think there is a particular reason why this function shouldn't be usable for any filesytem that can return FIEMAP data. > > The fragmentation score should be independent of the underlying implementation, and it is the job of the caller to decide what to do with this information. In the case of e4defrag it couldn't defrag a directory (ioctl should fail) and it will likely fail on other filesystem types but that is fine also. If other filesystems begin to support this ioctl they may want to begin using the same defrag tool. > >> + memset(fiemap, 0, sizeof(struct fiemap)); >> + fiemap->fm_start = 0; >> + fiemap->fm_length = ~0ULL; >> + fiemap->fm_extent_count = count; >> + >> + do { >> + fiemap->fm_flags = 0; >> + if (ioctl(fd, FS_IOC_FIEMAP, (unsigned long) fiemap)< 0) >> + return -1; >> + >> + /* If 0 extents are returned, then more ioctls are not needed */ >> + if (fiemap->fm_mapped_extents == 0) >> + break; >> + >> + if (first != 0) >> + first = 0; >> + else { >> + /* Check the last extent gotten by previous FIEMAP */ >> + if (is_target_extent(&prev_fm_ext,&fm_ext[0])) { >> + tot_extents++; >> + if (prev_fm_ext.fe_length< threshold) >> + frag_extents++; >> + } >> + } >> + >> + for (i = 0; i< fiemap->fm_mapped_extents; i++) { >> + if (fm_ext[i].fe_flags& FIEMAP_EXTENT_LAST) { >> + last = 1; >> + continue; >> + } >> + >> + if (fiemap->fm_mapped_extents - 1 == i) { >> + memcpy(&prev_fm_ext,&fm_ext[i], >> + sizeof(struct fiemap_extent)); >> + continue; >> + } >> + >> + /* Check target extent */ >> + if (!is_target_extent(&fm_ext[i],&fm_ext[i+1])) >> + continue; >> + >> + tot_extents++; >> + >> + if (fm_ext[i].fe_length< threshold) >> + frag_extents++; >> + } >> + >> + fiemap->fm_start = (fm_ext[i-1].fe_logical + >> + fm_ext[i-1].fe_length); >> + } while (last == 0); >> + >> + return tot_extents == 0 ? 0 : (100 * frag_extents) / tot_extents; >> +} > > Cheers, Andreas -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Jun 15, 2011 at 03:33:19PM +0900, Kazuya Mio wrote: > This patch adds get_fragment_score() to libe2p. get_fragment_score() returns > the fragmentation score. It shows the percentage of extents whose size is > smaller than the input argument "threshold". It perhaps might be useful to also articulate what are the goals of this metric. Is just just to decide which files should be defragmented, and which should be left alone? Or do you want to be able to compare which file is "worse off"? I can imagine two files that have a score of 100%, but one is much worse off than the other. Does that matter? It may or might not, depending how you plan to use the fragmentation score, both now and in the future. So it might be good to explicitly declare what are the goals for this metrics, and its planned use cases. Regards, - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 6/16/11 10:18 PM, Ted Ts'o wrote: > On Wed, Jun 15, 2011 at 03:33:19PM +0900, Kazuya Mio wrote: >> This patch adds get_fragment_score() to libe2p. get_fragment_score() returns >> the fragmentation score. It shows the percentage of extents whose size is >> smaller than the input argument "threshold". > > It perhaps might be useful to also articulate what are the goals of > this metric. Is just just to decide which files should be > defragmented, and which should be left alone? Or do you want to be > able to compare which file is "worse off"? > > I can imagine two files that have a score of 100%, but one is much > worse off than the other. Does that matter? It may or might not, > depending how you plan to use the fragmentation score, both now and in > the future. So it might be good to explicitly declare what are the > goals for this metrics, and its planned use cases. > > Regards, Just as a random datapoint, the xfs_db "frag factor" has been a constant source of misunderstanding and woe for us. (Granted, it works differently; it is an fs-wide number representing ((actual - ideal) / actual) extents in the fs.) This "% of fragments smaller than threshold" is more easily understandable and possibly more descriptive, but I think Ted makes good points; think about how this will be used, and whether the metric is useful. It's hard to make a single number a) make sense to the user, and b) be usefully representative of fragmentation "badness" - so I am feeling very cautious about this idea overall. To really convey fragmentation "badness" you'd almost want a histogram of fragment sizes, which is a bit hard to present concisely... -Eric > - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
I was thinking about this, and am wondering if it makes sense to have an absolute score for fragmentation instead of a relative one? By absolute I mean something like fragments per MB or similar. A bad score might be anything > 1. For files smaller than 1 MB in size it would scale the ratio to the equivalent if the file was 1MB in size (e.g. a 16kB file with 4 fragments would have a score of 256, which is clearly bad). Large files can have a score much less than 1, which is good. Cheers, Andreas On 2011-06-17, at 8:20 AM, Eric Sandeen <sandeen@redhat.com> wrote: > On 6/16/11 10:18 PM, Ted Ts'o wrote: >> On Wed, Jun 15, 2011 at 03:33:19PM +0900, Kazuya Mio wrote: >>> This patch adds get_fragment_score() to libe2p. get_fragment_score() returns >>> the fragmentation score. It shows the percentage of extents whose size is >>> smaller than the input argument "threshold". >> >> It perhaps might be useful to also articulate what are the goals of >> this metric. Is just just to decide which files should be >> defragmented, and which should be left alone? Or do you want to be >> able to compare which file is "worse off"? >> >> I can imagine two files that have a score of 100%, but one is much >> worse off than the other. Does that matter? It may or might not, >> depending how you plan to use the fragmentation score, both now and in >> the future. So it might be good to explicitly declare what are the >> goals for this metrics, and its planned use cases. >> >> Regards, > > Just as a random datapoint, the xfs_db "frag factor" has been a constant > source of misunderstanding and woe for us. (Granted, it works differently; > it is an fs-wide number representing > > ((actual - ideal) / actual) > > extents in the fs.) > > This "% of fragments smaller than threshold" is more easily understandable > and possibly more descriptive, but I think Ted makes good points; > think about how this will be used, and whether the metric is useful. > > It's hard to make a single number a) make sense to the user, and b) > be usefully representative of fragmentation "badness" - so I am > feeling very cautious about this idea overall. > > To really convey fragmentation "badness" you'd almost want a histogram > of fragment sizes, which is a bit hard to present concisely... > > > -Eric > >> - Ted > > -- > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, Jun 18, 2011 at 3:19 AM, Andreas Dilger <aedilger@gmail.com> wrote: > I was thinking about this, and am wondering if it makes sense to have an absolute score for fragmentation instead of a relative one? > > By absolute I mean something like fragments per MB or similar. A bad score might be anything > 1. For files smaller than 1 MB in size it would scale the ratio to the equivalent if the file was 1MB in size (e.g. a 16kB file with 4 fragments would have a score of 256, which is clearly bad). Large files can have a score much less than 1, which is good. > > Cheers, Andreas Shouldn't be based on fragments per max extent size for ext4? And I think the max extent size for a 4KB page is 128 MB, right? Greg -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 2011-06-18, at 11:00 AM, Greg Freemyer <greg.freemyer@gmail.com> wrote: > On Sat, Jun 18, 2011 at 3:19 AM, Andreas Dilger <aedilger@gmail.com> wrote: >> I was thinking about this, and am wondering if it makes sense to have an absolute score for fragmentation instead of a relative one? >> >> By absolute I mean something like fragments per MB or similar. A bad score might be anything > 1. For files smaller than 1 MB in size it would scale the ratio to the equivalent if the file was 1MB in size (e.g. a 16kB file with 4 fragments would have a score of 256, which is clearly bad). Large files can have a score much less than 1, which is good. >> >> Cheers, Andreas > > Shouldn't be based on fragments per max extent size for ext4? > > And I think the max extent size for a 4KB page is 128 MB, right? I was thinking about that, but in most cases it is unrealistic that all files have 128MB extents except on empty test filesystems, and I don't think that files with "only" 32MB extents should be considered that badly off. I don't particularly care what the exact scale is, but I like the idea of an absolute measure instead of a relative measure (i.e. 33% fragmented). Cheers, Andreas-- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2011/6/15 Kazuya Mio <k-mio@sx.jp.nec.com>: > This patch adds get_fragment_score() to libe2p. get_fragment_score() returns > the fragmentation score. It shows the percentage of extents whose size is > smaller than the input argument "threshold". > > However, there are some cases that cannot be merged into a next extent. > The following extents are excepted from the calculation of fragmentation score: > - The extent whose initialize status is different from the next extent > - There is a hole between the extent and its next extent > - The extent is a tail > > Signed-off-by: Kazuya Mio <k-mio@sx.jp.nec.com> > --- Kazuya, If you're are going to stick with a percentage based approach I agree with special casing the tail of the file or the tail of each block_group in a sparse file. But I think you should also special case the head extent of a block_group. More clearly, whatever special treatment that is afforded to the tail_extent of a block_group, should also be afforded to head_extent. Maybe something like this for sparse files: M_total=0 N_total=0 For (each block group) { M = extents in block group below threshold N = extents in block group above threshold if first_extent is below threshold { --M; } # don't count it as below threshold if last_extent is below threshold { --M; } # don't count it as below threshold M_total +=M N_totatl += N block_group_fragmentation_score = M / (N+M) } file_fragmentation_score = M_total / (N_total + M_total) ===> overall e4defrag design feedback e4defrag makes the decision to defrag sparse files at the full file level last I looked and I believe it still does. I think it should treat each non-sparse file and each block_group of a sparse file separately as a target for defragmentation. The patch you are submitting just embeds the "file" defragmentation methodology further into the code. It very much seems to me that the e4defrag code and scoring needs to work at the block_group size at the largest. That is for sparse files it should "score" a block_group at a time at most. And then it should compare the target's block_group layout vs. the donor files block_group layout. Once it is decided to defragment a block_group, then I think it should fallocate the largest available extent (that may require a kernel patch). If that newly allocated extent is bigger than the first extent in the block_group, then call into the kernel to swap out the target blocks for the donor blocks. Then move to the next target extent and repeat. For a large file, making the overall defrag choice at the block_group level just seems right, but for actually replacing target extents with donor extents, I really think that needs to be decided one extent at a time as I describe above. After all, what is the value taking a block group with a 100 full extents followed by two partial extents and moving all of that data around in order to defrag it. Only the last 2 extents need to be consolidated. Using the approach I described above, the code would quickly bypass the first 100 full extents and then just defrag the last 2 partial extents. Greg -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2011/06/17 12:18, Ted Ts'o wrote: > It perhaps might be useful to also articulate what are the goals of > this metric. Is just just to decide which files should be > defragmented, and which should be left alone? Or do you want to be > able to compare which file is "worse off"? I decided to implement a fragmentation score for the two purposes: one is for filefrag that outputs the score to decide which files should be defragmented, and the other is for e4defrag that compares two files' fragmentation to prevent the worse fragmentation. > I can imagine two files that have a score of 100%, but one is much > worse off than the other. Does that matter? It may or might not, > depending how you plan to use the fragmentation score, both now and in > the future. So it might be good to explicitly declare what are the > goals for this metrics, and its planned use cases. Certainly, the same fragmentation score doesn't always mean the same fragmentation. Just as Andreas said, "fragments per MB" is a good idea. It's easy to understand, and other filesystem also would be able to use it without change. Moreover, there is no worry about what threshold we use to the application. What do you think about this idea? Regards, Kazuya Mio -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2011/06/18 16:19, Andreas Dilger wrote: > I was thinking about this, and am wondering if it makes sense to have an absolute score for fragmentation > instead of a relative one? > > By absolute I mean something like fragments per MB or similar. A bad score might be anything> 1. For > files smaller than 1 MB in size it would scale the ratio to the equivalent if the file was 1MB in size > (e.g. a 16kB file with 4 fragments would have a score of 256, which is clearly bad). Large files can > have a score much less than 1, which is good. I think fragments per MB is easy to understand. I will fix the library function to "double e2p_get_fragscore(int fd)". To return fragments per MB, it will get the number of extents and the total length of extents except the following special cases: - The extent whose initialize status is different from the next extent - There is a hole between the extent and the next extent - The extent is a tail The output of filefrag would be as follows: # filefrag /mnt/mp1/testfile /mnt/mp1/testfile: 4 extents found, 0.75 fragments/MB Regards, Kazuya Mio -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jun 21, 2011 at 08:26:25PM +0900, Kazuya Mio wrote: > > I decided to implement a fragmentation score for the two purposes: > one is for filefrag that outputs the score to decide which files should be > defragmented, and the other is for e4defrag that compares two files' > fragmentation to prevent the worse fragmentation. I'm really nervous about having filefrag print a "fragmentation score". The problem is that the problem is invariably far more complex than can be boiled into a single number, and so users look at it and start worrying when they shouldn't. And the statement, "so that e4defrag can compare two files' fragmentation to prevent the worse fragmentation" begs the question of what is "worse". The real issue here is that it's a multidimensional problem. > Certainly, the same fragmentation score doesn't always mean the same > fragmentation. Just as Andreas said, "fragments per MB" is a good idea. It's > easy to understand, and other filesystem also would be able to use it without > change. Moreover, there is no worry about what threshold we use to > the application. "fragments per megabyte" is definitely better, especially if you disregard the tail. It's worth consider how it works for files smaller than a megabyte. Do you round the file size up to the nearest megabyte? Is it an integer score, or does it need to be floating point? An integer score where the size is rounded up to the nearest megabyte sounds like a best plan, but I'm sure we could still find some interesting non-linearities that lead to surprising results. - Ted -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2011/06/21 22:56, Ted Ts'o wrote: > I'm really nervous about having filefrag print a "fragmentation > score". The problem is that the problem is invariably far more > complex than can be boiled into a single number, and so users look at > it and start worrying when they shouldn't. It's possible that could happen. I suppose filefrag should output "fragmented" or "not fragmented" to understand when they shouldn't do e4defag. But it would be difficult to implement this idea because the threshold for the determination of which file is fragmented is different of each filesystem. As it stands now, I think filefrag shouldn't output fragmentation score. However, I think I add get_fragment_score() to libe2p because e4defrag will still use it. > And the statement, "so that e4defrag can compare two files' > fragmentation to prevent the worse fragmentation" begs the question of > what is "worse". The real issue here is that it's a multidimensional > problem. We need to define "what is worse" for e4defrag. If fragments per megabyte of the file is bigger than the threshold, e4defrag will call EXT4_IOC_MOVE_EXT ioctl. The threshold may be customizable by an option. > "fragments per megabyte" is definitely better, especially if you > disregard the tail. It's worth consider how it works for files > smaller than a megabyte. Do you round the file size up to the nearest > megabyte? Is it an integer score, or does it need to be floating > point? An integer score where the size is rounded up to the nearest > megabyte sounds like a best plan, but I'm sure we could still find > some interesting non-linearities that lead to surprising results. An integer score sounds good to me. Regards, Kazuya Mio -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jun 21, 2011 at 7:28 AM, Kazuya Mio <k-mio@sx.jp.nec.com> wrote: > 2011/06/18 16:19, Andreas Dilger wrote: >> >> I was thinking about this, and am wondering if it makes sense to have an >> absolute score for fragmentation >> instead of a relative one? >> >> By absolute I mean something like fragments per MB or similar. A bad score >> might be anything> 1. For >> files smaller than 1 MB in size it would scale the ratio to the equivalent >> if the file was 1MB in size >> (e.g. a 16kB file with 4 fragments would have a score of 256, which is >> clearly bad). Large files can >> have a score much less than 1, which is good. > > I think fragments per MB is easy to understand. I will fix the library > function > to "double e2p_get_fragscore(int fd)". To return fragments per MB, it will > get the number of extents and the total length of extents except the > following > special cases: > - The extent whose initialize status is different from the next extent > - There is a hole between the extent and the next extent > - The extent is a tail For a sparse file, can you explain why you treat the head and tail extents of a block group differently? The issue is totally symetric in my mind, so either include both or exclude both in my opinion. The above description only excludes the block group tail extents. Greg -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Jun 23, 2011 at 7:16 AM, Greg Freemyer <greg.freemyer@gmail.com> wrote: > On Tue, Jun 21, 2011 at 7:28 AM, Kazuya Mio <k-mio@sx.jp.nec.com> wrote: >> 2011/06/18 16:19, Andreas Dilger wrote: >>> >>> I was thinking about this, and am wondering if it makes sense to have an >>> absolute score for fragmentation >>> instead of a relative one? >>> >>> By absolute I mean something like fragments per MB or similar. A bad score >>> might be anything> 1. For >>> files smaller than 1 MB in size it would scale the ratio to the equivalent >>> if the file was 1MB in size >>> (e.g. a 16kB file with 4 fragments would have a score of 256, which is >>> clearly bad). Large files can >>> have a score much less than 1, which is good. >> >> I think fragments per MB is easy to understand. I will fix the library >> function >> to "double e2p_get_fragscore(int fd)". To return fragments per MB, it will >> get the number of extents and the total length of extents except the >> following >> special cases: >> - The extent whose initialize status is different from the next extent >> - There is a hole between the extent and the next extent >> - The extent is a tail > > For a sparse file, can you explain why you treat the head and tail > extents of a block group differently? > > The issue is totally symetric in my mind, so either include both or > exclude both in my opinion. The above description only excludes the > block group tail extents. > > Greg I forgot to say, fragments per MB is not a large enough unit in my mind. A storage system that can transfer 100MB / sec , but takes 5 msecs to seek and do a half rotation of the platter would spend 33% of its time seeking if it had 1MB contiguous sections of data all over the drive. Fragments per max extent really seems the only logical thng for e4defrag to use in my mind. Greg -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Greg, I'm sorry for the late reply. 2011/06/23 20:16, Greg Freemyer wrote: > For a sparse file, can you explain why you treat the head and tail > extents of a block group differently? Could you tell me what "block group" you said means? If "block group" means the ext4 block group, I will treat the head and tail extents of a block group the same way. And if "block group" means the chunk of the extents whose offset is continued, I will treat only the tail extents as a special case. # filefrag -v /mnt/mp1/file Filesystem type is: ef53 File size of /mnt/mp1/file is 285212672 (69632 blocks, blocksize 4096) ext logical physical expected length flags 0 0 34816 30720 1 30720 65536 2048 unwritten 2 65536 67584 4096 unwritten,eof /mnt/mp1/file: 1 extent found The case is not fragmented. The length of #1 extent is a little bit short, but there is no point in doing defragmentation because of the hole existence. Regards, Kazuya Mio -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Jun 24, 2011 at 4:28 AM, Kazuya Mio <k-mio@sx.jp.nec.com> wrote: > Hi Greg, > I'm sorry for the late reply. > > 2011/06/23 20:16, Greg Freemyer wrote: >> >> For a sparse file, can you explain why you treat the head and tail >> extents of a block group differently? > > Could you tell me what "block group" you said means? Kazuya, Sorry if I've got the terminology wrong. I'm talking about sparse files. Where you might have: block_group1 - hole - block_group2 - hole - block_group3. Block_group2 has a head and a tail extent. In my mind, from a performance perspective, they are symmetric. Meaning that having a small extent at the beginning is no better and no worse than having a small extent at the end. > If "block group" means the ext4 block group, I will treat the head and tail > extents of a block group the same way. > > And if "block group" means the chunk of the extents whose offset is > continued, > I will treat only the tail extents as a special case. > > # filefrag -v /mnt/mp1/file > Filesystem type is: ef53 > File size of /mnt/mp1/file is 285212672 (69632 blocks, blocksize 4096) > ext logical physical expected length flags > 0 0 34816 30720 > 1 30720 65536 2048 unwritten > 2 65536 67584 4096 unwritten,eof > /mnt/mp1/file: 1 extent found > > The case is not fragmented. The length of #1 extent is a little bit short, > but there is no point in doing defragmentation because of the hole > existence. Please consider this caseL Assume for a minute a large sparse file with lots of holes. This is one like a VM might create. And that in the middle of the file is a block_group with a hole in front of it and a hole after it. Assume it is 1 extent long exactly, and that one extent is a maximum extent (ie 128 MB with 4KB blocks I believe). Now assume the VM writes a single short extent at the end of block_group. Your logic says full_extent+small_extent, no need to defrag because its already optimal. I agree with that. Now assume instead of the block_group being extended at the tail, it is extended at the head. Now you have: small_extent+full_extent Your logic as I understand it will score that as not as good as the first case. I disagree, Both are optimally defragged and both should get the same score. > > Regards, > Kazuya Mio Greg -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2011/06/26 11:16, Greg Freemyer wrote: > I'm talking about sparse files. > > Where you might have: > > block_group1 - hole - block_group2 - hole - block_group3. > > Block_group2 has a head and a tail extent. In my mind, from a > performance perspective, they are symmetric. Meaning that having a > small extent at the beginning is no better and no worse than having a > small extent at the end. Thanks for your kind description. Your point is right. I'll think a little more about the fragmentation score. Regards, Kazuya Mio -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Jun 28, 2011 at 6:21 AM, Kazuya Mio <k-mio@sx.jp.nec.com> wrote: > 2011/06/26 11:16, Greg Freemyer wrote: >> >> I'm talking about sparse files. >> >> Where you might have: >> >> block_group1 - hole - block_group2 - hole - block_group3. >> >> Block_group2 has a head and a tail extent. In my mind, from a >> performance perspective, they are symmetric. Meaning that having a >> small extent at the beginning is no better and no worse than having a >> small extent at the end. > > Thanks for your kind description. Your point is right. I'll think a little > more > about the fragmentation score. > > Regards, > Kazuya Mio Kazuya, While you're thinking about the issue: As I hope I've said before, for sparse file I think e4defrag should score and defrag one block_group at a time. Thus if a VM backing storage file has 100 block_groups (as I'm using the term), then it should score each of the 100 separately and if needed defrag them one at a time. I can see no benefit from treating a large sparse file as monolithic for the decision process. fyi: Is there an agreed on term for what I'm calling a block_group. I believe e4defrag uses the term "extent group" in the comments, but sparse files exist in non-extent based filesystems, so it's not a very portable name. Greg -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2011/06/28 22:53, Greg Freemyer wrote: > While you're thinking about the issue: > > As I hope I've said before, for sparse file I think e4defrag should > score and defrag one block_group at a time. Thus if a VM backing > storage file has 100 block_groups (as I'm using the term), then it > should score each of the 100 separately and if needed defrag them one > at a time. > > I can see no benefit from treating a large sparse file as monolithic > for the decision process. If e4defrag does defrag one block_group at a time, this block_group may be allocated far away from the other block_groups. If so, seek time increases even if the number of extents is less than before. Of course, I'm aware of the advantage of your suggestion. I may also try to consider it to defrag only a part of a file in the future, but before that I think I should do the cleanup and bugfix. > fyi: Is there an agreed on term for what I'm calling a block_group. I > believe e4defrag uses the term "extent group" in the comments, but > sparse files exist in non-extent based filesystems, so it's not a very > portable name. e4defrag supports only an extent based filesystem, so I think it's no problem. And I associate "block_group" with the physical layout of the blocks on the disk. I guess we shouldn't use the same word with different meanings. Regards, Kazuya Mio -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi all, I come up with the new fragmentation score based on your comments. The interface of new libe2p function will be as follows: int e2p_get_fragscore(int fd, __u32 threshold, __u32 max_chunk_blks) This function returns the fragmentation score that shows how badly fragmented the file might be. The score is extents per @threshold, so the higher score means the worse fragmentation. Fragmentation score treats extents, whose file offset continues and whose status is the same, as one chunk. If the number of extents in chunk is equal to the ideal number of extents, the chunk is not used for the fragmentation score because there is no fragment in the chunk. The ideal number of extents is calculated based on @max_chunk_blks. In case of ext4, @max_chunk_blks is 32768 blocks. If you have 128MB file with two extents, these two extents are used for the calculation of the fragmentation score because the ideal number of extents in this file is one. e4defrag will judge the necessity of calling EXT4_IOC_MOVE_EXT ioctl by fragmentation score. If the fragmentation score of defrag target file is more than one, and the score of the file created by fallocate is zero, e4defrag will call the ioctl. I'll decide the better value of threshold during some tests. If you must do e4defrag to the specified file, you will be able to force defrag by using new option of e4defrag. Two examples (@threshold=256, @max_chunk_blks=32768): # filefrag -v fragment Filesystem type is: ef53 File size of fragment is 409600 (100 blocks, blocksize 4096) ext logical physical expected length flags 0 0 33807 16 1 16 32848 33822 84 eof fragment: 2 extents found This small file has two extents, so it is fragmented. The calculation is as follows: fragmentation score = 2 / (100 / 256) = 5 extents / MB. # filefrag -v not_fragment Filesystem type is: ef53 File size of not_fragment is 125829120 (30720 blocks, blocksize 4096) ext logical physical expected length flags 0 0 33823 16 1 16 32944 33838 48 2 64 33152 32991 64 3 128 34304 33215 30592 eof not_fragment: 4 extents found This file has four extents included one large extent. So the score is zero in this case. fragmentation score = 4 / (30720 / 256) = 0 extents / MB. Regards, Kazuya Mio -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/lib/e2p/Makefile.in b/lib/e2p/Makefile.in index 9775a98..c62a81d 100644 --- a/lib/e2p/Makefile.in +++ b/lib/e2p/Makefile.in @@ -19,7 +19,7 @@ all:: e2p.pc OBJS= feature.o fgetflags.o fsetflags.o fgetversion.o fsetversion.o \ getflags.o getversion.o hashstr.o iod.o ls.o mntopts.o \ parse_num.o pe.o pf.o ps.o setflags.o setversion.o uuid.o \ - ostype.o percent.o + ostype.o percent.o fragment_score.o SRCS= $(srcdir)/feature.c $(srcdir)/fgetflags.c \ $(srcdir)/fsetflags.c $(srcdir)/fgetversion.c \ @@ -28,7 +28,7 @@ SRCS= $(srcdir)/feature.c $(srcdir)/fgetflags.c \ $(srcdir)/ls.c $(srcdir)/mntopts.c $(srcdir)/parse_num.c \ $(srcdir)/pe.c $(srcdir)/pf.c $(srcdir)/ps.c \ $(srcdir)/setflags.c $(srcdir)/setversion.c $(srcdir)/uuid.c \ - $(srcdir)/ostype.c $(srcdir)/percent.c + $(srcdir)/ostype.c $(srcdir)/percent.c $(srcdir)/fragment_score.c HFILES= e2p.h LIBRARY= libe2p @@ -162,3 +162,5 @@ ostype.o: $(srcdir)/ostype.c $(srcdir)/e2p.h \ $(top_srcdir)/lib/ext2fs/ext2_fs.h $(top_builddir)/lib/ext2fs/ext2_types.h percent.o: $(srcdir)/percent.c $(srcdir)/e2p.h \ $(top_srcdir)/lib/ext2fs/ext2_fs.h $(top_builddir)/lib/ext2fs/ext2_types.h +fragment_score.o: $(srcdir)/fragment_score.c $(srcdir)/e2p.h \ + $(top_srcdir)/lib/ext2fs/ext2_fs.h $(top_builddir)/lib/ext2fs/ext2_types.h diff --git a/lib/e2p/e2p.h b/lib/e2p/e2p.h index 4a68dd9..52a8e51 100644 --- a/lib/e2p/e2p.h +++ b/lib/e2p/e2p.h @@ -72,3 +72,5 @@ char *e2p_os2string(int os_type); int e2p_string2os(char *str); unsigned int e2p_percent(int percent, unsigned int base); + +int get_fragment_score(int fd, size_t threshold); diff --git a/lib/e2p/fragment_score.c b/lib/e2p/fragment_score.c new file mode 100644 index 0000000..3ad21b9 --- /dev/null +++ b/lib/e2p/fragment_score.c @@ -0,0 +1,142 @@ +/* + * fragment_score.c --- Get file fragmentation score on ext4 filesystem. + * + * Copyright (C) 2011 Kazuya Mio <k-mio@sx.jp.nec.com> + * NEC Software Tohoku, Ltd. + * + * %Begin-Header% + * This file may be redistributed under the terms of the GNU Library + * General Public License, version 2. + * %End-Header% + */ + +#define _LARGEFILE64_SOURCE + +#if HAVE_ERRNO_H +#include <errno.h> +#endif +#include <unistd.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/vfs.h> + +#include <ext2fs/ext2_types.h> +#include <ext2fs/fiemap.h> + +#include "e2p.h" + +#define EXT3_IOC_GETFLAGS _IOR('f', 1, long) + +#ifdef HAVE_FSTAT64 +#define FSTAT fstat64 +#define STRUCT_STAT struct stat64 +#else +#define FSTAT fstat +#define STRUCT_STAT struct stat +#endif + +static int is_target_extent(struct fiemap_extent *cur_fm_ext, + struct fiemap_extent *next_fm_ext) +{ + /* Check a hole between the extent */ + if ((cur_fm_ext->fe_logical + cur_fm_ext->fe_length) + < next_fm_ext->fe_logical) + return 0; + /* Check a defference of unwritten flag */ + if ((cur_fm_ext->fe_flags & FIEMAP_EXTENT_UNWRITTEN) + != (next_fm_ext->fe_flags & FIEMAP_EXTENT_UNWRITTEN)) + return 0; + + /* target extent */ + return 1; +} + +int get_fragment_score(int fd, size_t threshold) +{ + unsigned int flags = 0; + STRUCT_STAT fileinfo; + struct statfs fsinfo; + char buf[4096] = ""; + struct fiemap *fiemap = (struct fiemap *)buf; + struct fiemap_extent *fm_ext = &fiemap->fm_extents[0]; + struct fiemap_extent prev_fm_ext; + int count = (sizeof(buf) - sizeof(*fiemap)) / + sizeof(struct fiemap_extent); + int tot_extents = 0; + int frag_extents = 0; + unsigned int i; + int first = 1, last = 0; + + if (FSTAT(fd, &fileinfo) < 0 || + fstatfs(fd, &fsinfo) < 0) + return -1; + if (ioctl(fd, EXT3_IOC_GETFLAGS, &flags) < 0) + flags = 0; + + /* + * Return an error if the target file is not the following cases. + * - regular file + * - extent format file on ext4 filesystem + */ + if (!S_ISREG(fileinfo.st_mode) || + fsinfo.f_type != EXT2_SUPER_MAGIC || + !(flags & EXT4_EXTENTS_FL)) { + errno = EOPNOTSUPP; + return -1; + } + + memset(fiemap, 0, sizeof(struct fiemap)); + fiemap->fm_start = 0; + fiemap->fm_length = ~0ULL; + fiemap->fm_extent_count = count; + + do { + fiemap->fm_flags = 0; + if (ioctl(fd, FS_IOC_FIEMAP, (unsigned long) fiemap) < 0) + return -1; + + /* If 0 extents are returned, then more ioctls are not needed */ + if (fiemap->fm_mapped_extents == 0) + break; + + if (first != 0) + first = 0; + else { + /* Check the last extent gotten by previous FIEMAP */ + if (is_target_extent(&prev_fm_ext, &fm_ext[0])) { + tot_extents++; + if (prev_fm_ext.fe_length < threshold) + frag_extents++; + } + } + + for (i = 0; i < fiemap->fm_mapped_extents; i++) { + if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST) { + last = 1; + continue; + } + + if (fiemap->fm_mapped_extents - 1 == i) { + memcpy(&prev_fm_ext, &fm_ext[i], + sizeof(struct fiemap_extent)); + continue; + } + + /* Check target extent */ + if (!is_target_extent(&fm_ext[i], &fm_ext[i+1])) + continue; + + tot_extents++; + + if (fm_ext[i].fe_length < threshold) + frag_extents++; + } + + fiemap->fm_start = (fm_ext[i-1].fe_logical + + fm_ext[i-1].fe_length); + } while (last == 0); + + return tot_extents == 0 ? 0 : (100 * frag_extents) / tot_extents; +}
This patch adds get_fragment_score() to libe2p. get_fragment_score() returns the fragmentation score. It shows the percentage of extents whose size is smaller than the input argument "threshold". However, there are some cases that cannot be merged into a next extent. The following extents are excepted from the calculation of fragmentation score: - The extent whose initialize status is different from the next extent - There is a hole between the extent and its next extent - The extent is a tail Signed-off-by: Kazuya Mio <k-mio@sx.jp.nec.com> --- lib/e2p/Makefile.in | 6 + lib/e2p/e2p.h | 2 lib/e2p/fragment_score.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 148 insertions(+), 2 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html