Patchwork debugfs: dump a sparse file as a new sparse file

login
register
mail settings
Submitter Zheng Liu
Date Nov. 15, 2012, 2:46 p.m.
Message ID <20121115144613.GA11706@gmail.com>
Download mbox | patch
Permalink /patch/199303/
State Superseded
Headers show

Comments

Zheng Liu - Nov. 15, 2012, 2:46 p.m.
On Tue, Nov 13, 2012 at 11:07:57PM -0500, George Spelvin wrote:
> When dumping a file with some messed-up block pointers, I discovered
> that I had a huge, and fully allocated destination file.  (ls -s on the
> dump displayed some very large number, even though the original file
> only had about 8 blocks allocated.)
> 
> It wouldprobably make more sense to seek over the gaps to create a
> sparse file.

Hi George,

Would you like to try this patch?

Thanks,
                                                - Zheng

Subject: [PATCH] debugfs: dump a sparse file as a new sparse file

From: Zheng Liu <wenqing.lz@taobao.com>

For dumping a sparse file ext2fs_file_read2() is defined to expand the
interface.  It returns the size of hole and we can call lseek64(2) to skip it.

Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
---
 debugfs/dump.c      | 16 ++++++++++++++--
 lib/ext2fs/ext2fs.h |  3 +++
 lib/ext2fs/fileio.c | 48 +++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 54 insertions(+), 13 deletions(-)
Theodore Ts'o - Jan. 1, 2013, 2:08 a.m.
On Thu, Nov 15, 2012 at 04:46:13AM -0000, Zheng Liu wrote:
> --- a/debugfs/dump.c
> +++ b/debugfs/dump.c
> @@ -105,10 +105,11 @@ static void dump_file(const char *cmdname, ext2_ino_t ino, int fd,
>  {
>  	errcode_t retval;
>  	struct ext2_inode	inode;
> -	char 		buf[8192];
> +	char 		buf[current_fs->blocksize];

Note: this is a non-standard/non-portable GCC extension.  The best way
to fix this is to explicitly malloc the buffer and then free it before
dump_file exits.

Also, fixing the hard-coded maximum 8k block size, so these sorts of
changes should be done in a separate commit.

>  extern errcode_t ext2fs_file_llseek(ext2_file_t file, __u64 offset,
> diff --git a/lib/ext2fs/fileio.c b/lib/ext2fs/fileio.c
> index 1f7002c..d25ebb4 100644
> --- a/lib/ext2fs/fileio.c
> +++ b/lib/ext2fs/fileio.c
> @@ -176,23 +176,26 @@ static errcode_t sync_buffer_position(ext2_file_t file)
>   * This function loads the file's block buffer with valid data from
>   * the disk as necessary.
>   *
> - * If dontfill is true, then skip initializing the buffer since we're
> + * If flags is true, then skip initializing the buffer since we're
>   * going to be replacing its entire contents anyway.  If set, then the
>   * function basically only sets file->physblock and EXT2_FILE_BUF_VALID
>   */
>  #define DONTFILL 1

The comment "If flags is true..." should be changed to "if the
DONTFILL flag is set..."  

@@ -202,7 +205,11 @@ static errcode_t load_buffer(ext2_file_t file, int dontfill)
 			} else
 				memset(file->buf, 0, fs->blocksize);
 		}
-		file->flags |= EXT2_FILE_BUF_VALID;
+		if (flags == SEEK && ((ret_flags & BMAP_RET_UNINIT) ||
+		    file->physblock == 0))
+			valid = 0;
+		if (valid)
+			file->flags |= EXT2_FILE_BUF_VALID;


valid is initialized to 1, and then set to zero above.  So I'm not
sure what the point is having the valid variable.  Why not do
something like this:

		if (!(flags == SEEK && ((ret_flags & BMAP_RET_UNINIT) ||
				      file->physblock == 0)))
			file->flags |= EXT2_FILE_BUF_VALID;

or...
		if (((flags != SEEK) ||
		     (!(ret_flags & BMAP_RET_UNINIT) && file->physblock)))
			file->flags |= EXT2_FILE_BUF_VALID;


Also, can you add a definition of what SEEK actually means in the
context of load_buffer()?

					- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zheng Liu - Jan. 1, 2013, 12:33 p.m.
On Mon, Dec 31, 2012 at 09:08:41PM -0500, Theodore Ts'o wrote:
> On Thu, Nov 15, 2012 at 04:46:13AM -0000, Zheng Liu wrote:
> > --- a/debugfs/dump.c
> > +++ b/debugfs/dump.c
> > @@ -105,10 +105,11 @@ static void dump_file(const char *cmdname, ext2_ino_t ino, int fd,
> >  {
> >  	errcode_t retval;
> >  	struct ext2_inode	inode;
> > -	char 		buf[8192];
> > +	char 		buf[current_fs->blocksize];
> 
> Note: this is a non-standard/non-portable GCC extension.  The best way
> to fix this is to explicitly malloc the buffer and then free it before
> dump_file exits.
> 
> Also, fixing the hard-coded maximum 8k block size, so these sorts of
> changes should be done in a separate commit.
> 
> >  extern errcode_t ext2fs_file_llseek(ext2_file_t file, __u64 offset,
> > diff --git a/lib/ext2fs/fileio.c b/lib/ext2fs/fileio.c
> > index 1f7002c..d25ebb4 100644
> > --- a/lib/ext2fs/fileio.c
> > +++ b/lib/ext2fs/fileio.c
> > @@ -176,23 +176,26 @@ static errcode_t sync_buffer_position(ext2_file_t file)
> >   * This function loads the file's block buffer with valid data from
> >   * the disk as necessary.
> >   *
> > - * If dontfill is true, then skip initializing the buffer since we're
> > + * If flags is true, then skip initializing the buffer since we're
> >   * going to be replacing its entire contents anyway.  If set, then the
> >   * function basically only sets file->physblock and EXT2_FILE_BUF_VALID
> >   */
> >  #define DONTFILL 1
> 
> The comment "If flags is true..." should be changed to "if the
> DONTFILL flag is set..."  
> 
> @@ -202,7 +205,11 @@ static errcode_t load_buffer(ext2_file_t file, int dontfill)
>  			} else
>  				memset(file->buf, 0, fs->blocksize);
>  		}
> -		file->flags |= EXT2_FILE_BUF_VALID;
> +		if (flags == SEEK && ((ret_flags & BMAP_RET_UNINIT) ||
> +		    file->physblock == 0))
> +			valid = 0;
> +		if (valid)
> +			file->flags |= EXT2_FILE_BUF_VALID;
> 
> 
> valid is initialized to 1, and then set to zero above.  So I'm not
> sure what the point is having the valid variable.  Why not do
> something like this:
> 
> 		if (!(flags == SEEK && ((ret_flags & BMAP_RET_UNINIT) ||
> 				      file->physblock == 0)))
> 			file->flags |= EXT2_FILE_BUF_VALID;
> 
> or...
> 		if (((flags != SEEK) ||
> 		     (!(ret_flags & BMAP_RET_UNINIT) && file->physblock)))
> 			file->flags |= EXT2_FILE_BUF_VALID;
> 
> 
> Also, can you add a definition of what SEEK actually means in the
> context of load_buffer()?

Hi Ted,

I have sent the latest patch series out.  Could you please review it?
Thanks for your time.

Regards,
                                                - Zheng
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
George Spelvin - Jan. 1, 2013, 8:10 p.m.
Theodore Ts'o <tytso@mit.edu> wrote:
> On Thu, Nov 15, 2012 at 04:46:13AM -0000, Zheng Liu wrote:
>> --- a/debugfs/dump.c
>> +++ b/debugfs/dump.c
>> @@ -105,10 +105,11 @@ static void dump_file(const char *cmdname, ext2_ino_t ino, int fd,
>>  {
>>  	errcode_t retval;
>>  	struct ext2_inode	inode;
>> -	char 		buf[8192];
>> +	char 		buf[current_fs->blocksize];

> Note: this is a non-standard/non-portable GCC extension.  The best way
> to fix this is to explicitly malloc the buffer and then free it before
> dump_file exits.

Er... isn't that also in C99?  That should be portable enough.
Is there an actual compiler of interest that doesn't support it?

Here's a set of C99 conformance tests:
http://p99.gforge.inria.fr/c99-conformance/

Gcc, clang, icc, opencc and pcc all support VLAs.
Only tcc lacks support.  Is tcc support an important goal?
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o - Jan. 1, 2013, 8:57 p.m.
On Tue, Jan 01, 2013 at 03:10:12PM -0500, George Spelvin wrote:
> > Note: this is a non-standard/non-portable GCC extension.  The best way
> > to fix this is to explicitly malloc the buffer and then free it before
> > dump_file exits.
> 
> Er... isn't that also in C99?  That should be portable enough.
> Is there an actual compiler of interest that doesn't support it?

There are people who compile e2fsprogs under Windows (e.g. for FUSE
for Windows support), and MSVC does not support VLA's or C99 in
general In general I tend to be very conservative about what compiler
features are used in e2fsprogs, and I do care about portability beyond
just Linux systems.

Regards,

					- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
George Spelvin - Jan. 1, 2013, 9:25 p.m.
> There are people who compile e2fsprogs under Windows (e.g. for FUSE
> for Windows support), and MSVC does not support VLA's or C99 in
> general In general I tend to be very conservative about what compiler
> features are used in e2fsprogs, and I do care about portability beyond
> just Linux systems.

Ah, thank you!  I fully agree that's an important C compiler to
target; I had just erroneously assumed that commercial C compilers
would try to be "check-box feature complete" on such matters.

But apparently Microsoft don't support C99 and have no intention to:
http://herbsutter.com/2012/05/03/reader-qa-what-about-vc-and-c99/
http://visualstudio.uservoice.com/forums/121579-visual-studio/suggestions/2089423-c99-support
http://www.infoq.com/news/2012/05/vs_c99_support/

That's a good enough reason; my only remaining objection is calling it
"a non-standard/non-portable GCC extension".
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Theodore Ts'o - Jan. 1, 2013, 10:47 p.m.
On Tue, Jan 01, 2013 at 04:25:04PM -0500, George Spelvin wrote:
> 
> That's a good enough reason; my only remaining objection is calling it
> "a non-standard/non-portable GCC extension".

Fair enough.  Basically, e2fsprogs is targetting C89, and with the
exception of inline functions (which is optional; e2fsprogs will build
on compilers which don't handle inline functions, and it's only
recently that I switched us over to use C99 inline functions instead
of the old gcc's gnu89 inline declarations), as far as I know we're
not dependent on any C99 language features.

Because I tend to use a very conservative coding standard for
porability's sake, I sometimes lose track of what's actually allowed
by C99, and what's a GCC extension.

BTW, I'll note that in C11 (ISO/IEC 9899:2011), VLA's have been made
**optional**.  Hence, even for programs targetting C11-compliant
compilers, it's still not a good idea to use VLA's in your code if you
are striving for maximal portability.

Regards,

	      		      	    	 - Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/debugfs/dump.c b/debugfs/dump.c
index a15a0b7..013c0c8 100644
--- a/debugfs/dump.c
+++ b/debugfs/dump.c
@@ -105,10 +105,11 @@  static void dump_file(const char *cmdname, ext2_ino_t ino, int fd,
 {
 	errcode_t retval;
 	struct ext2_inode	inode;
-	char 		buf[8192];
+	char 		buf[current_fs->blocksize];
 	ext2_file_t	e2_file;
 	int		nbytes;
 	unsigned int	got;
+	ext2_off64_t	seek;
 
 	if (debugfs_read_inode(ino, &inode, cmdname))
 		return;
@@ -119,11 +120,22 @@  static void dump_file(const char *cmdname, ext2_ino_t ino, int fd,
 		return;
 	}
 	while (1) {
-		retval = ext2fs_file_read(e2_file, buf, sizeof(buf), &got);
+		if (fd == 1)
+			retval = ext2fs_file_read(e2_file, buf, sizeof(buf),
+						  &got);
+		else
+			retval = ext2fs_file_read2(e2_file, buf, sizeof(buf),
+						   &got, &seek);
 		if (retval)
 			com_err(cmdname, retval, "while reading ext2 file");
 		if (got == 0)
 			break;
+		if (fd != 1) {
+			nbytes = lseek64(fd, current_fs->blocksize * seek,
+					 SEEK_CUR);
+			if (nbytes < 0)
+				com_err(cmdname, errno, "while lseeking file");
+		}
 		nbytes = write(fd, buf, got);
 		if ((unsigned) nbytes != got)
 			com_err(cmdname, errno, "while writing file");
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 9148d4e..8576b1e 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1171,6 +1171,9 @@  extern errcode_t ext2fs_file_close(ext2_file_t file);
 extern errcode_t ext2fs_file_flush(ext2_file_t file);
 extern errcode_t ext2fs_file_read(ext2_file_t file, void *buf,
 				  unsigned int wanted, unsigned int *got);
+extern errcode_t ext2fs_file_read2(ext2_file_t file, void *buf,
+				  unsigned int wanted, unsigned int *got,
+				  ext2_off64_t *seek);
 extern errcode_t ext2fs_file_write(ext2_file_t file, const void *buf,
 				   unsigned int nbytes, unsigned int *written);
 extern errcode_t ext2fs_file_llseek(ext2_file_t file, __u64 offset,
diff --git a/lib/ext2fs/fileio.c b/lib/ext2fs/fileio.c
index 1f7002c..d25ebb4 100644
--- a/lib/ext2fs/fileio.c
+++ b/lib/ext2fs/fileio.c
@@ -176,23 +176,26 @@  static errcode_t sync_buffer_position(ext2_file_t file)
  * This function loads the file's block buffer with valid data from
  * the disk as necessary.
  *
- * If dontfill is true, then skip initializing the buffer since we're
+ * If flags is true, then skip initializing the buffer since we're
  * going to be replacing its entire contents anyway.  If set, then the
  * function basically only sets file->physblock and EXT2_FILE_BUF_VALID
  */
 #define DONTFILL 1
-static errcode_t load_buffer(ext2_file_t file, int dontfill)
+#define SEEK	 2
+static errcode_t load_buffer(ext2_file_t file, int flags)
 {
 	ext2_filsys	fs = file->fs;
 	errcode_t	retval;
+	int		ret_flags;
+	int		valid = 1;
 
 	if (!(file->flags & EXT2_FILE_BUF_VALID)) {
 		retval = ext2fs_bmap2(fs, file->ino, &file->inode,
-				     BMAP_BUFFER, 0, file->blockno, 0,
+				     BMAP_BUFFER, 0, file->blockno, &ret_flags,
 				     &file->physblock);
 		if (retval)
 			return retval;
-		if (!dontfill) {
+		if (flags != DONTFILL) {
 			if (file->physblock) {
 				retval = io_channel_read_blk(fs->io,
 							     file->physblock,
@@ -202,7 +205,11 @@  static errcode_t load_buffer(ext2_file_t file, int dontfill)
 			} else
 				memset(file->buf, 0, fs->blocksize);
 		}
-		file->flags |= EXT2_FILE_BUF_VALID;
+		if (flags == SEEK && ((ret_flags & BMAP_RET_UNINIT) ||
+		    file->physblock == 0))
+			valid = 0;
+		if (valid)
+			file->flags |= EXT2_FILE_BUF_VALID;
 	}
 	return 0;
 }
@@ -227,20 +234,33 @@  errcode_t ext2fs_file_close(ext2_file_t file)
 errcode_t ext2fs_file_read(ext2_file_t file, void *buf,
 			   unsigned int wanted, unsigned int *got)
 {
+	return ext2fs_file_read2(file, buf, wanted, got, 0);
+}
+
+
+errcode_t ext2fs_file_read2(ext2_file_t file, void *buf,
+			    unsigned int wanted, unsigned int *got,
+			    ext2_off64_t *seek)
+{
 	ext2_filsys	fs;
 	errcode_t	retval = 0;
 	unsigned int	start, c, count = 0;
 	__u64		left;
 	char		*ptr = (char *) buf;
+	int		seek_cnt = 0;
+	int		flags = 0;
 
 	EXT2_CHECK_MAGIC(file, EXT2_ET_MAGIC_EXT2_FILE);
 	fs = file->fs;
 
+	if (seek)
+		flags = SEEK;
+
 	while ((file->pos < EXT2_I_SIZE(&file->inode)) && (wanted > 0)) {
 		retval = sync_buffer_position(file);
 		if (retval)
 			goto fail;
-		retval = load_buffer(file, 0);
+		retval = load_buffer(file, flags);
 		if (retval)
 			goto fail;
 
@@ -248,20 +268,26 @@  errcode_t ext2fs_file_read(ext2_file_t file, void *buf,
 		c = fs->blocksize - start;
 		if (c > wanted)
 			c = wanted;
-		left = EXT2_I_SIZE(&file->inode) - file->pos ;
+		left = EXT2_I_SIZE(&file->inode) - file->pos;
 		if (c > left)
 			c = left;
 
-		memcpy(ptr, file->buf+start, c);
 		file->pos += c;
-		ptr += c;
-		count += c;
-		wanted -= c;
+		if (file->flags & EXT2_FILE_BUF_VALID) {
+			memcpy(ptr, file->buf+start, c);
+			ptr += c;
+			count += c;
+			wanted -= c;
+		} else {
+			seek_cnt++;
+		}
 	}
 
 fail:
 	if (got)
 		*got = count;
+	if (seek)
+		*seek = seek_cnt;
 	return retval;
 }