Patchwork [v3,17/18] ext4: make punch hole code path work with bigalloc

login
register
mail settings
Submitter Lukas Czerner
Date April 9, 2013, 9:14 a.m.
Message ID <1365498867-27782-18-git-send-email-lczerner@redhat.com>
Download mbox | patch
Permalink /patch/234994/
State Superseded
Headers show

Comments

Lukas Czerner - April 9, 2013, 9:14 a.m.
Currently punch hole is disabled in file systems with bigalloc
feature enabled. However the recent changes in punch hole patch should
make it easier to support punching holes on bigalloc enabled file
systems.

This commit changes partial_cluster handling in ext4_remove_blocks(),
ext4_ext_rm_leaf() and ext4_ext_remove_space(). Currently
partial_cluster is unsigned long long type and it makes sure that we
will free the partial cluster if all extents has been released from that
cluster. However it has been specifically designed only for truncate.

With punch hole we can be freeing just some extents in the cluster
leaving the rest untouched. So we have to make sure that we will notice
cluster which still has some extents. To do this I've changed
partial_cluster to be signed long long type. The only scenario where
this could be a problem is when cluster_size == block size, however in
that case there would not be any partial clusters so we're safe. For
bigger clusters the signed type is enough. Now we use the negative value
in partial_cluster to mark such cluster used, hence we know that we must
not free it even if all other extents has been freed from such cluster.

This scenario can be described in simple diagram:

|FFF...FF..FF.UUU|
 ^----------^
  punch hole

. - free space
| - cluster boundary
F - freed extent
U - used extent

Also update respective tracepoints to use signed long long type for
partial_cluster.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
---
 fs/ext4/extents.c           |   69 +++++++++++++++++++++++++++++++-----------
 include/trace/events/ext4.h |   25 ++++++++-------
 2 files changed, 64 insertions(+), 30 deletions(-)
Jan Kara - April 20, 2013, 1:42 p.m.
On Tue 09-04-13 11:14:26, Lukas Czerner wrote:
> Currently punch hole is disabled in file systems with bigalloc
> feature enabled. However the recent changes in punch hole patch should
> make it easier to support punching holes on bigalloc enabled file
> systems.
> 
> This commit changes partial_cluster handling in ext4_remove_blocks(),
> ext4_ext_rm_leaf() and ext4_ext_remove_space(). Currently
> partial_cluster is unsigned long long type and it makes sure that we
> will free the partial cluster if all extents has been released from that
> cluster. However it has been specifically designed only for truncate.
> 
> With punch hole we can be freeing just some extents in the cluster
> leaving the rest untouched. So we have to make sure that we will notice
> cluster which still has some extents. To do this I've changed
> partial_cluster to be signed long long type. The only scenario where
> this could be a problem is when cluster_size == block size, however in
> that case there would not be any partial clusters so we're safe. For
> bigger clusters the signed type is enough. Now we use the negative value
> in partial_cluster to mark such cluster used, hence we know that we must
> not free it even if all other extents has been freed from such cluster.
> 
> This scenario can be described in simple diagram:
> 
> |FFF...FF..FF.UUU|
>  ^----------^
>   punch hole
> 
> . - free space
> | - cluster boundary
> F - freed extent
> U - used extent
> 
> Also update respective tracepoints to use signed long long type for
> partial_cluster.
  The patch looks OK. You can add:
Reviewed-by: Jan Kara <jack@suse.cz>

  Just a minor nit - sometimes you use 'signed long long', sometimes 'long
long int', sometimes just 'long long'. In kernel we tend to always use just
'long long' so it would be good to clean that up.

								Honza
> 
> Signed-off-by: Lukas Czerner <lczerner@redhat.com>
> ---
>  fs/ext4/extents.c           |   69 +++++++++++++++++++++++++++++++-----------
>  include/trace/events/ext4.h |   25 ++++++++-------
>  2 files changed, 64 insertions(+), 30 deletions(-)
> 
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 9023b76..577c4f5 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2362,7 +2362,7 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
>  
>  static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
>  			      struct ext4_extent *ex,
> -			      ext4_fsblk_t *partial_cluster,
> +			      signed long long *partial_cluster,
>  			      ext4_lblk_t from, ext4_lblk_t to)
>  {
>  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> @@ -2391,7 +2391,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
>  	 * partial cluster here.
>  	 */
>  	pblk = ext4_ext_pblock(ex) + ee_len - 1;
> -	if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
> +	if ((*partial_cluster > 0) &&
> +	    (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
>  		ext4_free_blocks(handle, inode, NULL,
>  				 EXT4_C2B(sbi, *partial_cluster),
>  				 sbi->s_cluster_ratio, flags);
> @@ -2417,23 +2418,41 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
>  	    && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
>  		/* tail removal */
>  		ext4_lblk_t num;
> +		unsigned int unaligned;
>  
>  		num = le32_to_cpu(ex->ee_block) + ee_len - from;
>  		pblk = ext4_ext_pblock(ex) + ee_len - num;
> -		ext_debug("free last %u blocks starting %llu\n", num, pblk);
> +		/*
> +		 * Usually we want to free partial cluster at the end of the
> +		 * extent, except for the situation when the cluster is still
> +		 * used by any other extent (partial_cluster is negative).
> +		 */
> +		if (*partial_cluster < 0 &&
> +		    -(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1))
> +			flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
> +
> +		ext_debug("free last %u blocks starting %llu partial %lld\n",
> +			  num, pblk, *partial_cluster);
>  		ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
>  		/*
>  		 * If the block range to be freed didn't start at the
>  		 * beginning of a cluster, and we removed the entire
> -		 * extent, save the partial cluster here, since we
> -		 * might need to delete if we determine that the
> -		 * truncate operation has removed all of the blocks in
> -		 * the cluster.
> +		 * extent and the cluster is not used by any other extent,
> +		 * save the partial cluster here, since we might need to
> +		 * delete if we determine that the truncate operation has
> +		 * removed all of the blocks in the cluster.
> +		 *
> +		 * On the other hand, if we did not manage to free the whole
> +		 * extent, we have to mark the cluster as used (store negative
> +		 * cluster number in partial_cluster).
>  		 */
> -		if (pblk & (sbi->s_cluster_ratio - 1) &&
> -		    (ee_len == num))
> +		unaligned = pblk & (sbi->s_cluster_ratio - 1);
> +		if (unaligned && (ee_len == num) &&
> +		    (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
>  			*partial_cluster = EXT4_B2C(sbi, pblk);
> -		else
> +		else if (unaligned)
> +			*partial_cluster = -((long long)EXT4_B2C(sbi, pblk));
> +		else if (*partial_cluster > 0)
>  			*partial_cluster = 0;
>  	} else
>  		ext4_error(sbi->s_sb, "strange request: removal(2) "
> @@ -2451,12 +2470,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
>   * @handle: The journal handle
>   * @inode:  The files inode
>   * @path:   The path to the leaf
> + * @partial_cluster: The cluster which we'll have to free if all extents
> + *                   has been released from it. It gets negative in case
> + *                   that the cluster is still used.
>   * @start:  The first block to remove
>   * @end:   The last block to remove
>   */
>  static int
>  ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
> -		 struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster,
> +		 struct ext4_ext_path *path,
> +		 signed long long *partial_cluster,
>  		 ext4_lblk_t start, ext4_lblk_t end)
>  {
>  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> @@ -2469,6 +2492,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>  	unsigned short ex_ee_len;
>  	unsigned uninitialized = 0;
>  	struct ext4_extent *ex;
> +	ext4_fsblk_t pblk;
>  
>  	/* the header must be checked already in ext4_ext_remove_space() */
>  	ext_debug("truncate since %u in leaf to %u\n", start, end);
> @@ -2507,6 +2531,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>  
>  		/* If this extent is beyond the end of the hole, skip it */
>  		if (end < ex_ee_block) {
> +			/*
> +			 * We're going to skip this extent and move to another,
> +			 * so if this extent is not cluster aligned we have
> +			 * to mark the current cluster as used to avoid
> +			 * accidentally freeing it later on
> +			 */
> +			pblk = ext4_ext_pblock(ex);
> +			if (pblk & (sbi->s_cluster_ratio - 1))
> +				*partial_cluster =
> +					-((long long)EXT4_B2C(sbi, pblk));
>  			ex--;
>  			ex_ee_block = le32_to_cpu(ex->ee_block);
>  			ex_ee_len = ext4_ext_get_actual_len(ex);
> @@ -2582,7 +2616,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>  					sizeof(struct ext4_extent));
>  			}
>  			le16_add_cpu(&eh->eh_entries, -1);
> -		} else
> +		} else if (*partial_cluster > 0)
>  			*partial_cluster = 0;
>  
>  		err = ext4_ext_dirty(handle, inode, path + depth);
> @@ -2600,11 +2634,10 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
>  		err = ext4_ext_correct_indexes(handle, inode, path);
>  
>  	/*
> -	 * If there is still a entry in the leaf node, check to see if
> -	 * it references the partial cluster.  This is the only place
> -	 * where it could; if it doesn't, we can free the cluster.
> +	 * Free the partial cluster only if the current extent does not
> +	 * reference it. Otherwise we might free used cluster.
>  	 */
> -	if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) &&
> +	if (*partial_cluster > 0 &&
>  	    (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
>  	     *partial_cluster)) {
>  		int flags = EXT4_FREE_BLOCKS_FORGET;
> @@ -2654,7 +2687,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
>  	struct super_block *sb = inode->i_sb;
>  	int depth = ext_depth(inode);
>  	struct ext4_ext_path *path = NULL;
> -	ext4_fsblk_t partial_cluster = 0;
> +	signed long long partial_cluster = 0;
>  	handle_t *handle;
>  	int i = 0, err = 0;
>  
> @@ -2838,7 +2871,7 @@ again:
>  	/* If we still have something in the partial cluster and we have removed
>  	 * even the first extent, then we should free the blocks in the partial
>  	 * cluster as well. */
> -	if (partial_cluster && path->p_hdr->eh_entries == 0) {
> +	if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) {
>  		int flags = EXT4_FREE_BLOCKS_FORGET;
>  
>  		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
> diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> index c92500c..5028d05 100644
> --- a/include/trace/events/ext4.h
> +++ b/include/trace/events/ext4.h
> @@ -1928,7 +1928,7 @@ TRACE_EVENT(ext4_ext_show_extent,
>  TRACE_EVENT(ext4_remove_blocks,
>  	    TP_PROTO(struct inode *inode, struct ext4_extent *ex,
>  		ext4_lblk_t from, ext4_fsblk_t to,
> -		ext4_fsblk_t partial_cluster),
> +		long long int partial_cluster),
>  
>  	TP_ARGS(inode, ex, from, to, partial_cluster),
>  
> @@ -1937,7 +1937,7 @@ TRACE_EVENT(ext4_remove_blocks,
>  		__field(	ino_t,		ino	)
>  		__field(	ext4_lblk_t,	from	)
>  		__field(	ext4_lblk_t,	to	)
> -		__field(	ext4_fsblk_t,	partial	)
> +		__field(	long long int,	partial	)
>  		__field(	ext4_fsblk_t,	ee_pblk	)
>  		__field(	ext4_lblk_t,	ee_lblk	)
>  		__field(	unsigned short,	ee_len	)
> @@ -1955,7 +1955,7 @@ TRACE_EVENT(ext4_remove_blocks,
>  	),
>  
>  	TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]"
> -		  "from %u to %u partial_cluster %u",
> +		  "from %u to %u partial_cluster %lld",
>  		  MAJOR(__entry->dev), MINOR(__entry->dev),
>  		  (unsigned long) __entry->ino,
>  		  (unsigned) __entry->ee_lblk,
> @@ -1963,19 +1963,20 @@ TRACE_EVENT(ext4_remove_blocks,
>  		  (unsigned short) __entry->ee_len,
>  		  (unsigned) __entry->from,
>  		  (unsigned) __entry->to,
> -		  (unsigned) __entry->partial)
> +		  (long long int) __entry->partial)
>  );
>  
>  TRACE_EVENT(ext4_ext_rm_leaf,
>  	TP_PROTO(struct inode *inode, ext4_lblk_t start,
> -		 struct ext4_extent *ex, ext4_fsblk_t partial_cluster),
> +		 struct ext4_extent *ex,
> +		 long long int partial_cluster),
>  
>  	TP_ARGS(inode, start, ex, partial_cluster),
>  
>  	TP_STRUCT__entry(
>  		__field(	dev_t,		dev	)
>  		__field(	ino_t,		ino	)
> -		__field(	ext4_fsblk_t,	partial	)
> +		__field(	long long int,	partial	)
>  		__field(	ext4_lblk_t,	start	)
>  		__field(	ext4_lblk_t,	ee_lblk	)
>  		__field(	ext4_fsblk_t,	ee_pblk	)
> @@ -1993,14 +1994,14 @@ TRACE_EVENT(ext4_ext_rm_leaf,
>  	),
>  
>  	TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]"
> -		  "partial_cluster %u",
> +		  "partial_cluster %lld",
>  		  MAJOR(__entry->dev), MINOR(__entry->dev),
>  		  (unsigned long) __entry->ino,
>  		  (unsigned) __entry->start,
>  		  (unsigned) __entry->ee_lblk,
>  		  (unsigned long long) __entry->ee_pblk,
>  		  (unsigned short) __entry->ee_len,
> -		  (unsigned) __entry->partial)
> +		  (long long int) __entry->partial)
>  );
>  
>  TRACE_EVENT(ext4_ext_rm_idx,
> @@ -2058,7 +2059,7 @@ TRACE_EVENT(ext4_ext_remove_space,
>  
>  TRACE_EVENT(ext4_ext_remove_space_done,
>  	TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end,
> -		 int depth, ext4_lblk_t partial, unsigned short eh_entries),
> +		 int depth, long long int partial, unsigned short eh_entries),
>  
>  	TP_ARGS(inode, start, end, depth, partial, eh_entries),
>  
> @@ -2068,7 +2069,7 @@ TRACE_EVENT(ext4_ext_remove_space_done,
>  		__field(	ext4_lblk_t,	start		)
>  		__field(	ext4_lblk_t,	end		)
>  		__field(	int,		depth		)
> -		__field(	ext4_lblk_t,	partial		)
> +		__field(	long long int,	partial		)
>  		__field(	unsigned short,	eh_entries	)
>  	),
>  
> @@ -2082,14 +2083,14 @@ TRACE_EVENT(ext4_ext_remove_space_done,
>  		__entry->eh_entries	= eh_entries;
>  	),
>  
> -	TP_printk("dev %d,%d ino %lu start %u end %u depth %d partial %u "
> +	TP_printk("dev %d,%d ino %lu start %u end %u depth %d partial %lld "
>  		  "remaining_entries %u",
>  		  MAJOR(__entry->dev), MINOR(__entry->dev),
>  		  (unsigned long) __entry->ino,
>  		  (unsigned) __entry->start,
>  		  (unsigned) __entry->end,
>  		  __entry->depth,
> -		  (unsigned) __entry->partial,
> +		  (long long int) __entry->partial,
>  		  (unsigned short) __entry->eh_entries)
>  );
>  
> -- 
> 1.7.7.6
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zheng Liu - April 23, 2013, 9:19 a.m.
On Sat, Apr 20, 2013 at 03:42:41PM +0200, Jan Kara wrote:
> On Tue 09-04-13 11:14:26, Lukas Czerner wrote:
> > Currently punch hole is disabled in file systems with bigalloc
> > feature enabled. However the recent changes in punch hole patch should
> > make it easier to support punching holes on bigalloc enabled file
> > systems.
> > 
> > This commit changes partial_cluster handling in ext4_remove_blocks(),
> > ext4_ext_rm_leaf() and ext4_ext_remove_space(). Currently
> > partial_cluster is unsigned long long type and it makes sure that we
> > will free the partial cluster if all extents has been released from that
> > cluster. However it has been specifically designed only for truncate.
> > 
> > With punch hole we can be freeing just some extents in the cluster
> > leaving the rest untouched. So we have to make sure that we will notice
> > cluster which still has some extents. To do this I've changed
> > partial_cluster to be signed long long type. The only scenario where
> > this could be a problem is when cluster_size == block size, however in
> > that case there would not be any partial clusters so we're safe. For
> > bigger clusters the signed type is enough. Now we use the negative value
> > in partial_cluster to mark such cluster used, hence we know that we must
> > not free it even if all other extents has been freed from such cluster.
> > 
> > This scenario can be described in simple diagram:
> > 
> > |FFF...FF..FF.UUU|
> >  ^----------^
> >   punch hole
> > 
> > . - free space
> > | - cluster boundary
> > F - freed extent
> > U - used extent
> > 
> > Also update respective tracepoints to use signed long long type for
> > partial_cluster.
>   The patch looks OK. You can add:
> Reviewed-by: Jan Kara <jack@suse.cz>
> 
>   Just a minor nit - sometimes you use 'signed long long', sometimes 'long
> long int', sometimes just 'long long'. In kernel we tend to always use just
> 'long long' so it would be good to clean that up.

Another question is that in patch 01/18 invalidatepage signature is
changed from
  int (*invalidatepage) (struct page *, unsigned long);
to
  void (*invalidatepage) (struct page *, unsigned int, unsigned int);

The argument type is changed from 'unsigned long' to 'unsigned int'.  My
question is why we need to change it.

Thanks,
                                                - Zheng
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lukas Czerner - April 24, 2013, 10:57 a.m.
On Sat, 20 Apr 2013, Jan Kara wrote:

> Date: Sat, 20 Apr 2013 15:42:41 +0200
> From: Jan Kara <jack@suse.cz>
> To: Lukas Czerner <lczerner@redhat.com>
> Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
>     linux-fsdevel@vger.kernel.org, linux-ext4@vger.kernel.org
> Subject: Re: [PATCH v3 17/18] ext4: make punch hole code path work with
>     bigalloc
> 
> On Tue 09-04-13 11:14:26, Lukas Czerner wrote:
> > Currently punch hole is disabled in file systems with bigalloc
> > feature enabled. However the recent changes in punch hole patch should
> > make it easier to support punching holes on bigalloc enabled file
> > systems.
> > 
> > This commit changes partial_cluster handling in ext4_remove_blocks(),
> > ext4_ext_rm_leaf() and ext4_ext_remove_space(). Currently
> > partial_cluster is unsigned long long type and it makes sure that we
> > will free the partial cluster if all extents has been released from that
> > cluster. However it has been specifically designed only for truncate.
> > 
> > With punch hole we can be freeing just some extents in the cluster
> > leaving the rest untouched. So we have to make sure that we will notice
> > cluster which still has some extents. To do this I've changed
> > partial_cluster to be signed long long type. The only scenario where
> > this could be a problem is when cluster_size == block size, however in
> > that case there would not be any partial clusters so we're safe. For
> > bigger clusters the signed type is enough. Now we use the negative value
> > in partial_cluster to mark such cluster used, hence we know that we must
> > not free it even if all other extents has been freed from such cluster.
> > 
> > This scenario can be described in simple diagram:
> > 
> > |FFF...FF..FF.UUU|
> >  ^----------^
> >   punch hole
> > 
> > . - free space
> > | - cluster boundary
> > F - freed extent
> > U - used extent
> > 
> > Also update respective tracepoints to use signed long long type for
> > partial_cluster.
>   The patch looks OK. You can add:
> Reviewed-by: Jan Kara <jack@suse.cz>
> 
>   Just a minor nit - sometimes you use 'signed long long', sometimes 'long
> long int', sometimes just 'long long'. In kernel we tend to always use just
> 'long long' so it would be good to clean that up.

Sure, I can do that.

Thanks!
-Lukas

> 
> 								Honza
> > 
> > Signed-off-by: Lukas Czerner <lczerner@redhat.com>
> > ---
> >  fs/ext4/extents.c           |   69 +++++++++++++++++++++++++++++++-----------
> >  include/trace/events/ext4.h |   25 ++++++++-------
> >  2 files changed, 64 insertions(+), 30 deletions(-)
> > 
> > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> > index 9023b76..577c4f5 100644
> > --- a/fs/ext4/extents.c
> > +++ b/fs/ext4/extents.c
> > @@ -2362,7 +2362,7 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
> >  
> >  static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
> >  			      struct ext4_extent *ex,
> > -			      ext4_fsblk_t *partial_cluster,
> > +			      signed long long *partial_cluster,
> >  			      ext4_lblk_t from, ext4_lblk_t to)
> >  {
> >  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> > @@ -2391,7 +2391,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
> >  	 * partial cluster here.
> >  	 */
> >  	pblk = ext4_ext_pblock(ex) + ee_len - 1;
> > -	if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
> > +	if ((*partial_cluster > 0) &&
> > +	    (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
> >  		ext4_free_blocks(handle, inode, NULL,
> >  				 EXT4_C2B(sbi, *partial_cluster),
> >  				 sbi->s_cluster_ratio, flags);
> > @@ -2417,23 +2418,41 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
> >  	    && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
> >  		/* tail removal */
> >  		ext4_lblk_t num;
> > +		unsigned int unaligned;
> >  
> >  		num = le32_to_cpu(ex->ee_block) + ee_len - from;
> >  		pblk = ext4_ext_pblock(ex) + ee_len - num;
> > -		ext_debug("free last %u blocks starting %llu\n", num, pblk);
> > +		/*
> > +		 * Usually we want to free partial cluster at the end of the
> > +		 * extent, except for the situation when the cluster is still
> > +		 * used by any other extent (partial_cluster is negative).
> > +		 */
> > +		if (*partial_cluster < 0 &&
> > +		    -(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1))
> > +			flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
> > +
> > +		ext_debug("free last %u blocks starting %llu partial %lld\n",
> > +			  num, pblk, *partial_cluster);
> >  		ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
> >  		/*
> >  		 * If the block range to be freed didn't start at the
> >  		 * beginning of a cluster, and we removed the entire
> > -		 * extent, save the partial cluster here, since we
> > -		 * might need to delete if we determine that the
> > -		 * truncate operation has removed all of the blocks in
> > -		 * the cluster.
> > +		 * extent and the cluster is not used by any other extent,
> > +		 * save the partial cluster here, since we might need to
> > +		 * delete if we determine that the truncate operation has
> > +		 * removed all of the blocks in the cluster.
> > +		 *
> > +		 * On the other hand, if we did not manage to free the whole
> > +		 * extent, we have to mark the cluster as used (store negative
> > +		 * cluster number in partial_cluster).
> >  		 */
> > -		if (pblk & (sbi->s_cluster_ratio - 1) &&
> > -		    (ee_len == num))
> > +		unaligned = pblk & (sbi->s_cluster_ratio - 1);
> > +		if (unaligned && (ee_len == num) &&
> > +		    (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
> >  			*partial_cluster = EXT4_B2C(sbi, pblk);
> > -		else
> > +		else if (unaligned)
> > +			*partial_cluster = -((long long)EXT4_B2C(sbi, pblk));
> > +		else if (*partial_cluster > 0)
> >  			*partial_cluster = 0;
> >  	} else
> >  		ext4_error(sbi->s_sb, "strange request: removal(2) "
> > @@ -2451,12 +2470,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
> >   * @handle: The journal handle
> >   * @inode:  The files inode
> >   * @path:   The path to the leaf
> > + * @partial_cluster: The cluster which we'll have to free if all extents
> > + *                   has been released from it. It gets negative in case
> > + *                   that the cluster is still used.
> >   * @start:  The first block to remove
> >   * @end:   The last block to remove
> >   */
> >  static int
> >  ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
> > -		 struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster,
> > +		 struct ext4_ext_path *path,
> > +		 signed long long *partial_cluster,
> >  		 ext4_lblk_t start, ext4_lblk_t end)
> >  {
> >  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> > @@ -2469,6 +2492,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
> >  	unsigned short ex_ee_len;
> >  	unsigned uninitialized = 0;
> >  	struct ext4_extent *ex;
> > +	ext4_fsblk_t pblk;
> >  
> >  	/* the header must be checked already in ext4_ext_remove_space() */
> >  	ext_debug("truncate since %u in leaf to %u\n", start, end);
> > @@ -2507,6 +2531,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
> >  
> >  		/* If this extent is beyond the end of the hole, skip it */
> >  		if (end < ex_ee_block) {
> > +			/*
> > +			 * We're going to skip this extent and move to another,
> > +			 * so if this extent is not cluster aligned we have
> > +			 * to mark the current cluster as used to avoid
> > +			 * accidentally freeing it later on
> > +			 */
> > +			pblk = ext4_ext_pblock(ex);
> > +			if (pblk & (sbi->s_cluster_ratio - 1))
> > +				*partial_cluster =
> > +					-((long long)EXT4_B2C(sbi, pblk));
> >  			ex--;
> >  			ex_ee_block = le32_to_cpu(ex->ee_block);
> >  			ex_ee_len = ext4_ext_get_actual_len(ex);
> > @@ -2582,7 +2616,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
> >  					sizeof(struct ext4_extent));
> >  			}
> >  			le16_add_cpu(&eh->eh_entries, -1);
> > -		} else
> > +		} else if (*partial_cluster > 0)
> >  			*partial_cluster = 0;
> >  
> >  		err = ext4_ext_dirty(handle, inode, path + depth);
> > @@ -2600,11 +2634,10 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
> >  		err = ext4_ext_correct_indexes(handle, inode, path);
> >  
> >  	/*
> > -	 * If there is still a entry in the leaf node, check to see if
> > -	 * it references the partial cluster.  This is the only place
> > -	 * where it could; if it doesn't, we can free the cluster.
> > +	 * Free the partial cluster only if the current extent does not
> > +	 * reference it. Otherwise we might free used cluster.
> >  	 */
> > -	if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) &&
> > +	if (*partial_cluster > 0 &&
> >  	    (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
> >  	     *partial_cluster)) {
> >  		int flags = EXT4_FREE_BLOCKS_FORGET;
> > @@ -2654,7 +2687,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
> >  	struct super_block *sb = inode->i_sb;
> >  	int depth = ext_depth(inode);
> >  	struct ext4_ext_path *path = NULL;
> > -	ext4_fsblk_t partial_cluster = 0;
> > +	signed long long partial_cluster = 0;
> >  	handle_t *handle;
> >  	int i = 0, err = 0;
> >  
> > @@ -2838,7 +2871,7 @@ again:
> >  	/* If we still have something in the partial cluster and we have removed
> >  	 * even the first extent, then we should free the blocks in the partial
> >  	 * cluster as well. */
> > -	if (partial_cluster && path->p_hdr->eh_entries == 0) {
> > +	if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) {
> >  		int flags = EXT4_FREE_BLOCKS_FORGET;
> >  
> >  		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
> > diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> > index c92500c..5028d05 100644
> > --- a/include/trace/events/ext4.h
> > +++ b/include/trace/events/ext4.h
> > @@ -1928,7 +1928,7 @@ TRACE_EVENT(ext4_ext_show_extent,
> >  TRACE_EVENT(ext4_remove_blocks,
> >  	    TP_PROTO(struct inode *inode, struct ext4_extent *ex,
> >  		ext4_lblk_t from, ext4_fsblk_t to,
> > -		ext4_fsblk_t partial_cluster),
> > +		long long int partial_cluster),
> >  
> >  	TP_ARGS(inode, ex, from, to, partial_cluster),
> >  
> > @@ -1937,7 +1937,7 @@ TRACE_EVENT(ext4_remove_blocks,
> >  		__field(	ino_t,		ino	)
> >  		__field(	ext4_lblk_t,	from	)
> >  		__field(	ext4_lblk_t,	to	)
> > -		__field(	ext4_fsblk_t,	partial	)
> > +		__field(	long long int,	partial	)
> >  		__field(	ext4_fsblk_t,	ee_pblk	)
> >  		__field(	ext4_lblk_t,	ee_lblk	)
> >  		__field(	unsigned short,	ee_len	)
> > @@ -1955,7 +1955,7 @@ TRACE_EVENT(ext4_remove_blocks,
> >  	),
> >  
> >  	TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]"
> > -		  "from %u to %u partial_cluster %u",
> > +		  "from %u to %u partial_cluster %lld",
> >  		  MAJOR(__entry->dev), MINOR(__entry->dev),
> >  		  (unsigned long) __entry->ino,
> >  		  (unsigned) __entry->ee_lblk,
> > @@ -1963,19 +1963,20 @@ TRACE_EVENT(ext4_remove_blocks,
> >  		  (unsigned short) __entry->ee_len,
> >  		  (unsigned) __entry->from,
> >  		  (unsigned) __entry->to,
> > -		  (unsigned) __entry->partial)
> > +		  (long long int) __entry->partial)
> >  );
> >  
> >  TRACE_EVENT(ext4_ext_rm_leaf,
> >  	TP_PROTO(struct inode *inode, ext4_lblk_t start,
> > -		 struct ext4_extent *ex, ext4_fsblk_t partial_cluster),
> > +		 struct ext4_extent *ex,
> > +		 long long int partial_cluster),
> >  
> >  	TP_ARGS(inode, start, ex, partial_cluster),
> >  
> >  	TP_STRUCT__entry(
> >  		__field(	dev_t,		dev	)
> >  		__field(	ino_t,		ino	)
> > -		__field(	ext4_fsblk_t,	partial	)
> > +		__field(	long long int,	partial	)
> >  		__field(	ext4_lblk_t,	start	)
> >  		__field(	ext4_lblk_t,	ee_lblk	)
> >  		__field(	ext4_fsblk_t,	ee_pblk	)
> > @@ -1993,14 +1994,14 @@ TRACE_EVENT(ext4_ext_rm_leaf,
> >  	),
> >  
> >  	TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]"
> > -		  "partial_cluster %u",
> > +		  "partial_cluster %lld",
> >  		  MAJOR(__entry->dev), MINOR(__entry->dev),
> >  		  (unsigned long) __entry->ino,
> >  		  (unsigned) __entry->start,
> >  		  (unsigned) __entry->ee_lblk,
> >  		  (unsigned long long) __entry->ee_pblk,
> >  		  (unsigned short) __entry->ee_len,
> > -		  (unsigned) __entry->partial)
> > +		  (long long int) __entry->partial)
> >  );
> >  
> >  TRACE_EVENT(ext4_ext_rm_idx,
> > @@ -2058,7 +2059,7 @@ TRACE_EVENT(ext4_ext_remove_space,
> >  
> >  TRACE_EVENT(ext4_ext_remove_space_done,
> >  	TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end,
> > -		 int depth, ext4_lblk_t partial, unsigned short eh_entries),
> > +		 int depth, long long int partial, unsigned short eh_entries),
> >  
> >  	TP_ARGS(inode, start, end, depth, partial, eh_entries),
> >  
> > @@ -2068,7 +2069,7 @@ TRACE_EVENT(ext4_ext_remove_space_done,
> >  		__field(	ext4_lblk_t,	start		)
> >  		__field(	ext4_lblk_t,	end		)
> >  		__field(	int,		depth		)
> > -		__field(	ext4_lblk_t,	partial		)
> > +		__field(	long long int,	partial		)
> >  		__field(	unsigned short,	eh_entries	)
> >  	),
> >  
> > @@ -2082,14 +2083,14 @@ TRACE_EVENT(ext4_ext_remove_space_done,
> >  		__entry->eh_entries	= eh_entries;
> >  	),
> >  
> > -	TP_printk("dev %d,%d ino %lu start %u end %u depth %d partial %u "
> > +	TP_printk("dev %d,%d ino %lu start %u end %u depth %d partial %lld "
> >  		  "remaining_entries %u",
> >  		  MAJOR(__entry->dev), MINOR(__entry->dev),
> >  		  (unsigned long) __entry->ino,
> >  		  (unsigned) __entry->start,
> >  		  (unsigned) __entry->end,
> >  		  __entry->depth,
> > -		  (unsigned) __entry->partial,
> > +		  (long long int) __entry->partial,
> >  		  (unsigned short) __entry->eh_entries)
> >  );
> >  
> > -- 
> > 1.7.7.6
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lukas Czerner - April 24, 2013, 11:08 a.m.
On Tue, 23 Apr 2013, Zheng Liu wrote:

> Date: Tue, 23 Apr 2013 17:19:28 +0800
> From: Zheng Liu <gnehzuil.liu@gmail.com>
> To: Jan Kara <jack@suse.cz>
> Cc: Lukas Czerner <lczerner@redhat.com>, linux-mm@kvack.org,
>     linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
>     linux-ext4@vger.kernel.org
> Subject: Re: [PATCH v3 17/18] ext4: make punch hole code path work with
>     bigalloc
> 
> On Sat, Apr 20, 2013 at 03:42:41PM +0200, Jan Kara wrote:
> > On Tue 09-04-13 11:14:26, Lukas Czerner wrote:
> > > Currently punch hole is disabled in file systems with bigalloc
> > > feature enabled. However the recent changes in punch hole patch should
> > > make it easier to support punching holes on bigalloc enabled file
> > > systems.
> > > 
> > > This commit changes partial_cluster handling in ext4_remove_blocks(),
> > > ext4_ext_rm_leaf() and ext4_ext_remove_space(). Currently
> > > partial_cluster is unsigned long long type and it makes sure that we
> > > will free the partial cluster if all extents has been released from that
> > > cluster. However it has been specifically designed only for truncate.
> > > 
> > > With punch hole we can be freeing just some extents in the cluster
> > > leaving the rest untouched. So we have to make sure that we will notice
> > > cluster which still has some extents. To do this I've changed
> > > partial_cluster to be signed long long type. The only scenario where
> > > this could be a problem is when cluster_size == block size, however in
> > > that case there would not be any partial clusters so we're safe. For
> > > bigger clusters the signed type is enough. Now we use the negative value
> > > in partial_cluster to mark such cluster used, hence we know that we must
> > > not free it even if all other extents has been freed from such cluster.
> > > 
> > > This scenario can be described in simple diagram:
> > > 
> > > |FFF...FF..FF.UUU|
> > >  ^----------^
> > >   punch hole
> > > 
> > > . - free space
> > > | - cluster boundary
> > > F - freed extent
> > > U - used extent
> > > 
> > > Also update respective tracepoints to use signed long long type for
> > > partial_cluster.
> >   The patch looks OK. You can add:
> > Reviewed-by: Jan Kara <jack@suse.cz>
> > 
> >   Just a minor nit - sometimes you use 'signed long long', sometimes 'long
> > long int', sometimes just 'long long'. In kernel we tend to always use just
> > 'long long' so it would be good to clean that up.
> 
> Another question is that in patch 01/18 invalidatepage signature is
> changed from
>   int (*invalidatepage) (struct page *, unsigned long);
> to
>   void (*invalidatepage) (struct page *, unsigned int, unsigned int);
> 
> The argument type is changed from 'unsigned long' to 'unsigned int'.  My
> question is why we need to change it.
> 
> Thanks,
>                                                 - Zheng
> 

Hi Zheng,

this was changed on Hugh Dickins request because it makes it clearer
that those args are indeed intended to be offsets within a page
(0..PAGE_CACHE_SIZE).

Even though PAGE_CACHE_SIZE can be defined as unsigned long, this is
only for convenience. Here is quote from Hugh:

  "
  They would be defined as unsigned long so that they can be used in
  masks like ~(PAGE_SIZE - 1), and behave as expected on addresses,
  without needing casts to be added all over.

  We do not (currently!) expect PAGE_SIZE or PAGE_CACHE_SIZE to grow
  beyond an unsigned int - but indeed they can be larger than what's
  held in an unsigned short (look no further than ia64 or ppc64).

  For more reassurance, see include/linux/highmem.h, which declares
  zero_user_segments() and others: unsigned int (well, unsigned with
  the int implicit) for offsets within a page.

  Hugh
  "

I should probably mention that in the description.

Thanks!
-Lukas
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Zheng Liu - April 24, 2013, 11:29 a.m.
On Wed, Apr 24, 2013 at 01:08:17PM +0200, Lukáš Czerner wrote:
> On Tue, 23 Apr 2013, Zheng Liu wrote:
[snip]
> > > > Also update respective tracepoints to use signed long long type for
> > > > partial_cluster.
> > >   The patch looks OK. You can add:
> > > Reviewed-by: Jan Kara <jack@suse.cz>
> > > 
> > >   Just a minor nit - sometimes you use 'signed long long', sometimes 'long
> > > long int', sometimes just 'long long'. In kernel we tend to always use just
> > > 'long long' so it would be good to clean that up.
> > 
> > Another question is that in patch 01/18 invalidatepage signature is
> > changed from
> >   int (*invalidatepage) (struct page *, unsigned long);
> > to
> >   void (*invalidatepage) (struct page *, unsigned int, unsigned int);
> > 
> > The argument type is changed from 'unsigned long' to 'unsigned int'.  My
> > question is why we need to change it.
> > 
> > Thanks,
> >                                                 - Zheng
> > 
> 
> Hi Zheng,
> 
> this was changed on Hugh Dickins request because it makes it clearer
> that those args are indeed intended to be offsets within a page
> (0..PAGE_CACHE_SIZE).
> 
> Even though PAGE_CACHE_SIZE can be defined as unsigned long, this is
> only for convenience. Here is quote from Hugh:
> 
>   "
>   They would be defined as unsigned long so that they can be used in
>   masks like ~(PAGE_SIZE - 1), and behave as expected on addresses,
>   without needing casts to be added all over.
> 
>   We do not (currently!) expect PAGE_SIZE or PAGE_CACHE_SIZE to grow
>   beyond an unsigned int - but indeed they can be larger than what's
>   held in an unsigned short (look no further than ia64 or ppc64).
> 
>   For more reassurance, see include/linux/highmem.h, which declares
>   zero_user_segments() and others: unsigned int (well, unsigned with
>   the int implicit) for offsets within a page.
> 
>   Hugh
>   "
> 
> I should probably mention that in the description.

Ah, thanks for your explanation.  I must miss something. :-(

Regards,
                                                - Zheng
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9023b76..577c4f5 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2362,7 +2362,7 @@  int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 
 static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 			      struct ext4_extent *ex,
-			      ext4_fsblk_t *partial_cluster,
+			      signed long long *partial_cluster,
 			      ext4_lblk_t from, ext4_lblk_t to)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2391,7 +2391,8 @@  static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 	 * partial cluster here.
 	 */
 	pblk = ext4_ext_pblock(ex) + ee_len - 1;
-	if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
+	if ((*partial_cluster > 0) &&
+	    (EXT4_B2C(sbi, pblk) != *partial_cluster)) {
 		ext4_free_blocks(handle, inode, NULL,
 				 EXT4_C2B(sbi, *partial_cluster),
 				 sbi->s_cluster_ratio, flags);
@@ -2417,23 +2418,41 @@  static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 	    && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
 		/* tail removal */
 		ext4_lblk_t num;
+		unsigned int unaligned;
 
 		num = le32_to_cpu(ex->ee_block) + ee_len - from;
 		pblk = ext4_ext_pblock(ex) + ee_len - num;
-		ext_debug("free last %u blocks starting %llu\n", num, pblk);
+		/*
+		 * Usually we want to free partial cluster at the end of the
+		 * extent, except for the situation when the cluster is still
+		 * used by any other extent (partial_cluster is negative).
+		 */
+		if (*partial_cluster < 0 &&
+		    -(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1))
+			flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
+
+		ext_debug("free last %u blocks starting %llu partial %lld\n",
+			  num, pblk, *partial_cluster);
 		ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
 		/*
 		 * If the block range to be freed didn't start at the
 		 * beginning of a cluster, and we removed the entire
-		 * extent, save the partial cluster here, since we
-		 * might need to delete if we determine that the
-		 * truncate operation has removed all of the blocks in
-		 * the cluster.
+		 * extent and the cluster is not used by any other extent,
+		 * save the partial cluster here, since we might need to
+		 * delete if we determine that the truncate operation has
+		 * removed all of the blocks in the cluster.
+		 *
+		 * On the other hand, if we did not manage to free the whole
+		 * extent, we have to mark the cluster as used (store negative
+		 * cluster number in partial_cluster).
 		 */
-		if (pblk & (sbi->s_cluster_ratio - 1) &&
-		    (ee_len == num))
+		unaligned = pblk & (sbi->s_cluster_ratio - 1);
+		if (unaligned && (ee_len == num) &&
+		    (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk))))
 			*partial_cluster = EXT4_B2C(sbi, pblk);
-		else
+		else if (unaligned)
+			*partial_cluster = -((long long)EXT4_B2C(sbi, pblk));
+		else if (*partial_cluster > 0)
 			*partial_cluster = 0;
 	} else
 		ext4_error(sbi->s_sb, "strange request: removal(2) "
@@ -2451,12 +2470,16 @@  static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
  * @handle: The journal handle
  * @inode:  The files inode
  * @path:   The path to the leaf
+ * @partial_cluster: The cluster which we'll have to free if all extents
+ *                   has been released from it. It gets negative in case
+ *                   that the cluster is still used.
  * @start:  The first block to remove
  * @end:   The last block to remove
  */
 static int
 ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
-		 struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster,
+		 struct ext4_ext_path *path,
+		 signed long long *partial_cluster,
 		 ext4_lblk_t start, ext4_lblk_t end)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2469,6 +2492,7 @@  ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	unsigned short ex_ee_len;
 	unsigned uninitialized = 0;
 	struct ext4_extent *ex;
+	ext4_fsblk_t pblk;
 
 	/* the header must be checked already in ext4_ext_remove_space() */
 	ext_debug("truncate since %u in leaf to %u\n", start, end);
@@ -2507,6 +2531,16 @@  ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 
 		/* If this extent is beyond the end of the hole, skip it */
 		if (end < ex_ee_block) {
+			/*
+			 * We're going to skip this extent and move to another,
+			 * so if this extent is not cluster aligned we have
+			 * to mark the current cluster as used to avoid
+			 * accidentally freeing it later on
+			 */
+			pblk = ext4_ext_pblock(ex);
+			if (pblk & (sbi->s_cluster_ratio - 1))
+				*partial_cluster =
+					-((long long)EXT4_B2C(sbi, pblk));
 			ex--;
 			ex_ee_block = le32_to_cpu(ex->ee_block);
 			ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2582,7 +2616,7 @@  ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 					sizeof(struct ext4_extent));
 			}
 			le16_add_cpu(&eh->eh_entries, -1);
-		} else
+		} else if (*partial_cluster > 0)
 			*partial_cluster = 0;
 
 		err = ext4_ext_dirty(handle, inode, path + depth);
@@ -2600,11 +2634,10 @@  ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 		err = ext4_ext_correct_indexes(handle, inode, path);
 
 	/*
-	 * If there is still a entry in the leaf node, check to see if
-	 * it references the partial cluster.  This is the only place
-	 * where it could; if it doesn't, we can free the cluster.
+	 * Free the partial cluster only if the current extent does not
+	 * reference it. Otherwise we might free used cluster.
 	 */
-	if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) &&
+	if (*partial_cluster > 0 &&
 	    (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) !=
 	     *partial_cluster)) {
 		int flags = EXT4_FREE_BLOCKS_FORGET;
@@ -2654,7 +2687,7 @@  int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
 	struct super_block *sb = inode->i_sb;
 	int depth = ext_depth(inode);
 	struct ext4_ext_path *path = NULL;
-	ext4_fsblk_t partial_cluster = 0;
+	signed long long partial_cluster = 0;
 	handle_t *handle;
 	int i = 0, err = 0;
 
@@ -2838,7 +2871,7 @@  again:
 	/* If we still have something in the partial cluster and we have removed
 	 * even the first extent, then we should free the blocks in the partial
 	 * cluster as well. */
-	if (partial_cluster && path->p_hdr->eh_entries == 0) {
+	if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) {
 		int flags = EXT4_FREE_BLOCKS_FORGET;
 
 		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index c92500c..5028d05 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -1928,7 +1928,7 @@  TRACE_EVENT(ext4_ext_show_extent,
 TRACE_EVENT(ext4_remove_blocks,
 	    TP_PROTO(struct inode *inode, struct ext4_extent *ex,
 		ext4_lblk_t from, ext4_fsblk_t to,
-		ext4_fsblk_t partial_cluster),
+		long long int partial_cluster),
 
 	TP_ARGS(inode, ex, from, to, partial_cluster),
 
@@ -1937,7 +1937,7 @@  TRACE_EVENT(ext4_remove_blocks,
 		__field(	ino_t,		ino	)
 		__field(	ext4_lblk_t,	from	)
 		__field(	ext4_lblk_t,	to	)
-		__field(	ext4_fsblk_t,	partial	)
+		__field(	long long int,	partial	)
 		__field(	ext4_fsblk_t,	ee_pblk	)
 		__field(	ext4_lblk_t,	ee_lblk	)
 		__field(	unsigned short,	ee_len	)
@@ -1955,7 +1955,7 @@  TRACE_EVENT(ext4_remove_blocks,
 	),
 
 	TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]"
-		  "from %u to %u partial_cluster %u",
+		  "from %u to %u partial_cluster %lld",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  (unsigned) __entry->ee_lblk,
@@ -1963,19 +1963,20 @@  TRACE_EVENT(ext4_remove_blocks,
 		  (unsigned short) __entry->ee_len,
 		  (unsigned) __entry->from,
 		  (unsigned) __entry->to,
-		  (unsigned) __entry->partial)
+		  (long long int) __entry->partial)
 );
 
 TRACE_EVENT(ext4_ext_rm_leaf,
 	TP_PROTO(struct inode *inode, ext4_lblk_t start,
-		 struct ext4_extent *ex, ext4_fsblk_t partial_cluster),
+		 struct ext4_extent *ex,
+		 long long int partial_cluster),
 
 	TP_ARGS(inode, start, ex, partial_cluster),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,		dev	)
 		__field(	ino_t,		ino	)
-		__field(	ext4_fsblk_t,	partial	)
+		__field(	long long int,	partial	)
 		__field(	ext4_lblk_t,	start	)
 		__field(	ext4_lblk_t,	ee_lblk	)
 		__field(	ext4_fsblk_t,	ee_pblk	)
@@ -1993,14 +1994,14 @@  TRACE_EVENT(ext4_ext_rm_leaf,
 	),
 
 	TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]"
-		  "partial_cluster %u",
+		  "partial_cluster %lld",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  (unsigned) __entry->start,
 		  (unsigned) __entry->ee_lblk,
 		  (unsigned long long) __entry->ee_pblk,
 		  (unsigned short) __entry->ee_len,
-		  (unsigned) __entry->partial)
+		  (long long int) __entry->partial)
 );
 
 TRACE_EVENT(ext4_ext_rm_idx,
@@ -2058,7 +2059,7 @@  TRACE_EVENT(ext4_ext_remove_space,
 
 TRACE_EVENT(ext4_ext_remove_space_done,
 	TP_PROTO(struct inode *inode, ext4_lblk_t start, ext4_lblk_t end,
-		 int depth, ext4_lblk_t partial, unsigned short eh_entries),
+		 int depth, long long int partial, unsigned short eh_entries),
 
 	TP_ARGS(inode, start, end, depth, partial, eh_entries),
 
@@ -2068,7 +2069,7 @@  TRACE_EVENT(ext4_ext_remove_space_done,
 		__field(	ext4_lblk_t,	start		)
 		__field(	ext4_lblk_t,	end		)
 		__field(	int,		depth		)
-		__field(	ext4_lblk_t,	partial		)
+		__field(	long long int,	partial		)
 		__field(	unsigned short,	eh_entries	)
 	),
 
@@ -2082,14 +2083,14 @@  TRACE_EVENT(ext4_ext_remove_space_done,
 		__entry->eh_entries	= eh_entries;
 	),
 
-	TP_printk("dev %d,%d ino %lu start %u end %u depth %d partial %u "
+	TP_printk("dev %d,%d ino %lu start %u end %u depth %d partial %lld "
 		  "remaining_entries %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  (unsigned) __entry->start,
 		  (unsigned) __entry->end,
 		  __entry->depth,
-		  (unsigned) __entry->partial,
+		  (long long int) __entry->partial,
 		  (unsigned short) __entry->eh_entries)
 );