@@ -4511,70 +4511,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* block allocation which had been deferred till now.
*/
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
- unsigned int reserved_clusters;
- /*
- * Check how many clusters we had reserved this allocated range
- */
- reserved_clusters = get_reserved_cluster_alloc(inode,
- map->m_lblk, allocated);
if (!map_from_cluster) {
- BUG_ON(allocated_clusters < reserved_clusters);
- if (reserved_clusters < allocated_clusters) {
- struct ext4_inode_info *ei = EXT4_I(inode);
- int reservation = allocated_clusters -
- reserved_clusters;
- /*
- * It seems we claimed few clusters outside of
- * the range of this allocation. We should give
- * it back to the reservation pool. This can
- * happen in the following case:
- *
- * * Suppose s_cluster_ratio is 4 (i.e., each
- * cluster has 4 blocks. Thus, the clusters
- * are [0-3],[4-7],[8-11]...
- * * First comes delayed allocation write for
- * logical blocks 10 & 11. Since there were no
- * previous delayed allocated blocks in the
- * range [8-11], we would reserve 1 cluster
- * for this write.
- * * Next comes write for logical blocks 3 to 8.
- * In this case, we will reserve 2 clusters
- * (for [0-3] and [4-7]; and not for [8-11] as
- * that range has a delayed allocated blocks.
- * Thus total reserved clusters now becomes 3.
- * * Now, during the delayed allocation writeout
- * time, we will first write blocks [3-8] and
- * allocate 3 clusters for writing these
- * blocks. Also, we would claim all these
- * three clusters above.
- * * Now when we come here to writeout the
- * blocks [10-11], we would expect to claim
- * the reservation of 1 cluster we had made
- * (and we would claim it since there are no
- * more delayed allocated blocks in the range
- * [8-11]. But our reserved cluster count had
- * already gone to 0.
- *
- * Thus, at the step 4 above when we determine
- * that there are still some unwritten delayed
- * allocated blocks outside of our current
- * block range, we should increment the
- * reserved clusters count so that when the
- * remaining blocks finally gets written, we
- * could claim them.
- */
- dquot_reserve_block(inode,
- EXT4_C2B(sbi, reservation));
- spin_lock(&ei->i_block_reservation_lock);
- ei->i_reserved_data_blocks += reservation;
- spin_unlock(&ei->i_block_reservation_lock);
- }
/*
- * We will claim quota for all newly allocated blocks.
- * We're updating the reserved space *after* the
- * correction above so we do not accidentally free
- * all the metadata reservation because we might
- * actually need it later on.
+ * We don't have to compensate reserved clusters for
+ * sibling delayed allocated extents if they belong to
+ * the same cluster since they'll be mapped from the
+ * clusters directly, i.e. @map_from_cluser is true,
+ * otherwise there would be reservation/quota accounting
+ * leaking.
*/
ext4_da_update_reserve_space(inode, allocated_clusters,
1);
This can be reproduced by the following scripts, $ mkfs.ext4 -Obigalloc /dev/vdd $ mount /dev/vdd /mnt/ $ xfs_io -f -c "pwrite 0 2" -c "pwrite 16K 2" -c "pwrite 32K 2" -c "fsync" /mnt/dir/foobar As the bigalloc cluster size is 64K by default, the above writes should be in one whole cluster(64K), but currently it reports 128K instead, there is a 64K leak, and if quota is enabled, it also leads to incorrect quota accounting. During the writeback phrase, what was happening is, a) mapping block [0,4k) which covers [0,2) allocates a cluster and compensates both quota reserved blocks and @i_reserved_data_blocks by one reserved cluster count, b) mapping block [16k,20k) which covers [16k,16k+2) finds that there's already one available cluster, then it will map from it directly instead of updating reservation. c) no one will decrement @i_reserved_data_blocks and quota. In fact, get_implied_cluster_alloc() has figured out if the cluster has already been allocated, there is no need to do the compensation stuff. Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com> --- fs/ext4/extents.c | 68 +++++-------------------------------------------------- 1 file changed, 6 insertions(+), 62 deletions(-)