@@ -893,6 +893,7 @@ struct ext4_inode_info {
/* extents status tree */
struct ext4_es_tree i_es_tree;
rwlock_t i_es_lock;
+ struct list_head i_es_lru;
/* ialloc */
ext4_group_t i_last_alloc_group;
@@ -1308,6 +1309,9 @@ struct ext4_sb_info {
/* Precomputed FS UUID checksum for seeding other checksums */
__u32 s_csum_seed;
+
+ /* Extent status tree shrinker */
+ struct ext4_es_shrinker s_es_shrinker;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -141,6 +141,7 @@ static int __es_insert_extent(struct ext4_es_tree *tree,
struct extent_status *newes);
static int __es_remove_extent(struct ext4_es_tree *tree, ext4_lblk_t lblk,
ext4_lblk_t end);
+static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc);
int __init ext4_init_es(void)
{
@@ -275,6 +276,8 @@ out:
read_unlock(&EXT4_I(inode)->i_es_lock);
+ ext4_es_lru_add(inode);
+
trace_ext4_es_find_extent_exit(inode, es, ret);
return ret;
}
@@ -449,6 +452,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
+ ext4_es_lru_add(inode);
+
ext4_es_print_tree(inode);
return err;
@@ -509,6 +514,8 @@ out:
read_unlock(&EXT4_I(inode)->i_es_lock);
+ ext4_es_lru_add(inode);
+
trace_ext4_es_lookup_extent_exit(inode, es, found);
return found;
}
@@ -621,6 +628,108 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
write_lock(&EXT4_I(inode)->i_es_lock);
err = __es_remove_extent(tree, lblk, end);
write_unlock(&EXT4_I(inode)->i_es_lock);
+ ext4_es_lru_add(inode);
ext4_es_print_tree(inode);
return err;
}
+
+/*
+ * Prepare the extent status shrinker embedded in @sb's ext4_sb_info
+ * (empty LRU list, LRU lock, callback and seek cost) and register it.
+ */
+void ext4_es_register_shrinker(struct super_block *sb)
+{
+	struct ext4_es_shrinker *es_shrinker = &EXT4_SB(sb)->s_es_shrinker;
+
+	INIT_LIST_HEAD(&es_shrinker->es_lru);
+	spin_lock_init(&es_shrinker->es_lru_lock);
+
+	es_shrinker->es_shrinker.shrink = ext4_es_shrink;
+	es_shrinker->es_shrinker.seeks = DEFAULT_SEEKS;
+	register_shrinker(&es_shrinker->es_shrinker);
+}
+
+/*
+ * Unregister the extent status shrinker that belongs to @sb.
+ */
+void ext4_es_unregister_shrinker(struct super_block *sb)
+{
+	struct ext4_es_shrinker *es_shrinker = &EXT4_SB(sb)->s_es_shrinker;
+
+	unregister_shrinker(&es_shrinker->es_shrinker);
+}
+
+/*
+ * Drop every non-delayed extent from @ei's extent status tree.
+ *
+ * The tree is walked from rb_first() onwards with i_es_lock held for
+ * writing.  Returns the number of extents that were freed.
+ */
+static int ext4_es_do_shrink(struct ext4_inode_info *ei)
+{
+	struct ext4_es_tree *tree = &ei->i_es_tree;
+	struct rb_node *cur, *next;
+	struct extent_status *es;
+	int nr_freed = 0;
+
+	write_lock(&ei->i_es_lock);
+	for (cur = rb_first(&tree->root); cur != NULL; cur = next) {
+		es = rb_entry(cur, struct extent_status, rb_node);
+		/* Pick up the successor now: cur may be erased below. */
+		next = rb_next(cur);
+
+		/*
+		 * Delayed extents have to stay in the status tree because
+		 * fiemap, bigalloc, and seek_data/hole need to use them.
+		 */
+		if (ext4_es_is_delayed(es))
+			continue;
+
+		rb_erase(cur, &tree->root);
+		ext4_es_free_extent(es);
+		nr_freed++;
+	}
+	/* cache_es may refer to an extent freed above, so forget it. */
+	tree->cache_es = NULL;
+	write_unlock(&ei->i_es_lock);
+
+	return nr_freed;
+}
+
+/*
+ * Shrinker callback: reclaim extent status entries from inodes on the LRU.
+ *
+ * Inodes are taken from the head of the LRU list.  Visiting an inode costs
+ * one unit of sc->nr_to_scan, plus one unit for every extent freed from it.
+ * Each visited inode is parked on a private list and spliced back onto the
+ * tail of the LRU before the lock is dropped, so a single call never
+ * rescans the same inode and the next call starts with inodes that have
+ * not been scanned yet.
+ *
+ * Returns the number of extents freed, or 0 when sc->nr_to_scan is 0.
+ *
+ * NOTE(review): the legacy ->shrink() contract appears to expect the
+ * nr_to_scan == 0 query to report how many objects could be freed, and a
+ * scan to report how many remain.  Returning 0 for the query may stop the
+ * VM from ever asking this shrinker to scan.  es_lru_nr looks intended
+ * for that count but is not maintained in the visible code - confirm.
+ */
+static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
+{
+	struct ext4_es_shrinker *es_shrinker = container_of(shrink,
+				struct ext4_es_shrinker, es_shrinker);
+	struct ext4_inode_info *ei;
+	LIST_HEAD(scanned);
+	int nr_to_scan = sc->nr_to_scan;
+	int ret, shrunk_nr = 0;
+
+	if (!nr_to_scan)
+		return shrunk_nr;
+
+	spin_lock(&es_shrinker->es_lru_lock);
+	while (!list_empty(&es_shrinker->es_lru)) {
+		if (nr_to_scan-- <= 0)
+			break;
+
+		ei = list_first_entry(&es_shrinker->es_lru,
+				      struct ext4_inode_info, i_es_lru);
+		/*
+		 * Take the inode off the head before shrinking it;
+		 * otherwise every iteration would pick the same inode and
+		 * the rest of the LRU would never be reached.
+		 */
+		list_move_tail(&ei->i_es_lru, &scanned);
+
+		ret = ext4_es_do_shrink(ei);
+		shrunk_nr += ret;
+		nr_to_scan -= ret;
+	}
+	/* Scanned inodes go back behind the ones not visited yet. */
+	list_splice_tail(&scanned, &es_shrinker->es_lru);
+	spin_unlock(&es_shrinker->es_lru_lock);
+
+	return shrunk_nr;
+}
+
+/*
+ * Put @inode at the tail of its superblock's extent status LRU list.
+ * An inode that is already on the list is moved to the tail instead.
+ */
+void ext4_es_lru_add(struct inode *inode)
+{
+	struct ext4_es_shrinker *es_shrinker =
+				&EXT4_SB(inode->i_sb)->s_es_shrinker;
+	struct list_head *entry = &EXT4_I(inode)->i_es_lru;
+
+	spin_lock(&es_shrinker->es_lru_lock);
+	if (!list_empty(entry))
+		list_move_tail(entry, &es_shrinker->es_lru);
+	else
+		list_add_tail(entry, &es_shrinker->es_lru);
+	spin_unlock(&es_shrinker->es_lru_lock);
+}
+
+/*
+ * Take @inode off its superblock's extent status LRU list, if it is on
+ * it, leaving the inode's list node re-initialised.
+ */
+void ext4_es_lru_del(struct inode *inode)
+{
+	struct ext4_es_shrinker *es_shrinker =
+				&EXT4_SB(inode->i_sb)->s_es_shrinker;
+	struct list_head *entry = &EXT4_I(inode)->i_es_lru;
+
+	spin_lock(&es_shrinker->es_lru_lock);
+	if (!list_empty(entry))
+		list_del_init(entry);
+	spin_unlock(&es_shrinker->es_lru_lock);
+}
@@ -39,6 +39,13 @@ struct ext4_es_tree {
struct extent_status *cache_es; /* recently accessed extent */
};
+struct ext4_es_shrinker { /* embedded in struct ext4_sb_info as s_es_shrinker */
+ struct shrinker es_shrinker; /* registered by ext4_es_register_shrinker() */
+ struct list_head es_lru; /* inodes linked through their i_es_lru node */
+ spinlock_t es_lru_lock ____cacheline_aligned_in_smp; /* held around es_lru updates */
+ unsigned int es_lru_nr; /* NOTE(review): never updated in the visible code - confirm use */
+};
+
extern int __init ext4_init_es(void);
extern void ext4_exit_es(void);
extern void ext4_es_init_tree(struct ext4_es_tree *tree);
@@ -73,4 +80,9 @@ static inline ext4_fsblk_t ext4_es_get_pblock(struct extent_status *es,
return (ext4_es_is_delayed(es) ? ~0 : pb);
}
+extern void ext4_es_register_shrinker(struct super_block *sb);
+extern void ext4_es_unregister_shrinker(struct super_block *sb);
+extern void ext4_es_lru_add(struct inode *inode);
+extern void ext4_es_lru_del(struct inode *inode);
+
#endif /* _EXT4_EXTENTS_STATUS_H */
@@ -858,6 +858,7 @@ static void ext4_put_super(struct super_block *sb)
ext4_abort(sb, "Couldn't clean up the journal");
}
+ ext4_es_unregister_shrinker(sb);
del_timer(&sbi->s_err_report);
ext4_release_system_zone(sb);
ext4_mb_release(sb);
@@ -944,6 +945,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
spin_lock_init(&ei->i_prealloc_lock);
ext4_es_init_tree(&ei->i_es_tree);
rwlock_init(&ei->i_es_lock);
+ INIT_LIST_HEAD(&ei->i_es_lru);
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0;
@@ -1031,6 +1033,7 @@ void ext4_clear_inode(struct inode *inode)
dquot_drop(inode);
ext4_discard_preallocations(inode);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
+ ext4_es_lru_del(inode);
if (EXT4_I(inode)->jinode) {
jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
EXT4_I(inode)->jinode);
@@ -3324,6 +3327,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto cantfind_ext4;
sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
+ /* Register extent status tree shrinker */
+ ext4_es_register_shrinker(sb);
+
/* Warn if metadata_csum and gdt_csum are both set. */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&