diff mbox series

[RFC,v2,6/9] ext4: add memory usage tracker for freespace trees

Message ID 20200821015523.1698374-7-harshads@google.com
State New
Headers show
Series ext4: add free-space extent based allocator | expand

Commit Message

harshad shirwadkar Aug. 21, 2020, 1:55 a.m. UTC
From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>

Freespace trees can occupy a lot of memory as fragmentation increases.
This patch adds a sysfs file to monitor the memory usage of the
freespace tree allocator. It also adds a sysfs config to control the
maximum memory that the allocator can use. If the allocator exceeds
this threshold, the file system enters the "FRSP_MEM_CRUNCH" state. The
next patch in the series performs LRU eviction when this state is
reached.

Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
 fs/ext4/ext4.h    |  8 ++++++++
 fs/ext4/mballoc.c | 20 ++++++++++++++++++++
 fs/ext4/mballoc.h |  4 ++++
 fs/ext4/sysfs.c   | 11 +++++++++++
 4 files changed, 43 insertions(+)
diff mbox series

Patch

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 15e6ce9f1afa..93bf2fe35cf1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1223,6 +1223,12 @@  struct ext4_inode_info {
 						    * allocator off)
 						    */
 
+#define EXT4_MOUNT2_FRSP_MEM_CRUNCH	0x00000040 /*
+						    * Freespace tree allocator
+						    * is over its configured
+						    * memory limit.
+						    */
+
 #define clear_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt &= \
 						~EXT4_MOUNT_##opt
 #define set_opt(sb, opt)		EXT4_SB(sb)->s_mount_opt |= \
@@ -1607,6 +1613,8 @@  struct ext4_sb_info {
 	atomic_t s_mb_num_frsp_trees_cached;
 	struct list_head s_mb_uncached_trees;
 	u32 s_mb_frsp_cache_aggression;
+	atomic_t s_mb_num_fragments;
+	u32 s_mb_frsp_mem_limit;
 
 	/* workqueue for reserved extent conversions (buffered io) */
 	struct workqueue_struct *rsv_conversion_wq;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 1da63afdbb3d..b28b7fb0506e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -869,6 +869,7 @@  void ext4_mb_frsp_print_tree_len(struct super_block *sb,
 static struct ext4_frsp_node *ext4_mb_frsp_alloc_node(struct super_block *sb)
 {
 	struct ext4_frsp_node *node;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	node = kmem_cache_alloc(ext4_freespace_node_cachep, GFP_NOFS);
 	if (!node)
@@ -877,13 +878,31 @@  static struct ext4_frsp_node *ext4_mb_frsp_alloc_node(struct super_block *sb)
 	RB_CLEAR_NODE(&node->frsp_node);
 	RB_CLEAR_NODE(&node->frsp_len_node);
 
+	atomic_inc(&sbi->s_mb_num_fragments);
+
+	if (sbi->s_mb_frsp_mem_limit &&
+		atomic_read(&sbi->s_mb_num_fragments) >
+		EXT4_FRSP_MEM_LIMIT_TO_NUM_NODES(sb))
+		set_opt2(sb, FRSP_MEM_CRUNCH);
+	else
+		clear_opt2(sb, FRSP_MEM_CRUNCH);
+
+
 	return node;
 }
 
 static void ext4_mb_frsp_free_node(struct super_block *sb,
 		struct ext4_frsp_node *node)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+
 	kmem_cache_free(ext4_freespace_node_cachep, node);
+
+	/* Leave crunch once the node count is back within the limit, if any. */
+	if (atomic_dec_return(&sbi->s_mb_num_fragments) <=
+		EXT4_FRSP_MEM_LIMIT_TO_NUM_NODES(sb) ||
+		!sbi->s_mb_frsp_mem_limit)
+		clear_opt2(sb, FRSP_MEM_CRUNCH);
 }
 
 /* Evict a tree from memory */
@@ -1607,6 +1626,7 @@  int ext4_mb_init_freespace_trees(struct super_block *sb)
 	}
 	rwlock_init(&sbi->s_mb_frsp_lock);
 	atomic_set(&sbi->s_mb_num_frsp_trees_cached, 0);
+	atomic_set(&sbi->s_mb_num_fragments, 0);
 
 	return 0;
 }
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 1fcdd3e6f7d5..6cfb228e4da2 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -92,6 +92,10 @@  struct ext4_frsp_node {
 	struct rb_node frsp_node;
 	struct rb_node frsp_len_node;
 };
+
+/* Max freespace nodes that fit in s_mb_frsp_mem_limit bytes for __sb. */
+#define EXT4_FRSP_MEM_LIMIT_TO_NUM_NODES(__sb)				\
+	(EXT4_SB(__sb)->s_mb_frsp_mem_limit / sizeof(struct ext4_frsp_node))
 struct ext4_free_data {
 	/* this links the free block information from sb_info */
 	struct list_head		efd_list;
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index bfabb799fa45..19301b10944b 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -8,6 +8,7 @@ 
  *
  */
 
+#include "mballoc.h"
 #include <linux/time.h>
 #include <linux/fs.h>
 #include <linux/seq_file.h>
@@ -24,6 +25,7 @@  typedef enum {
 	attr_session_write_kbytes,
 	attr_lifetime_write_kbytes,
 	attr_reserved_clusters,
+	attr_frsp_tree_usage,
 	attr_inode_readahead,
 	attr_trigger_test_error,
 	attr_first_error_time,
@@ -208,6 +210,7 @@  EXT4_ATTR_FUNC(delayed_allocation_blocks, 0444);
 EXT4_ATTR_FUNC(session_write_kbytes, 0444);
 EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444);
 EXT4_ATTR_FUNC(reserved_clusters, 0644);
+EXT4_ATTR_FUNC(frsp_tree_usage, 0444);
 
 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead,
 		 ext4_sb_info, s_inode_readahead_blks);
@@ -248,6 +251,7 @@  EXT4_ATTR(last_error_time, 0444, last_error_time);
 EXT4_ATTR(journal_task, 0444, journal_task);
 EXT4_RW_ATTR_SBI_UI(mb_prefetch, s_mb_prefetch);
 EXT4_RW_ATTR_SBI_UI(mb_prefetch_limit, s_mb_prefetch_limit);
+EXT4_RW_ATTR_SBI_UI(mb_frsp_max_mem, s_mb_frsp_mem_limit);
 
 static unsigned int old_bump_val = 128;
 EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val);
@@ -257,6 +261,7 @@  static struct attribute *ext4_attrs[] = {
 	ATTR_LIST(session_write_kbytes),
 	ATTR_LIST(lifetime_write_kbytes),
 	ATTR_LIST(reserved_clusters),
+	ATTR_LIST(frsp_tree_usage),
 	ATTR_LIST(inode_readahead_blks),
 	ATTR_LIST(inode_goal),
 	ATTR_LIST(mb_stats),
@@ -296,6 +301,7 @@  static struct attribute *ext4_attrs[] = {
 #endif
 	ATTR_LIST(mb_prefetch),
 	ATTR_LIST(mb_prefetch_limit),
+	ATTR_LIST(mb_frsp_max_mem),
 	NULL,
 };
 ATTRIBUTE_GROUPS(ext4);
@@ -378,6 +384,11 @@  static ssize_t ext4_attr_show(struct kobject *kobj,
 		return snprintf(buf, PAGE_SIZE, "%llu\n",
 				(unsigned long long)
 				atomic64_read(&sbi->s_resv_clusters));
+	case attr_frsp_tree_usage:
+		return snprintf(buf, PAGE_SIZE, "%llu\n",
+				(unsigned long long)
+				atomic_read(&sbi->s_mb_num_fragments) *
+				sizeof(struct ext4_frsp_node));
 	case attr_inode_readahead:
 	case attr_pointer_ui:
 		if (!ptr)