Viewing: ext4-give-warning-with-dir-htree-growing.patch

commit 07660ad33a7d109cced29b6400f99f25adab3f54
Author:     Wang Shilong <wshilong@whamcloud.com>
AuthorDate: Thu Jul 2 21:04:45 2015 -0400
LU-6824 ldiskfs: give warning with dir htree growing

Currently, without the large dir feature, the ldiskfs directory hash
tree is limited to a height of 2. This means the directory size is
limited to about 1GB, and in fact users are likely to hit ENOSPC when
reaching half of that limit because of bad hashing. This was tested
with the following script:

    i=0
    filename="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
    "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbb"
    while [ 1 ]
    do
            touch $filename"$i"
            if [ $? -ne 0 ];then
                    break
            fi
            ((i++))
    done

When the directory size grows to about 590M, we hit ENOSPC. A better
way is to add support to e2fsprogs so that we can use the large dir
feature. As a workaround, this patch gives warning messages on the
console when 10/16 and 11/16 of the limit are reached.

So this patch gives the following messages when the warning limit or
the hash index tree limit is reached:

Directory (inode: 8388610 FID: [0x200000401:0x1:0x0]) has approached
maximum limit.

Directory (inode: 8388610 FID: [0x200000401:0x1:0x0]) is approaching
maximum limit.

The FID output here is useful for administrators to locate the Lustre
file path.

Signed-off-by: Wang Shilong <wshilong@ddn.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Yang Sheng <yang.sheng@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Change-Id: I7f78c421bbb89f76298e0174cc46d774ea82eb06
Reviewed-on: http://review.whamcloud.com/15548

Index: linux-4.15.0/fs/ext4/ext4.h
===================================================================
--- linux-4.15.0.orig/fs/ext4/ext4.h
+++ linux-4.15.0/fs/ext4/ext4.h
@@ -1450,6 +1450,7 @@ struct ext4_sb_info {
 	unsigned long s_mb_prealloc_table_size;
 	unsigned int s_mb_group_prealloc;
 	unsigned int s_max_dir_size_kb;
+	unsigned long s_warning_dir_size;
 	/* where last allocation was done - for stream allocation */
 	unsigned long s_mb_last_group;
 	unsigned long s_mb_last_start;
Index: linux-4.15.0/fs/ext4/namei.c
===================================================================
--- linux-4.15.0.orig/fs/ext4/namei.c
+++ linux-4.15.0/fs/ext4/namei.c
@@ -751,12 +751,20 @@ struct ext4_dir_lock_data {
 #define ext4_htree_lock_data(l)	((struct ext4_dir_lock_data *)(l)->lk_private)
 #define ext4_find_entry(dir, name, dirent, inline) \
 			__ext4_find_entry(dir, name, dirent, inline, NULL)
-#define ext4_add_entry(handle, dentry, inode) \
-			__ext4_add_entry(handle, dentry, inode, NULL)
 
 /* NB: ext4_lblk_t is 32 bits so we use high bits to identify invalid blk */
 #define EXT4_HTREE_NODE_CHANGED	(0xcafeULL << 32)
 
+/* Call __ext4_add_entry() with a NULL last argument; -ENOBUFS becomes 0. */
+inline int ext4_add_entry(handle_t *handle, struct dentry *dentry,
+			  struct inode *inode)
+{
+	int rc = __ext4_add_entry(handle, dentry, inode, NULL);
+
+	/* ext4_dx_add_entry() turns a successful add into -ENOBUFS to warn */
+	return rc == -ENOBUFS ? 0 : rc;
+}
+
 static void ext4_htree_event_cb(void *target, void *event)
 {
 	u64 *block = (u64 *)target;
@@ -2508,6 +2516,54 @@ out:
 	return err;
 }
 
+/* Size limit for @dir: max_dir_size_kb if set, otherwise 3/4 of block size
+ * times dx_get_limit() of every frame from @frame back to @frames. */
+static unsigned long __ext4_max_dir_size(struct dx_frame *frames,
+			       struct dx_frame *frame, struct inode *dir)
+{
+	/* widen first: shifting the unsigned int field wraps at 4GB */
+	unsigned long max_dir_size = EXT4_SB(dir->i_sb)->s_max_dir_size_kb;
+
+	if (max_dir_size)
+		return max_dir_size << 10;
+
+	max_dir_size = EXT4_BLOCK_SIZE(dir->i_sb);
+	for (; frame >= frames; frame--) {
+		max_dir_size *= dx_get_limit(frame->entries);
+		if (frame == frames)
+			break;
+	}
+	/* use 75% of max dir size in average */
+	return max_dir_size / 4 * 3;
+}
+
+/*
+ * With the hash tree growing it is easy to hit ENOSPC, but hard to
+ * predict when that will happen.  Give administrators a warning when
+ * the directory size reaches 10/16 and 11/16 of the limit: return true
+ * while i_size is in_range() of one block from either rounded mark.
+ */
+static inline bool dir_size_in_warning_range(struct dx_frame *frames,
+					     struct dx_frame *frame,
+					     struct inode *dir)
+{
+	struct super_block *sb = dir->i_sb;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	unsigned long lo, hi;
+
+	/* zero means the limit has not been calculated yet */
+	if (unlikely(!sbi->s_warning_dir_size))
+		sbi->s_warning_dir_size =
+			__ext4_max_dir_size(frames, frame, dir);
+
+	/* round each mark down to a block boundary */
+	lo = (sbi->s_warning_dir_size / 16 * 10) & ~(EXT4_BLOCK_SIZE(sb) - 1);
+	hi = (sbi->s_warning_dir_size / 16 * 11) & ~(EXT4_BLOCK_SIZE(sb) - 1);
+
+	return in_range(dir->i_size, lo, EXT4_BLOCK_SIZE(sb)) ||
+	       in_range(dir->i_size, hi, EXT4_BLOCK_SIZE(sb));
+}
+
 /*
  *	ext4_add_entry()
  *
@@ -2629,6 +2685,7 @@ static int ext4_dx_add_entry(handle_t *h
 	struct ext4_dir_entry_2 *de;
 	int restart;
 	int err;
+	bool ret_warn = false;
 
 again:
 	restart = 0;
@@ -2657,6 +2714,11 @@ again:
 	/* Block full, should compress but for now just split */
 	dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
 		       dx_get_count(entries), dx_get_limit(entries)));
+
+	if (frame - frames + 1 >= ext4_dir_htree_level(sb) ||
+	    EXT4_SB(sb)->s_warning_dir_size)
+		ret_warn = dir_size_in_warning_range(frames, frame, dir);
+
 	/* Need to split index? */
 	if (dx_get_count(entries) == dx_get_limit(entries)) {
 		ext4_lblk_t newblock;
@@ -2819,6 +2881,8 @@ cleanup:
 	 */
 	if (restart && err == 0)
 		goto again;
+	if (err == 0 && ret_warn)
+		err = -ENOBUFS;
 	return err;
 }
 
Index: linux-4.15.0/fs/ext4/super.c
===================================================================
--- linux-4.15.0.orig/fs/ext4/super.c
+++ linux-4.15.0/fs/ext4/super.c
@@ -1804,6 +1804,8 @@ static int handle_mount_opt(struct super
 		sbi->s_li_wait_mult = arg;
 	} else if (token == Opt_max_dir_size_kb) {
 		sbi->s_max_dir_size_kb = arg;
+		/* reset s_warning_dir_size and make it re-calculated */
+		sbi->s_warning_dir_size = 0;
 	} else if (token == Opt_stripe) {
 		sbi->s_stripe = arg;
 	} else if (token == Opt_resuid) {
Index: linux-4.15.0/fs/ext4/sysfs.c
===================================================================
--- linux-4.15.0.orig/fs/ext4/sysfs.c
+++ linux-4.15.0/fs/ext4/sysfs.c
@@ -173,6 +173,7 @@ EXT4_ATTR_OFFSET(inode_readahead_blks, 0
 EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
 EXT4_RW_ATTR_SBI_UI(max_dir_size, s_max_dir_size_kb);
 EXT4_RW_ATTR_SBI_UI(max_dir_size_kb, s_max_dir_size_kb);
+EXT4_RW_ATTR_SBI_UI(warning_dir_size, s_warning_dir_size);
 EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
 EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
 EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
@@ -204,6 +205,7 @@ static struct attribute *ext4_attrs[] =
 	ATTR_LIST(inode_goal),
 	ATTR_LIST(max_dir_size),
 	ATTR_LIST(max_dir_size_kb),
+	ATTR_LIST(warning_dir_size),
 	ATTR_LIST(mb_stats),
 	ATTR_LIST(mb_max_to_scan),
 	ATTR_LIST(mb_min_to_scan),