Viewing: ext4-add-DISCARD-stats.patch

commit 4c33ce3a86a48ce6da2604a2a8bdab22650b12aa
Author:     Li Dongyang <dongyangli@ddn.com>
AuthorDate: Tue Jul 8 20:53:46 2025 +1000
LU-19158 ldiskfs: add trim statistics

This makes ldiskfs record the lifetime kbytes discarded and the number
of discard ops in the superblock, and exports them via sysfs, under
/sys/fs/ldiskfs/<disk>/lifetime_discard_kbytes and
/sys/fs/ldiskfs/<disk>/lifetime_discard_ops.

Also adds a discard histogram for the current mount session
under /proc/fs/ldiskfs/<disk>/discard_stats, and it looks like this:

sizes: { 8KB: 1, 128KB: 5, 1MB: 1, 2MB: 4, 16MB: 3, 32MB: 2, 64MB: 4, 128MB: 67 }
count: 87

Writing anything to the procfs entry resets the histogram.

Test-Parameters: trivial
Signed-off-by: Li Dongyang <dongyangli@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Artem Blagodarenko <ablagodarenko@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Change-Id: I32fe2a722d165699feb5da6effefc08af35aa241
Reviewed-on: https://review.whamcloud.com/60071

Index: linux-4.18.0-553.58.1.el8_10/fs/ext4/ext4.h
===================================================================
--- linux-4.18.0-553.58.1.el8_10.orig/fs/ext4/ext4.h
+++ linux-4.18.0-553.58.1.el8_10/fs/ext4/ext4.h
@@ -1382,7 +1382,10 @@ struct ext4_super_block {
 	__u8	s_last_error_time_hi;
 	__u8	s_first_error_errcode;
 	__u8    s_last_error_errcode;
-	__le32	s_reserved[96];		/* Padding to the end of the block */
+	__le32	s_reserved[91];		/* Padding to the end of the block */
+	__le64	s_kbytes_discarded;	/* nr of lifetime kilobytes discarded */
+/*3F0*/	__le64	s_nr_discard_ops;	/* nr of lifetime discard operations */
+	__le32	s_unused_last;
 	__le32	s_checksum;		/* crc32c(superblock) */
 };
 
@@ -1404,6 +1407,8 @@ struct ext4_super_block {
 #define DUMMY_ENCRYPTION_ENABLED(sbi) (0)
 #endif
 
+#define EXT4_HIST_MAX	32
+
 /* Number of quota types we support */
 #define EXT4_MAXQUOTAS 3
 
@@ -1451,6 +1456,7 @@ struct ext4_sb_info {
 	struct percpu_counter s_freeinodes_counter;
 	struct percpu_counter s_dirs_counter;
 	struct percpu_counter s_dirtyclusters_counter;
+	struct percpu_counter s_discard_ops[EXT4_HIST_MAX];
 	struct blockgroup_lock *s_blockgroup_lock;
 	struct proc_dir_entry *s_proc;
 	struct kobject s_kobj;
@@ -1553,6 +1559,12 @@ struct ext4_sb_info {
 	u64 s_kbytes_written;
 	struct work_struct s_stats_work;
 
+	/* for discard statistics */
+	unsigned long s_sectors_discarded_start;
+	u64 s_kbytes_discarded;
+	unsigned long s_nr_discard_ops_start;
+	u64 s_nr_discard_ops;
+
 	/* the size of zero-out chunk */
 	unsigned int s_extent_max_zeroout_kb;
 
Index: linux-4.18.0-553.58.1.el8_10/fs/ext4/mballoc.c
===================================================================
--- linux-4.18.0-553.58.1.el8_10.orig/fs/ext4/mballoc.c
+++ linux-4.18.0-553.58.1.el8_10/fs/ext4/mballoc.c
@@ -3724,12 +3724,16 @@ static inline int ext4_issue_discard(str
 		struct bio **biop)
 {
 	ext4_fsblk_t discard_block;
+	unsigned int val;
 
 	discard_block = (EXT4_C2B(EXT4_SB(sb), cluster) +
 			 ext4_group_first_block_no(sb, block_group));
 	count = EXT4_C2B(EXT4_SB(sb), count);
 	trace_ext4_discard_blocks(sb,
 			(unsigned long long) discard_block, count);
+	val = min(fls((count << (sb->s_blocksize_bits - 10)) - 1),
+		  EXT4_HIST_MAX - 1);
+	percpu_counter_inc(&EXT4_SB(sb)->s_discard_ops[val]);
 	if (biop) {
 		return __blkdev_issue_discard(sb->s_bdev,
 			(sector_t)discard_block << (sb->s_blocksize_bits - 9),
Index: linux-4.18.0-553.58.1.el8_10/fs/ext4/super.c
===================================================================
--- linux-4.18.0-553.58.1.el8_10.orig/fs/ext4/super.c
+++ linux-4.18.0-553.58.1.el8_10/fs/ext4/super.c
@@ -1170,6 +1170,8 @@ static void ext4_put_super(struct super_
 	percpu_counter_destroy(&sbi->s_dirs_counter);
 	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
 	percpu_free_rwsem(&sbi->s_writepages_rwsem);
+	for (i = 0; i < EXT4_HIST_MAX; i++)
+		percpu_counter_destroy(&sbi->s_discard_ops[i]);
 #ifdef CONFIG_QUOTA
 	for (i = 0; i < EXT4_MAXQUOTAS; i++)
 		kfree(get_qf_name(sb, sbi, i));
@@ -3904,9 +3906,14 @@ static int ext4_fill_super(struct super_
 	sbi->s_sb = sb;
 	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
 	sbi->s_sb_block = sb_block;
-	if (sb->s_bdev->bd_part)
+	if (sb->s_bdev->bd_part) {
 		sbi->s_sectors_written_start =
 			part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]);
+		sbi->s_sectors_discarded_start =
+			part_stat_read(sb->s_bdev->bd_part, sectors[STAT_DISCARD]);
+		sbi->s_nr_discard_ops_start =
+			part_stat_read(sb->s_bdev->bd_part, ios[STAT_DISCARD]);
+	}
 
 	/* Cleanup superblock name */
 	strreplace(sb->s_id, '/', '!');
@@ -3944,6 +3951,8 @@ static int ext4_fill_super(struct super_
 	if (sb->s_magic != EXT4_SUPER_MAGIC)
 		goto cantfind_ext4;
 	sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
+	sbi->s_kbytes_discarded = le64_to_cpu(es->s_kbytes_discarded);
+	sbi->s_nr_discard_ops = le64_to_cpu(es->s_nr_discard_ops);
 
 	/* Warn if metadata_csum and gdt_csum are both set. */
 	if (ext4_has_feature_metadata_csum(sb) &&
@@ -4830,6 +4839,12 @@ no_journal:
 	if (!err)
 		err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
 
+	if (!err) {
+		for (i = 0; i < EXT4_HIST_MAX && !err; i++)
+			err = percpu_counter_init(&sbi->s_discard_ops[i], 0,
+						  GFP_KERNEL);
+	}
+
 	if (err) {
 		ext4_msg(sb, KERN_ERR, "insufficient memory");
 		goto failed_mount6;
@@ -4939,6 +4954,8 @@ failed_mount6:
 	percpu_counter_destroy(&sbi->s_dirs_counter);
 	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
 	percpu_free_rwsem(&sbi->s_writepages_rwsem);
+	for (i = 0; i < EXT4_HIST_MAX; i++)
+		percpu_counter_destroy(&sbi->s_discard_ops[i]);
 failed_mount5:
 	ext4_ext_release(sb);
 	ext4_release_system_zone(sb);
@@ -5194,9 +5211,23 @@ static void ext4_update_super(struct sup
 			    ((part_stat_read(sb->s_bdev->bd_part,
 					     sectors[STAT_WRITE]) -
 			      EXT4_SB(sb)->s_sectors_written_start) >> 1));
+		es->s_kbytes_discarded =
+			cpu_to_le64(EXT4_SB(sb)->s_kbytes_discarded +
+			    ((part_stat_read(sb->s_bdev->bd_part,
+					     sectors[STAT_DISCARD]) -
+			      EXT4_SB(sb)->s_sectors_discarded_start) >> 1));
+		es->s_nr_discard_ops =
+			cpu_to_le64(EXT4_SB(sb)->s_nr_discard_ops +
+			    ((part_stat_read(sb->s_bdev->bd_part,
+						   ios[STAT_DISCARD]) -
+			      EXT4_SB(sb)->s_nr_discard_ops_start)));
 	} else {
 		es->s_kbytes_written =
 			cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
+		es->s_kbytes_discarded =
+			cpu_to_le64(EXT4_SB(sb)->s_kbytes_discarded);
+		es->s_nr_discard_ops =
+			cpu_to_le64(EXT4_SB(sb)->s_nr_discard_ops);
 	}
 
 	if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter)) {
Index: linux-4.18.0-553.58.1.el8_10/fs/ext4/sysfs.c
===================================================================
--- linux-4.18.0-553.58.1.el8_10.orig/fs/ext4/sysfs.c
+++ linux-4.18.0-553.58.1.el8_10/fs/ext4/sysfs.c
@@ -25,6 +25,8 @@ typedef enum {
 	attr_mb_c3_threshold,
 	attr_session_write_kbytes,
 	attr_lifetime_write_kbytes,
+	attr_lifetime_discard_kbytes,
+	attr_lifetime_discard_ops,
 	attr_reserved_clusters,
 	attr_inode_readahead,
 	attr_trigger_test_error,
@@ -80,6 +82,28 @@ static ssize_t lifetime_write_kbytes_sho
 			  EXT4_SB(sb)->s_sectors_written_start) >> 1)));
 }
 
+static ssize_t lifetime_discard_kbytes_show(struct ext4_sb_info *sbi, char *buf)
+{
+	struct block_device *bdev = sbi->s_buddy_cache->i_sb->s_bdev;
+	u64 kb = sbi->s_kbytes_discarded;	/* total loaded from superblock */
+
+	if (bdev->bd_part)	/* NULL-safe: add this mount, sectors >> 1 = KB */
+		kb += (part_stat_read(bdev->bd_part, sectors[STAT_DISCARD]) -
+		       sbi->s_sectors_discarded_start) >> 1;
+	return sysfs_emit(buf, "%llu\n", (unsigned long long)kb);
+}
+
+static ssize_t lifetime_discard_ops_show(struct ext4_sb_info *sbi, char *buf)
+{
+	struct block_device *bdev = sbi->s_buddy_cache->i_sb->s_bdev;
+	u64 ops = sbi->s_nr_discard_ops;	/* total loaded from superblock */
+
+	if (bdev->bd_part)	/* NULL-safe: add discards issued this mount */
+		ops += part_stat_read(bdev->bd_part, ios[STAT_DISCARD]) -
+		       sbi->s_nr_discard_ops_start;
+	return sysfs_emit(buf, "%llu\n", (unsigned long long)ops);
+}
+
 static ssize_t inode_readahead_blks_store(struct ext4_sb_info *sbi,
 					  const char *buf, size_t count)
 {
@@ -204,6 +228,8 @@ static struct ext4_attr ext4_attr_##_nam
 EXT4_ATTR_FUNC(delayed_allocation_blocks, 0444);
 EXT4_ATTR_FUNC(session_write_kbytes, 0444);
 EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444);
+EXT4_ATTR_FUNC(lifetime_discard_kbytes, 0444);
+EXT4_ATTR_FUNC(lifetime_discard_ops, 0444);
 EXT4_ATTR_FUNC(reserved_clusters, 0644);
 EXT4_ATTR_FUNC(mb_c1_threshold, 0644);
 EXT4_ATTR_FUNC(mb_c2_threshold, 0644);
@@ -247,6 +273,8 @@ static struct attribute *ext4_attrs[] =
 	ATTR_LIST(delayed_allocation_blocks),
 	ATTR_LIST(session_write_kbytes),
 	ATTR_LIST(lifetime_write_kbytes),
+	ATTR_LIST(lifetime_discard_kbytes),
+	ATTR_LIST(lifetime_discard_ops),
 	ATTR_LIST(reserved_clusters),
 	ATTR_LIST(mb_c1_threshold),
 	ATTR_LIST(mb_c2_threshold),
@@ -352,6 +380,10 @@ static ssize_t ext4_attr_show(struct kob
 		return session_write_kbytes_show(sbi, buf);
 	case attr_lifetime_write_kbytes:
 		return lifetime_write_kbytes_show(sbi, buf);
+	case attr_lifetime_discard_kbytes:
+		return lifetime_discard_kbytes_show(sbi, buf);
+	case attr_lifetime_discard_ops:
+		return lifetime_discard_ops_show(sbi, buf);
 	case attr_reserved_clusters:
 		return snprintf(buf, PAGE_SIZE, "%llu\n",
 				(unsigned long long)
@@ -427,6 +459,57 @@ static ssize_t ext4_attr_store(struct ko
 	return 0;
 }
 
+static ssize_t ext4_seq_discard_stats_write(struct file *file,
+					    const char __user *buf,
+					    size_t count, loff_t *pos)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(PDE_DATA(file_inode(file)));
+	int i;
+
+	for (i = 0; i < EXT4_HIST_MAX; i++)	/* buf is never read */
+		percpu_counter_set(&sbi->s_discard_ops[i], 0);
+	return count;	/* report all input as consumed */
+}
+
+static int ext4_seq_discard_stats_show(struct seq_file *seq, void *offset)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(seq->private);
+	unsigned long hits, sum = 0;
+	bool printed = false;
+	int bit;
+
+	for (bit = 0; bit < EXT4_HIST_MAX; bit++) {
+		hits = percpu_counter_sum(&sbi->s_discard_ops[bit]);
+		if (!hits)
+			continue;	/* only non-empty buckets are listed */
+		seq_printf(seq, "%s ", printed ? "," : "sizes: {");
+		if (bit >= 20)
+			seq_printf(seq, "%luGB: %lu", BIT(bit - 20), hits);
+		else if (bit >= 10)
+			seq_printf(seq, "%luMB: %lu", BIT(bit - 10), hits);
+		else
+			seq_printf(seq, "%luKB: %lu", BIT(bit), hits);
+		printed = true;
+		sum += hits;
+	}
+	seq_printf(seq, "%scount: %lu\n", printed ? " }\n" : "", sum);
+	return 0;
+}
+
+static int ext4_seq_discard_stats_open(struct inode *inode, struct file *file)
+{	/* open as a single-record seq_file, passing on the proc entry's data */
+	return single_open(file, ext4_seq_discard_stats_show, PDE_DATA(inode));
+}
+
+static const struct file_operations ext4_seq_discard_stats_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ext4_seq_discard_stats_open,
+	.read		= seq_read,	/* prints the histogram */
+	.write		= ext4_seq_discard_stats_write,	/* zeroes buckets */
+	.llseek		= seq_lseek,
+	.release	= single_release,	/* pairs with single_open() */
+};
+
 static void ext4_sb_release(struct kobject *kobj)
 {
 	struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
@@ -477,6 +563,8 @@ int ext4_register_sysfs(struct super_blo
 	if (ext4_proc_root)
 		sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
 	if (sbi->s_proc) {
+		proc_create_data("discard_stats", 0644, sbi->s_proc,
+				&ext4_seq_discard_stats_fops, sb);
 		proc_create_single_data("options", S_IRUGO, sbi->s_proc,
 				ext4_seq_options_show, sb);
 		proc_create_single_data("es_shrinker_info", S_IRUGO,