Viewing: ext4-mballoc-extra-checks.patch

commit f2f28f1d09c0a00b3fc569422f881931d857fac9
Author:     Alex Zhuravlev <alex.zhuravlev@sun.com>
AuthorDate: Tue Oct 28 17:59:09 2008 +0000
Subject: ext4: detect on-disk corruption of block bitmap

Detect on-disk corruption of the block bitmap and add better checking
of preallocated blocks.

Bugzilla-ID: b=16680
Signed-off-by: Alex Zhuravlev <alex.zhuravlev@sun.com>
Reviewed-by: Kalpak Shah <kalpak.shah@sun.com>
Signed-off-by: Andreas Dilger <andreas.dilger@sun.com>
---
 fs/ext4/ext4.h    |   1 +
 fs/ext4/mballoc.c | 105 ++++++++++++++++++++++++++++++++++++++++------
 fs/ext4/mballoc.h |   2 +-
 3 files changed, 94 insertions(+), 14 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index dfbb9dd1..504531e0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3465,6 +3465,7 @@ struct ext4_group_info {
 	ext4_grpblk_t	bb_largest_free_order;/* order of largest frag in BG */
 	ext4_group_t	bb_group;	/* Group number */
 	struct          list_head bb_prealloc_list;
+	unsigned long   bb_prealloc_nr;
 #ifdef DOUBLE_CHECK
 	void            *bb_bitmap;
 #endif
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b669bbec..f9a09807 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -416,7 +416,7 @@ static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
 	"ext4_groupinfo_64k", "ext4_groupinfo_128k"
 };
 
-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					ext4_group_t group);
 static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
 
@@ -1181,7 +1181,7 @@ mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
 }
 
 static noinline_for_stack
-void ext4_mb_generate_buddy(struct super_block *sb,
+int ext4_mb_generate_buddy(struct super_block *sb,
 			    void *buddy, void *bitmap, ext4_group_t group,
 			    struct ext4_group_info *grp)
 {
@@ -1225,6 +1225,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
 		grp->bb_free = free;
 		ext4_mark_group_bitmap_corrupted(sb, group,
 					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
+		return -EIO;
 	}
 	mb_set_largest_free_order(sb, grp);
 	mb_update_avg_fragment_size(sb, grp);
@@ -1234,6 +1235,8 @@ void ext4_mb_generate_buddy(struct super_block *sb,
 	period = get_cycles() - period;
 	atomic_inc(&sbi->s_mb_buddies_generated);
 	atomic64_add(period, &sbi->s_mb_generation_time);
+
+	return 0;
 }
 
 static void mb_regenerate_buddy(struct ext4_buddy *e4b)
@@ -1355,7 +1358,7 @@ static int ext4_mb_init_cache(struct folio *folio, char *incore, gfp_t gfp)
 	}
 
 	first_block = folio->index * blocks_per_page;
-	for (i = 0; i < blocks_per_page; i++) {
+	for (i = 0; i < blocks_per_page && err == 0; i++) {
 		group = (first_block + i) >> 1;
 		if (group >= ngroups)
 			break;
@@ -1403,7 +1406,7 @@ static int ext4_mb_init_cache(struct folio *folio, char *incore, gfp_t gfp)
 			ext4_lock_group(sb, group);
 			/* init the buddy */
 			memset(data, 0xff, blocksize);
-			ext4_mb_generate_buddy(sb, data, incore, group, grinfo);
+			err = ext4_mb_generate_buddy(sb, data, incore, group, grinfo);
 			ext4_unlock_group(sb, group);
 			incore = NULL;
 		} else {
@@ -1418,7 +1421,7 @@ static int ext4_mb_init_cache(struct folio *folio, char *incore, gfp_t gfp)
 			memcpy(data, bitmap, blocksize);
 
 			/* mark all preallocated blks used in in-core bitmap */
-			ext4_mb_generate_from_pa(sb, data, group);
+			err = ext4_mb_generate_from_pa(sb, data, group);
 			WARN_ON_ONCE(!RB_EMPTY_ROOT(&grinfo->bb_free_root));
 			ext4_unlock_group(sb, group);
 
@@ -1428,7 +1431,8 @@ static int ext4_mb_init_cache(struct folio *folio, char *incore, gfp_t gfp)
 			incore = data;
 		}
 	}
-	folio_mark_uptodate(folio);
+	if (likely(err == 0))
+		folio_mark_uptodate(folio);
 
 out:
 	if (bh) {
@@ -3030,8 +3034,10 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
 {
 	struct super_block *sb = pde_data(file_inode(seq->file));
 	ext4_group_t group = (ext4_group_t) ((unsigned long) v);
+	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
 	int i, err;
 	char nbuf[16];
+	int free = 0;
 	struct ext4_buddy e4b;
 	struct ext4_group_info *grinfo;
 	unsigned char blocksize_bits = min_t(unsigned char,
@@ -3040,9 +3046,12 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
 	DEFINE_RAW_FLEX(struct ext4_group_info, sg, bb_counters,
 			EXT4_MAX_BLOCK_LOG_SIZE + 2);
 
+	if (gdp)
+		free = ext4_free_group_clusters(sb, gdp);
+
 	group--;
 	if (group == 0)
-		seq_puts(seq, "#group: free  frags first ["
+		seq_puts(seq, "#group: bfree gfree frags first pa    ["
 			      " 2^0   2^1   2^2   2^3   2^4   2^5   2^6  "
 			      " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]\n");
 
@@ -3067,8 +3076,10 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
 	 * these are safe to access even after the buddy has been unloaded
 	 */
 	memcpy(sg, grinfo, i);
-	seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg->bb_free,
-			sg->bb_fragments, sg->bb_first_free);
+	seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [",
+			(long unsigned int)group, sg->bb_free, free,
+			sg->bb_fragments, sg->bb_first_free,
+			sg->bb_prealloc_nr);
 	for (i = 0; i <= 13; i++)
 		seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
 				sg->bb_counters[i] : 0);
@@ -5084,25 +5095,75 @@ try_group_pa:
 	return false;
 }
 
+/*
+ * Count the free (zero) bits in the bitmap and compare the total with the
+ * free cluster count in the group descriptor. Done before preallocations are
+ * applied so on-disk corruption can be detected. Returns 0 if the counts
+ * agree, -EIO otherwise. The group lock should be held by the caller.
+ */
+static int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
+				       struct ext4_group_desc *gdp, int group)
+{
+	/* 32-bit counters so that cluster counts above 65535 are not truncated */
+	unsigned int max = EXT4_CLUSTERS_PER_GROUP(sb);
+	unsigned int i, first, free = 0;
+	unsigned int free_in_gdp = ext4_free_group_clusters(sb, gdp);
+
+	if (free_in_gdp == 0 && (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
+		return 0;
+
+	i = mb_find_next_zero_bit(bitmap, max, 0);
+	while (i < max) {
+		/* [first, i) is one run of free bits; clamp the end to max */
+		first = i;
+		i = mb_find_next_bit(bitmap, max, i);
+		if (i > max)
+			i = max;
+		free += i - first;
+		if (i < max)
+			i = mb_find_next_zero_bit(bitmap, max, i);
+	}
+
+	if (free != free_in_gdp) {
+		ext4_error(sb, "on-disk bitmap for group %d "
+			"corrupted: %u blocks free in bitmap, %u - in gd\n",
+			group, free, free_in_gdp);
+		return -EIO;
+	}
+	return 0;
+}
+
 /*
  * the function goes through all preallocation in this group and marks them
  * used in in-core bitmap. buddy must be generated from this bitmap
  * Need to be called with ext4 group lock held
  */
 static noinline_for_stack
-void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					ext4_group_t group)
 {
 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
 	struct ext4_prealloc_space *pa;
+	struct ext4_group_desc *gdp;
 	struct list_head *cur;
 	ext4_group_t groupnr;
 	ext4_grpblk_t start;
 	int preallocated = 0;
+	int skip = 0, count = 0;
+	int err;
 	int len;
 
 	if (!grp)
-		return;
+		return -EIO;
+
+	gdp = ext4_get_group_desc(sb, group, NULL);
+	if (gdp == NULL)
+		return -EIO;
+
+	/* before applying preallocations, check bitmap consistency */
+	err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
+	if (err)
+		return err;
 
 	/* all form of preallocation discards first load group,
 	 * so the only competing code is preallocation use.
@@ -5119,13 +5180,23 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					     &groupnr, &start);
 		len = pa->pa_len;
 		spin_unlock(&pa->pa_lock);
-		if (unlikely(len == 0))
+		if (unlikely(len == 0)) {
+			skip++;
 			continue;
+		}
 		BUG_ON(groupnr != group);
 		mb_set_bits(bitmap, start, len);
 		preallocated += len;
+		count++;
+	}
+	if (count + skip != grp->bb_prealloc_nr) {
+		ext4_error(sb, "lost preallocations: "
+			   "count %d, bb_prealloc_nr %lu, skip %d\n",
+			   count, grp->bb_prealloc_nr, skip);
+		return -EIO;
 	}
 	mb_debug(sb, "preallocated %d for group %u\n", preallocated, group);
+	return 0;
 }
 
 static void ext4_mb_mark_pa_deleted(struct super_block *sb,
@@ -5216,6 +5287,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
 	 */
 	ext4_lock_group(sb, grp);
 	list_del(&pa->pa_group_list);
+	ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
 	ext4_unlock_group(sb, grp);
 
 	if (pa->pa_type == MB_INODE_PA) {
@@ -5349,6 +5421,7 @@ adjust_bex:
 	pa->pa_inode = ac->ac_inode;
 
 	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
+	grp->bb_prealloc_nr++;
 
 	write_lock(pa->pa_node_lock.inode_lock);
 	ext4_mb_pa_rb_insert(&ei->i_prealloc_node, &pa->pa_node.inode_node);
@@ -5402,6 +5475,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
 	pa->pa_inode = NULL;
 
 	list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
+	grp->bb_prealloc_nr++;
 
 	/*
 	 * We will later add the new pa to the right bucket
@@ -5568,6 +5642,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
 
 		spin_unlock(&pa->pa_lock);
 
+		BUG_ON(grp->bb_prealloc_nr == 0);
+		grp->bb_prealloc_nr--;
 		list_del(&pa->pa_group_list);
 		list_add(&pa->u.pa_tmp_list, &list);
 	}
@@ -5699,7 +5775,7 @@ repeat:
 		if (err) {
 			ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
 				       err, group);
-			continue;
+			return;
 		}
 
 		bitmap_bh = ext4_read_block_bitmap(sb, group);
@@ -5712,6 +5788,8 @@ repeat:
 		}
 
 		ext4_lock_group(sb, group);
+		BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
+		e4b.bd_info->bb_prealloc_nr--;
 		list_del(&pa->pa_group_list);
 		ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
 		ext4_unlock_group(sb, group);
@@ -6016,6 +6094,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
 		}
 		ext4_lock_group(sb, group);
 		list_del(&pa->pa_group_list);
+		ext4_get_group_info(sb, group)->bb_prealloc_nr--;
 		ext4_mb_release_group_pa(&e4b, pa);
 		ext4_unlock_group(sb, group);
 
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index f8280de3..6b8821d3 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -66,7 +66,7 @@
 /*
  * for which requests use 2^N search using buddies
  */
-#define MB_DEFAULT_ORDER2_REQS		2
+#define MB_DEFAULT_ORDER2_REQS		8
 
 /*
  * default group prealloc size 512 blocks
-- 
2.45.2