Viewing: ext4-mballoc-dense.patch

commit 686dee707f8728aa8ba27bcd4cee69f8fbf7b278
Author:     Alex Zhuravlev <bzzz@whamcloud.com>
AuthorDate: Wed Mar 1 21:28:25 2023 +0300
LU-10026 osd-ldiskfs: use preallocation for dense writes

use inode's preallocation chunks as per-inode group preallocation:
just grab the very first available blocks from the window.

Test-Parameters: env=ONLY=1000,ONLY_REPEAT=11 testlist=sanity-compr
Test-Parameters: env=ONLY=fsx,ONLY_REPEAT=11 testlist=sanity-compr
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Artem Blagodarenko <ablagodarenko@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Change-Id: I9d36701f569f4c6305bc46f3373bfc054fcd61a9
Reviewed-on: https://review.whamcloud.com/50171
---
 fs/ext4/ext4.h    |  3 +++
 fs/ext4/extents.c |  2 ++
 fs/ext4/mballoc.c | 41 ++++++++++++++++++++++++++++++++++++++++-
 fs/ext4/mballoc.h |  4 +++-
 4 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8dd054d0..d3661a3f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -141,6 +141,8 @@ enum SHIFT_DIRECTION {
 #define EXT4_MB_USE_ROOT_BLOCKS		0x1000
 /* Use blocks from reserved pool */
 #define EXT4_MB_USE_RESERVED		0x2000
+/* Ldiskfs very dense writes */
+#define EXT4_MB_VERY_DENSE		0x80000
 
 struct ext4_allocation_request {
 	/* target inode for block we're allocating */
@@ -617,6 +619,7 @@ enum {
 	/* Caller will submit data before dropping transaction handle. This
 	 * allows jbd2 to avoid submitting data before commit. */
 #define EXT4_GET_BLOCKS_IO_SUBMIT		0x0400
+#define EXT4_GET_BLOCKS_VERY_DENSE		0x08000
 
 /*
  * The bit position of these flags must not overlap with any of the
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 507285d7..99ab7339 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4691,6 +4691,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		ar.flags = 0;
 	if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
 		ar.flags |= EXT4_MB_HINT_NOPREALLOC;
+	if (flags & EXT4_GET_BLOCKS_VERY_DENSE)
+		ar.flags |= EXT4_MB_VERY_DENSE;
 	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
 		ar.flags |= EXT4_MB_DELALLOC_RESERVED;
 	if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 397854ca..d9ec558b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3799,6 +3799,25 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
 	ext4_fsblk_t end;
 	int len;
 
+	if (ac->ac_flags & EXT4_MB_VERY_DENSE && !pa->pa_regular) {
+		unsigned int len = ac->ac_o_ex.fe_len;
+		if (len > pa->pa_free)
+			len = pa->pa_free;
+		ext4_get_group_no_and_offset(ac->ac_sb,
+					pa->pa_pstart,
+					&ac->ac_b_ex.fe_group,
+					&ac->ac_b_ex.fe_start);
+		ac->ac_b_ex.fe_len = len;
+		pa->pa_lstart += len;
+		pa->pa_pstart += len;
+		pa->pa_free -= len;
+		pa->pa_len -= len;
+		ac->ac_status = AC_STATUS_FOUND;
+		ac->ac_pa = pa;
+		return;
+	}
+
+	pa->pa_regular = 1;
 	/* found preallocated blocks, use them */
 	start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
 	end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
@@ -3888,6 +3907,23 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 	if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
 		return 0;
 
+	if (ac->ac_flags & EXT4_MB_VERY_DENSE) {
+		rcu_read_lock();
+		list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
+			spin_lock(&pa->pa_lock);
+			if (!pa->pa_deleted && pa->pa_free && !pa->pa_regular) {
+				atomic_inc(&pa->pa_count);
+				ext4_mb_use_inode_pa(ac, pa);
+				spin_unlock(&pa->pa_lock);
+				break;
+			}
+			spin_unlock(&pa->pa_lock);
+		}
+		rcu_read_unlock();
+		if (ac->ac_status == AC_STATUS_FOUND)
+			return true;
+	}
+
 	/* first, try per-file preallocation */
 	rcu_read_lock();
 	list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
@@ -4129,7 +4165,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
 	if (pa->pa_type == MB_GROUP_PA)
 		grp_blk--;
 
-	grp = ext4_get_group_number(sb, grp_blk);
+	grp = pa->pa_group;
 
 	/*
 	 * possible race:
@@ -4220,6 +4256,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 	pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
 	pa->pa_len = ac->ac_b_ex.fe_len;
 	pa->pa_free = pa->pa_len;
+	pa->pa_group = ac->ac_b_ex.fe_group;
+	pa->pa_regular = 0;
 	atomic_set(&pa->pa_count, 1);
 	spin_lock_init(&pa->pa_lock);
 	INIT_LIST_HEAD(&pa->pa_inode_list);
@@ -4282,6 +4320,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
 	pa->pa_lstart = pa->pa_pstart;
 	pa->pa_len = ac->ac_b_ex.fe_len;
 	pa->pa_free = pa->pa_len;
+	pa->pa_group = ac->ac_b_ex.fe_group;
 	atomic_set(&pa->pa_count, 1);
 	spin_lock_init(&pa->pa_lock);
 	INIT_LIST_HEAD(&pa->pa_inode_list);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 608a7020..cabbc02f 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -114,6 +114,8 @@ struct ext4_prealloc_space {
 	ext4_lblk_t		pa_lstart;	/* log. block */
 	ext4_grpblk_t		pa_len;		/* len of preallocated chunk */
 	ext4_grpblk_t		pa_free;	/* how many blocks are free */
+	ext4_grpblk_t		pa_group;
+	unsigned short		pa_regular;
 	unsigned short		pa_type;	/* pa type. inode or group */
 	unsigned short		pa_error;
 	spinlock_t		*pa_obj_lock;
@@ -170,7 +172,7 @@ struct ext4_allocation_context {
 	__u16 ac_found;
 	__u16 ac_tail;
 	__u16 ac_buddy;
-	__u16 ac_flags;		/* allocation hints */
+	__u32 ac_flags;		/* allocation hints */
 	__u8 ac_status;
 	__u8 ac_criteria;
 	__u8 ac_2order;		/* if request is to allocate 2^N blocks and
-- 
2.34.1