Viewing: ext4-mballoc-dense.patch
commit 686dee707f8728aa8ba27bcd4cee69f8fbf7b278
Author: Alex Zhuravlev <bzzz@whamcloud.com>
AuthorDate: Wed Mar 1 21:28:25 2023 +0300
LU-10026 osd-ldiskfs: use preallocation for dense writes
Use the inode's preallocation chunks as a per-inode group preallocation:
dense writes just grab the very first available blocks from the window.
Test-Parameters: env=ONLY=1000,ONLY_REPEAT=11 testlist=sanity-compr
Test-Parameters: env=ONLY=fsx,ONLY_REPEAT=11 testlist=sanity-compr
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Artem Blagodarenko <ablagodarenko@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Change-Id: I9d36701f569f4c6305bc46f3373bfc054fcd61a9
Reviewed-on: https://review.whamcloud.com/50171
---
fs/ext4/ext4.h | 3 +++
fs/ext4/extents.c | 2 ++
fs/ext4/mballoc.c | 41 ++++++++++++++++++++++++++++++++++++++++-
fs/ext4/mballoc.h | 4 +++-
4 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8dd054d0..d3661a3f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -141,6 +141,8 @@ enum SHIFT_DIRECTION {
#define EXT4_MB_USE_ROOT_BLOCKS 0x1000
/* Use blocks from reserved pool */
#define EXT4_MB_USE_RESERVED 0x2000
+/* ldiskfs: very dense writes; take the first free blocks of an inode PA */
+#define EXT4_MB_VERY_DENSE 0x80000
struct ext4_allocation_request {
/* target inode for block we're allocating */
@@ -617,6 +619,7 @@ enum {
/* Caller will submit data before dropping transaction handle. This
* allows jbd2 to avoid submitting data before commit. */
#define EXT4_GET_BLOCKS_IO_SUBMIT 0x0400
+#define EXT4_GET_BLOCKS_VERY_DENSE 0x08000
/*
* The bit position of these flags must not overlap with any of the
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 507285d7..99ab7339 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4691,6 +4691,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
ar.flags = 0;
if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
ar.flags |= EXT4_MB_HINT_NOPREALLOC;
+ if (flags & EXT4_GET_BLOCKS_VERY_DENSE)
+ ar.flags |= EXT4_MB_VERY_DENSE;
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
ar.flags |= EXT4_MB_DELALLOC_RESERVED;
if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 397854ca..d9ec558b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3799,6 +3799,25 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
ext4_fsblk_t end;
int len;
+ if (ac->ac_flags & EXT4_MB_VERY_DENSE && !pa->pa_regular) {
+ unsigned int len = ac->ac_o_ex.fe_len;
+ if (len > pa->pa_free)
+ len = pa->pa_free;
+ ext4_get_group_no_and_offset(ac->ac_sb,
+ pa->pa_pstart,
+ &ac->ac_b_ex.fe_group,
+ &ac->ac_b_ex.fe_start);
+ ac->ac_b_ex.fe_len = len;
+ pa->pa_lstart += len;
+ pa->pa_pstart += len;
+ pa->pa_free -= len;
+ pa->pa_len -= len;
+ ac->ac_status = AC_STATUS_FOUND;
+ ac->ac_pa = pa;
+ return;
+ }
+
+ pa->pa_regular = 1;
/* found preallocated blocks, use them */
start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
@@ -3888,6 +3907,23 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
return 0;
+ if (ac->ac_flags & EXT4_MB_VERY_DENSE) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
+ spin_lock(&pa->pa_lock);
+ if (!pa->pa_deleted && pa->pa_free && !pa->pa_regular) {
+ atomic_inc(&pa->pa_count);
+ ext4_mb_use_inode_pa(ac, pa);
+ spin_unlock(&pa->pa_lock);
+ break;
+ }
+ spin_unlock(&pa->pa_lock);
+ }
+ rcu_read_unlock();
+ if (ac->ac_status == AC_STATUS_FOUND)
+ return true;
+ }
+
/* first, try per-file preallocation */
rcu_read_lock();
list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
@@ -4129,7 +4165,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
if (pa->pa_type == MB_GROUP_PA)
grp_blk--;
- grp = ext4_get_group_number(sb, grp_blk);
+ grp = pa->pa_group;
/*
* possible race:
@@ -4220,6 +4256,8 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
pa->pa_len = ac->ac_b_ex.fe_len;
pa->pa_free = pa->pa_len;
+ pa->pa_group = ac->ac_b_ex.fe_group;
+ pa->pa_regular = 0;
atomic_set(&pa->pa_count, 1);
spin_lock_init(&pa->pa_lock);
INIT_LIST_HEAD(&pa->pa_inode_list);
@@ -4282,6 +4320,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
pa->pa_lstart = pa->pa_pstart;
pa->pa_len = ac->ac_b_ex.fe_len;
pa->pa_free = pa->pa_len;
+ pa->pa_group = ac->ac_b_ex.fe_group;
atomic_set(&pa->pa_count, 1);
spin_lock_init(&pa->pa_lock);
INIT_LIST_HEAD(&pa->pa_inode_list);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 608a7020..cabbc02f 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -114,6 +114,8 @@ struct ext4_prealloc_space {
ext4_lblk_t pa_lstart; /* log. block */
ext4_grpblk_t pa_len; /* len of preallocated chunk */
ext4_grpblk_t pa_free; /* how many blocks are free */
+ ext4_grpblk_t pa_group;
+ unsigned short pa_regular;
unsigned short pa_type; /* pa type. inode or group */
unsigned short pa_error;
spinlock_t *pa_obj_lock;
@@ -170,7 +172,7 @@ struct ext4_allocation_context {
__u16 ac_found;
__u16 ac_tail;
__u16 ac_buddy;
- __u16 ac_flags; /* allocation hints */
+ __u32 ac_flags; /* allocation hints */
__u8 ac_status;
__u8 ac_criteria;
__u8 ac_2order; /* if request is to allocate 2^N blocks and
--
2.34.1