Viewing: ext4-mballoc-dense.patch
commit 686dee707f8728aa8ba27bcd4cee69f8fbf7b278
Author: Alex Zhuravlev <bzzz@whamcloud.com>
AuthorDate: Wed Mar 1 21:28:25 2023 +0300
LU-10026 osd-ldiskfs: use preallocation for dense writes
Use the inode's preallocation chunks as a per-inode group preallocation:
for dense writes, just grab the very first available blocks from the window.
Test-Parameters: env=ONLY=1000,ONLY_REPEAT=11 testlist=sanity-compr
Test-Parameters: env=ONLY=fsx,ONLY_REPEAT=11 testlist=sanity-compr
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Artem Blagodarenko <ablagodarenko@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Change-Id: I9d36701f569f4c6305bc46f3373bfc054fcd61a9
Reviewed-on: https://review.whamcloud.com/50171
--- linux-4.18.0-80.1.2.el8_0.orig/fs/ext4/mballoc.h
+++ linux-4.18.0-80.1.2.el8_0/fs/ext4/mballoc.h
@@ -131,6 +131,8 @@ enum SHIFT_DIRECTION {
ext4_lblk_t pa_lstart; /* log. block */
ext4_grpblk_t pa_len; /* len of preallocated chunk */
ext4_grpblk_t pa_free; /* how many blocks are free */
+ ext4_grpblk_t pa_group; /* group number, copied from ac_b_ex.fe_group at pa creation; NOTE(review): ext4_grpblk_t is otherwise used for in-group lengths here - confirm it fits a group number */
+ unsigned short pa_regular; /* 0 on a new inode pa; set to 1 once the pa is used by logical offset */
unsigned short pa_type; /* pa type. inode or group */
unsigned short pa_error;
spinlock_t *pa_obj_lock;
@@ -167,7 +167,7 @@ struct ext4_allocation_request {
__u16 ac_found;
__u16 ac_tail;
__u16 ac_buddy;
- __u16 ac_flags; /* allocation hints */
+ __u32 ac_flags; /* allocation hints */
__u8 ac_status;
__u8 ac_criteria;
__u8 ac_2order; /* if request is to allocate 2^N blocks and
--- linux-4.18.0-80.1.2.el8_0.orig/fs/ext4/ext4.h
+++ linux-4.18.0-80.1.2.el8_0/fs/ext4/ext4.h
@@ -151,6 +151,7 @@ enum SHIFT_DIRECTION {
#define EXT4_MB_USE_RESERVED 0x2000
/* Do strict check for free blocks while retrying block allocation */
#define EXT4_MB_STRICT_CHECK 0x4000
+#define EXT4_MB_VERY_DENSE 0x80000 /* dense write: take the first free blocks of an inode pa; needs the 32-bit ac_flags */
struct ext4_allocation_request {
/* target inode for block we're allocating */
@@ -627,6 +628,7 @@ enum {
/* Caller will submit data before dropping transaction handle. This
* allows jbd2 to avoid submitting data before commit. */
#define EXT4_GET_BLOCKS_IO_SUBMIT 0x0400
+#define EXT4_GET_BLOCKS_VERY_DENSE 0x08000 /* translated to EXT4_MB_VERY_DENSE in ext4_ext_map_blocks() */
/*
* The bit position of these flags must not overlap with any of the
--- linux-4.18.0-80.1.2.el8_0.orig/fs/ext4/extents.c
+++ linux-4.18.0-80.1.2.el8_0/fs/ext4/extents.c
@@ -4484,6 +4467,8 @@ int ext4_ext_map_blocks(handle_t *han
ar.flags = 0;
if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
ar.flags |= EXT4_MB_HINT_NOPREALLOC;
+ if (flags & EXT4_GET_BLOCKS_VERY_DENSE)
+ ar.flags |= EXT4_MB_VERY_DENSE;
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
ar.flags |= EXT4_MB_DELALLOC_RESERVED;
if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
--- linux-4.18.0-80.1.2.el8_0.orig/fs/ext4/mballoc.c
+++ linux-4.18.0-80.1.2.el8_0/fs/ext4/mballoc.c
@@ -4267,6 +4291,23 @@ ext4_mb_use_inode_pa(struct ext4
ext4_fsblk_t end;
int len;
+ if (!pa->pa_regular && (ac->ac_flags & EXT4_MB_VERY_DENSE ||
+ pa->pa_free != pa->pa_len)) { /* dense request, or pa already partly consumed (presumably by an earlier dense allocation) */
+ unsigned int len = ac->ac_o_ex.fe_len; /* requested length; NOTE(review): shadows the outer 'int len' declared above */
+ if (len > pa->pa_free)
+ len = pa->pa_free; /* never hand out more than the pa still holds */
+ ext4_get_group_no_and_offset(ac->ac_sb,
+ pa->pa_pstart + (pa->pa_len - pa->pa_free), /* start right after the part already consumed; NOTE(review): the regular path converts pa_len with EXT4_C2B() - confirm none is needed here */
+ &ac->ac_b_ex.fe_group,
+ &ac->ac_b_ex.fe_start);
+ ac->ac_b_ex.fe_len = len;
+ pa->pa_free -= len;
+ ac->ac_status = AC_STATUS_FOUND;
+ ac->ac_pa = pa;
+ return; /* dense allocation done; pa_regular stays 0 */
+ }
+
+ pa->pa_regular = 1; /* pa is now used by logical offset; the dense branch above is skipped from here on */
/* found preallocated blocks, use them */
start = pa->pa_pstart + (ac->ac_o_ex.fe_logical - pa->pa_lstart);
end = min(pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len),
@@ -4367,6 +4380,23 @@ ext4_mb_use_preallocated(struct ext4
if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
return false;
+ if (ac->ac_flags & EXT4_MB_VERY_DENSE) { /* dense request: try inode pa's without matching the logical offset */
+ rcu_read_lock();
+ list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
+ spin_lock(&pa->pa_lock);
+ if (!pa->pa_deleted && pa->pa_free && !pa->pa_regular) { /* not deleted, has free blocks, never used by logical offset */
+ atomic_inc(&pa->pa_count);
+ ext4_mb_use_inode_pa(ac, pa); /* takes its dense branch for such a pa and sets AC_STATUS_FOUND */
+ spin_unlock(&pa->pa_lock);
+ break; /* first suitable pa wins */
+ }
+ spin_unlock(&pa->pa_lock);
+ }
+ rcu_read_unlock();
+ if (ac->ac_status == AC_STATUS_FOUND)
+ return true;
+ } /* otherwise fall through to the regular lookups below */
+
/* first, try per-file preallocation */
rcu_read_lock();
list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
@@ -4833,7 +4833,7 @@ ext4_mb_put_pa(struct ext4
if (pa->pa_type == MB_GROUP_PA)
grp_blk--;
- grp = ext4_get_group_number(sb, grp_blk);
+ grp = pa->pa_group;
/*
* possible race:
@@ -4894,6 +4894,8 @@ ext4_mb_new_inode_pa(struct ext4
pa->pa_pstart = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
pa->pa_len = ac->ac_b_ex.fe_len;
pa->pa_free = pa->pa_len;
+ pa->pa_group = ac->ac_b_ex.fe_group; /* remembered so ext4_mb_put_pa() can read the group directly */
+ pa->pa_regular = 0; /* new inode pa: still eligible for the dense path */
spin_lock_init(&pa->pa_lock);
INIT_LIST_HEAD(&pa->pa_inode_list);
INIT_LIST_HEAD(&pa->pa_group_list);
@@ -5004,6 +5005,7 @@ ext4_mb_new_group_pa(struct ext4
pa->pa_lstart = pa->pa_pstart;
pa->pa_len = ac->ac_b_ex.fe_len;
pa->pa_free = pa->pa_len;
+ pa->pa_group = ac->ac_b_ex.fe_group; /* read back in ext4_mb_put_pa(); NOTE(review): pa_regular is not initialized on this path - confirm it is never read for a group pa */
spin_lock_init(&pa->pa_lock);
INIT_LIST_HEAD(&pa->pa_inode_list);
INIT_LIST_HEAD(&pa->pa_group_list);