[PATCH v2 08/16] ext4: merge freed extent with existing extents before insertion

Baokun Li posted 16 patches 3 months, 2 weeks ago
There is a newer version of this series
[PATCH v2 08/16] ext4: merge freed extent with existing extents before insertion
Posted by Baokun Li 3 months, 2 weeks ago
Attempt to merge ext4_free_data with already inserted free extents prior
to adding new ones. This strategy drastically cuts down the number of
times locks are held.

For example, if prev, new, and next extents are all mergeable, the existing
code (before this patch) requires acquiring the s_md_lock three times:

  prev merge into new and free prev // hold lock
  next merge into new and free next // hold lock
  insert new // hold lock

After the patch, it only needs to be acquired once:

  new merge next and free new // no lock
  next merge into prev and free prev // hold lock

Performance test data follows:

Test: Running will-it-scale/fallocate2 on CPU-bound containers.
Observation: Average fallocate operations per container per second.

                   | Kunpeng 920 / 512GB -P80|  AMD 9654 / 1536GB -P96 |
 Disk: 960GB SSD   |-------------------------|-------------------------|
                   | base  |    patched      | base  |    patched      |
-------------------|-------|-----------------|-------|-----------------|
mb_optimize_scan=0 | 20982 | 21157 (+0.8%)   | 50629 | 50420 (-0.4%)   |
mb_optimize_scan=1 | 10703 | 12896 (+20.4%)  | 14856 | 17273 (+16.2%)  |

Signed-off-by: Baokun Li <libaokun1@huawei.com>
---
 fs/ext4/mballoc.c | 113 +++++++++++++++++++++++++++++++---------------
 1 file changed, 76 insertions(+), 37 deletions(-)

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5410fb3688ee..94950b07a577 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -6298,28 +6298,63 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
  * are contiguous, AND the extents were freed by the same transaction,
  * AND the blocks are associated with the same group.
  */
-static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
-					struct ext4_free_data *entry,
-					struct ext4_free_data *new_entry,
-					struct rb_root *entry_rb_root)
+static inline bool
+ext4_freed_extents_can_be_merged(struct ext4_free_data *entry1,
+				 struct ext4_free_data *entry2)
 {
-	if ((entry->efd_tid != new_entry->efd_tid) ||
-	    (entry->efd_group != new_entry->efd_group))
-		return;
-	if (entry->efd_start_cluster + entry->efd_count ==
-	    new_entry->efd_start_cluster) {
-		new_entry->efd_start_cluster = entry->efd_start_cluster;
-		new_entry->efd_count += entry->efd_count;
-	} else if (new_entry->efd_start_cluster + new_entry->efd_count ==
-		   entry->efd_start_cluster) {
-		new_entry->efd_count += entry->efd_count;
-	} else
-		return;
+	if (entry1->efd_tid != entry2->efd_tid)
+		return false;
+	if (entry1->efd_start_cluster + entry1->efd_count !=
+	    entry2->efd_start_cluster)
+		return false;
+	if (WARN_ON_ONCE(entry1->efd_group != entry2->efd_group))
+		return false;
+	return true;
+}
+
+static inline void
+ext4_merge_freed_extents(struct ext4_sb_info *sbi, struct rb_root *root,
+			 struct ext4_free_data *entry1,
+			 struct ext4_free_data *entry2)
+{
+	entry1->efd_count += entry2->efd_count;
 	spin_lock(&sbi->s_md_lock);
-	list_del(&entry->efd_list);
+	list_del(&entry2->efd_list);
 	spin_unlock(&sbi->s_md_lock);
-	rb_erase(&entry->efd_node, entry_rb_root);
-	kmem_cache_free(ext4_free_data_cachep, entry);
+	rb_erase(&entry2->efd_node, root);
+	kmem_cache_free(ext4_free_data_cachep, entry2);
+}
+
+static inline void
+ext4_try_merge_freed_extent_prev(struct ext4_sb_info *sbi, struct rb_root *root,
+				 struct ext4_free_data *entry)
+{
+	struct ext4_free_data *prev;
+	struct rb_node *node;
+
+	node = rb_prev(&entry->efd_node);
+	if (!node)
+		return;
+
+	prev = rb_entry(node, struct ext4_free_data, efd_node);
+	if (ext4_freed_extents_can_be_merged(prev, entry))
+		ext4_merge_freed_extents(sbi, root, prev, entry);
+}
+
+static inline void
+ext4_try_merge_freed_extent_next(struct ext4_sb_info *sbi, struct rb_root *root,
+				 struct ext4_free_data *entry)
+{
+	struct ext4_free_data *next;
+	struct rb_node *node;
+
+	node = rb_next(&entry->efd_node);
+	if (!node)
+		return;
+
+	next = rb_entry(node, struct ext4_free_data, efd_node);
+	if (ext4_freed_extents_can_be_merged(entry, next))
+		ext4_merge_freed_extents(sbi, root, entry, next);
 }
 
 static noinline_for_stack void
@@ -6329,11 +6364,12 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 	ext4_group_t group = e4b->bd_group;
 	ext4_grpblk_t cluster;
 	ext4_grpblk_t clusters = new_entry->efd_count;
-	struct ext4_free_data *entry;
+	struct ext4_free_data *entry = NULL;
 	struct ext4_group_info *db = e4b->bd_info;
 	struct super_block *sb = e4b->bd_sb;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct rb_node **n = &db->bb_free_root.rb_node, *node;
+	struct rb_root *root = &db->bb_free_root;
+	struct rb_node **n = &root->rb_node;
 	struct rb_node *parent = NULL, *new_node;
 
 	BUG_ON(!ext4_handle_valid(handle));
@@ -6369,27 +6405,30 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 		}
 	}
 
-	rb_link_node(new_node, parent, n);
-	rb_insert_color(new_node, &db->bb_free_root);
-
-	/* Now try to see the extent can be merged to left and right */
-	node = rb_prev(new_node);
-	if (node) {
-		entry = rb_entry(node, struct ext4_free_data, efd_node);
-		ext4_try_merge_freed_extent(sbi, entry, new_entry,
-					    &(db->bb_free_root));
+	atomic_add(clusters, &sbi->s_mb_free_pending);
+	if (!entry)
+		goto insert;
+
+	/* Now try to see the extent can be merged to prev and next */
+	if (ext4_freed_extents_can_be_merged(new_entry, entry)) {
+		entry->efd_start_cluster = cluster;
+		entry->efd_count += new_entry->efd_count;
+		kmem_cache_free(ext4_free_data_cachep, new_entry);
+		ext4_try_merge_freed_extent_prev(sbi, root, entry);
+		return;
 	}
-
-	node = rb_next(new_node);
-	if (node) {
-		entry = rb_entry(node, struct ext4_free_data, efd_node);
-		ext4_try_merge_freed_extent(sbi, entry, new_entry,
-					    &(db->bb_free_root));
+	if (ext4_freed_extents_can_be_merged(entry, new_entry)) {
+		entry->efd_count += new_entry->efd_count;
+		kmem_cache_free(ext4_free_data_cachep, new_entry);
+		ext4_try_merge_freed_extent_next(sbi, root, entry);
+		return;
 	}
+insert:
+	rb_link_node(new_node, parent, n);
+	rb_insert_color(new_node, root);
 
 	spin_lock(&sbi->s_md_lock);
 	list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list[new_entry->efd_tid & 1]);
-	atomic_add(clusters, &sbi->s_mb_free_pending);
 	spin_unlock(&sbi->s_md_lock);
 }
 
-- 
2.46.1
Re: [PATCH v2 08/16] ext4: merge freed extent with existing extents before insertion
Posted by Jan Kara 3 months, 1 week ago
On Mon 23-06-25 15:32:56, Baokun Li wrote:
> Attempt to merge ext4_free_data with already inserted free extents prior
> to adding new ones. This strategy drastically cuts down the number of
> times locks are held.
> 
> For example, if prev, new, and next extents are all mergeable, the existing
> code (before this patch) requires acquiring the s_md_lock three times:
> 
>   prev merge into new and free prev // hold lock
>   next merge into new and free next // hold lock
>   insert new // hold lock
> 
> After the patch, it only needs to be acquired once:
> 
>   new merge next and free new // no lock
>   next merge into prev and free prev // hold lock
> 
> Performance test data follows:
> 
> Test: Running will-it-scale/fallocate2 on CPU-bound containers.
> Observation: Average fallocate operations per container per second.
> 
>                    | Kunpeng 920 / 512GB -P80|  AMD 9654 / 1536GB -P96 |
>  Disk: 960GB SSD   |-------------------------|-------------------------|
>                    | base  |    patched      | base  |    patched      |
> -------------------|-------|-----------------|-------|-----------------|
> mb_optimize_scan=0 | 20982 | 21157 (+0.8%)   | 50629 | 50420 (-0.4%)   |
> mb_optimize_scan=1 | 10703 | 12896 (+20.4%)  | 14856 | 17273 (+16.2%)  |
> 
> Signed-off-by: Baokun Li <libaokun1@huawei.com>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  fs/ext4/mballoc.c | 113 +++++++++++++++++++++++++++++++---------------
>  1 file changed, 76 insertions(+), 37 deletions(-)
> 
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 5410fb3688ee..94950b07a577 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -6298,28 +6298,63 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
>   * are contiguous, AND the extents were freed by the same transaction,
>   * AND the blocks are associated with the same group.
>   */
> -static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
> -					struct ext4_free_data *entry,
> -					struct ext4_free_data *new_entry,
> -					struct rb_root *entry_rb_root)
> +static inline bool
> +ext4_freed_extents_can_be_merged(struct ext4_free_data *entry1,
> +				 struct ext4_free_data *entry2)
>  {
> -	if ((entry->efd_tid != new_entry->efd_tid) ||
> -	    (entry->efd_group != new_entry->efd_group))
> -		return;
> -	if (entry->efd_start_cluster + entry->efd_count ==
> -	    new_entry->efd_start_cluster) {
> -		new_entry->efd_start_cluster = entry->efd_start_cluster;
> -		new_entry->efd_count += entry->efd_count;
> -	} else if (new_entry->efd_start_cluster + new_entry->efd_count ==
> -		   entry->efd_start_cluster) {
> -		new_entry->efd_count += entry->efd_count;
> -	} else
> -		return;
> +	if (entry1->efd_tid != entry2->efd_tid)
> +		return false;
> +	if (entry1->efd_start_cluster + entry1->efd_count !=
> +	    entry2->efd_start_cluster)
> +		return false;
> +	if (WARN_ON_ONCE(entry1->efd_group != entry2->efd_group))
> +		return false;
> +	return true;
> +}
> +
> +static inline void
> +ext4_merge_freed_extents(struct ext4_sb_info *sbi, struct rb_root *root,
> +			 struct ext4_free_data *entry1,
> +			 struct ext4_free_data *entry2)
> +{
> +	entry1->efd_count += entry2->efd_count;
>  	spin_lock(&sbi->s_md_lock);
> -	list_del(&entry->efd_list);
> +	list_del(&entry2->efd_list);
>  	spin_unlock(&sbi->s_md_lock);
> -	rb_erase(&entry->efd_node, entry_rb_root);
> -	kmem_cache_free(ext4_free_data_cachep, entry);
> +	rb_erase(&entry2->efd_node, root);
> +	kmem_cache_free(ext4_free_data_cachep, entry2);
> +}
> +
> +static inline void
> +ext4_try_merge_freed_extent_prev(struct ext4_sb_info *sbi, struct rb_root *root,
> +				 struct ext4_free_data *entry)
> +{
> +	struct ext4_free_data *prev;
> +	struct rb_node *node;
> +
> +	node = rb_prev(&entry->efd_node);
> +	if (!node)
> +		return;
> +
> +	prev = rb_entry(node, struct ext4_free_data, efd_node);
> +	if (ext4_freed_extents_can_be_merged(prev, entry))
> +		ext4_merge_freed_extents(sbi, root, prev, entry);
> +}
> +
> +static inline void
> +ext4_try_merge_freed_extent_next(struct ext4_sb_info *sbi, struct rb_root *root,
> +				 struct ext4_free_data *entry)
> +{
> +	struct ext4_free_data *next;
> +	struct rb_node *node;
> +
> +	node = rb_next(&entry->efd_node);
> +	if (!node)
> +		return;
> +
> +	next = rb_entry(node, struct ext4_free_data, efd_node);
> +	if (ext4_freed_extents_can_be_merged(entry, next))
> +		ext4_merge_freed_extents(sbi, root, entry, next);
>  }
>  
>  static noinline_for_stack void
> @@ -6329,11 +6364,12 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
>  	ext4_group_t group = e4b->bd_group;
>  	ext4_grpblk_t cluster;
>  	ext4_grpblk_t clusters = new_entry->efd_count;
> -	struct ext4_free_data *entry;
> +	struct ext4_free_data *entry = NULL;
>  	struct ext4_group_info *db = e4b->bd_info;
>  	struct super_block *sb = e4b->bd_sb;
>  	struct ext4_sb_info *sbi = EXT4_SB(sb);
> -	struct rb_node **n = &db->bb_free_root.rb_node, *node;
> +	struct rb_root *root = &db->bb_free_root;
> +	struct rb_node **n = &root->rb_node;
>  	struct rb_node *parent = NULL, *new_node;
>  
>  	BUG_ON(!ext4_handle_valid(handle));
> @@ -6369,27 +6405,30 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
>  		}
>  	}
>  
> -	rb_link_node(new_node, parent, n);
> -	rb_insert_color(new_node, &db->bb_free_root);
> -
> -	/* Now try to see the extent can be merged to left and right */
> -	node = rb_prev(new_node);
> -	if (node) {
> -		entry = rb_entry(node, struct ext4_free_data, efd_node);
> -		ext4_try_merge_freed_extent(sbi, entry, new_entry,
> -					    &(db->bb_free_root));
> +	atomic_add(clusters, &sbi->s_mb_free_pending);
> +	if (!entry)
> +		goto insert;
> +
> +	/* Now try to see the extent can be merged to prev and next */
> +	if (ext4_freed_extents_can_be_merged(new_entry, entry)) {
> +		entry->efd_start_cluster = cluster;
> +		entry->efd_count += new_entry->efd_count;
> +		kmem_cache_free(ext4_free_data_cachep, new_entry);
> +		ext4_try_merge_freed_extent_prev(sbi, root, entry);
> +		return;
>  	}
> -
> -	node = rb_next(new_node);
> -	if (node) {
> -		entry = rb_entry(node, struct ext4_free_data, efd_node);
> -		ext4_try_merge_freed_extent(sbi, entry, new_entry,
> -					    &(db->bb_free_root));
> +	if (ext4_freed_extents_can_be_merged(entry, new_entry)) {
> +		entry->efd_count += new_entry->efd_count;
> +		kmem_cache_free(ext4_free_data_cachep, new_entry);
> +		ext4_try_merge_freed_extent_next(sbi, root, entry);
> +		return;
>  	}
> +insert:
> +	rb_link_node(new_node, parent, n);
> +	rb_insert_color(new_node, root);
>  
>  	spin_lock(&sbi->s_md_lock);
>  	list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list[new_entry->efd_tid & 1]);
> -	atomic_add(clusters, &sbi->s_mb_free_pending);
>  	spin_unlock(&sbi->s_md_lock);
>  }
>  
> -- 
> 2.46.1
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR