block/qcow2: Improve I/O performance in write-through cache mode

[RFC 2/2] block/qcow2: Improve I/O performance in write-through cache mode for qcow2 driver

Posted by zhangjiaji via 3 months, 2 weeks ago

From: Zhu Yangyang <zhuyangyang14@huawei.com>

Optimize I/O performance in writethrough cache mode by writing the cache out
immediately after updating the L2 entry, and by flushing only the portions
that have actually changed.

Signed-off-by: Zhu Yangyang <zhuyangyang14@huawei.com>
---
 block/qcow2-cache.c   |  7 +++++++
 block/qcow2-cluster.c | 15 +++++++++++++++
 block/qcow2.h         |  3 ++-
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index b5378d003d..3491cea7fb 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -463,6 +463,13 @@ void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
     c->entries[i].dirty = true;
 }
 
+bool qcow2_cache_is_dirty(Qcow2Cache *c, void *table)
+{
+    int i = qcow2_cache_get_table_idx(c, table);
+    assert(c->entries[i].offset != 0);
+    return c->entries[i].dirty;
+}
+
 void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset)
 {
     int i;
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index ce8c0076b3..95b0e44c6e 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1032,6 +1032,10 @@ int coroutine_fn qcow2_alloc_cluster_link_l2(BlockDriverState *bs,
     int i, j = 0, l2_index, ret;
     uint64_t *old_cluster, *l2_slice;
     uint64_t cluster_offset = m->alloc_offset;
+    bool part_flush = false;
+    /* I haven't figured out yet how to perceive this IO
+     * as a writethrough cache mode. */
+    bool writethrough = true;
 
     trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
     assert(m->nb_clusters > 0);
@@ -1061,6 +1065,13 @@ int coroutine_fn qcow2_alloc_cluster_link_l2(BlockDriverState *bs,
     if (ret < 0) {
         goto err;
     }
+
+    /* If the cache is clean before qcow2_cache_entry_mark_dirty(),
+     * we can flush only the modified L2 entries..
+     */
+    if (writethrough && !qcow2_cache_is_dirty(s->l2_table_cache, l2_slice)) {
+        part_flush = true;
+    }
     qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
 
     assert(l2_index + m->nb_clusters <= s->l2_slice_size);
@@ -1102,6 +1113,10 @@ int coroutine_fn qcow2_alloc_cluster_link_l2(BlockDriverState *bs,
      }
 
 
+     if (part_flush) {
+        qcow2_write_l2_entry(bs, s->l2_table_cache, l2_slice, l2_index,
+                             m->nb_clusters);
+     }
     qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
     /*
diff --git a/block/qcow2.h b/block/qcow2.h
index b0ba2e1996..8fb59c4e87 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -1000,9 +1000,10 @@ qcow2_cache_create(BlockDriverState *bs, int num_tables, unsigned table_size);
 
 int qcow2_cache_destroy(Qcow2Cache *c);
 
+bool qcow2_cache_is_dirty(Qcow2Cache *c, void *table);
 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
 int qcow2_write_l2_entry(BlockDriverState *bs, Qcow2Cache *c, void *l2_tabel,
-                         int l2_index, int nb_clusters)
+                         int l2_index, int nb_clusters);
 int GRAPH_RDLOCK qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
 int GRAPH_RDLOCK qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c);
 int GRAPH_RDLOCK qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
-- 
2.33.0

Re: [RFC 2/2] block/qcow2: Improve I/O performance in write-through cache mode for qcow2 driver

Posted by Denis V. Lunev 3 months, 2 weeks ago

On 10/23/25 14:24, zhangjiaji via wrote:
> From: Zhu Yangyang <zhuyangyang14@huawei.com>
>
> Optimize IO performance in writethrough cache mode by immediately performing
> write cache after updating the L2 entry and only flushing the portions that
> have actually changed.
>
> Signed-off-by: Zhu Yangyang <zhuyangyang14@huawei.com>
> ---
>   block/qcow2-cache.c   |  7 +++++++
>   block/qcow2-cluster.c | 15 +++++++++++++++
>   block/qcow2.h         |  3 ++-
>   3 files changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
> index b5378d003d..3491cea7fb 100644
> --- a/block/qcow2-cache.c
> +++ b/block/qcow2-cache.c
> @@ -463,6 +463,13 @@ void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
>       c->entries[i].dirty = true;
>   }
>   
> +bool qcow2_cache_is_dirty(Qcow2Cache *c, void *table)
> +{
> +    int i = qcow2_cache_get_table_idx(c, table);
> +    assert(c->entries[i].offset != 0);
> +    return c->entries[i].dirty;
> +}
> +
>   void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset)
>   {
>       int i;
> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
> index ce8c0076b3..95b0e44c6e 100644
> --- a/block/qcow2-cluster.c
> +++ b/block/qcow2-cluster.c
> @@ -1032,6 +1032,10 @@ int coroutine_fn qcow2_alloc_cluster_link_l2(BlockDriverState *bs,
>       int i, j = 0, l2_index, ret;
>       uint64_t *old_cluster, *l2_slice;
>       uint64_t cluster_offset = m->alloc_offset;
> +    bool part_flush = false;
> +    /* I haven't figured out yet how to perceive this IO
> +     * as a writethrough cache mode. */
> +    bool writethrough = true;
>   
>       trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
>       assert(m->nb_clusters > 0);
> @@ -1061,6 +1065,13 @@ int coroutine_fn qcow2_alloc_cluster_link_l2(BlockDriverState *bs,
>       if (ret < 0) {
>           goto err;
>       }
> +
> +    /* If the cache is clean before qcow2_cache_entry_mark_dirty(),
> +     * we can flush only the modified L2 entries..
> +     */
> +    if (writethrough && !qcow2_cache_is_dirty(s->l2_table_cache, l2_slice)) {
> +        part_flush = true;
> +    }
>       qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
>   
>       assert(l2_index + m->nb_clusters <= s->l2_slice_size);
> @@ -1102,6 +1113,10 @@ int coroutine_fn qcow2_alloc_cluster_link_l2(BlockDriverState *bs,
>        }
>   
>   
> +     if (part_flush) {
> +        qcow2_write_l2_entry(bs, s->l2_table_cache, l2_slice, l2_index,
> +                             m->nb_clusters);
> +     }
>       qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
>   
>       /*
> diff --git a/block/qcow2.h b/block/qcow2.h
> index b0ba2e1996..8fb59c4e87 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -1000,9 +1000,10 @@ qcow2_cache_create(BlockDriverState *bs, int num_tables, unsigned table_size);
>   
>   int qcow2_cache_destroy(Qcow2Cache *c);
>   
> +bool qcow2_cache_is_dirty(Qcow2Cache *c, void *table);
>   void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
>   int qcow2_write_l2_entry(BlockDriverState *bs, Qcow2Cache *c, void *l2_tabel,
> -                         int l2_index, int nb_clusters)
> +                         int l2_index, int nb_clusters);
>   int GRAPH_RDLOCK qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
>   int GRAPH_RDLOCK qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c);
>   int GRAPH_RDLOCK qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
For my taste, this approach is incorrect. I think that we do NOT need to write
a dirty L2 entry at all until a flush comes from the OS. This is how this
code was designed a long time ago.

With this approach, the host will see twice as many IOPS as the guest
issues.

If this approach has stopped working for some reason, that would be
worth checking.

Den

[RFC 1/2] block/qcow2-cache: Introduce qcow2_write_l2_entry()
[RFC 2/2] block/qcow2: Improve I/O performance in write-through cache mode for qcow2 driver