[v1] Live Migration Acceleration with IAA Compression

[PATCH 4/5] migration iaa-compress: Add IAA initialization and deinitialization

Posted by Yuan Liu 2 years, 3 months ago

This patch defines the structure for IAA jobs related to data
compression and decompression, as well as the initialization and
deinitialization processes for IAA.

Signed-off-by: Yuan Liu <yuan1.liu@intel.com>
Reviewed-by: Nanhai Zou <nanhai.zou@intel.com>
---
 migration/iaa-ram-compress.c | 152 +++++++++++++++++++++++++++++++++++
 migration/iaa-ram-compress.h |  20 +++++
 migration/meson.build        |   1 +
 migration/ram-compress.c     |  21 +++--
 4 files changed, 189 insertions(+), 5 deletions(-)
 create mode 100644 migration/iaa-ram-compress.c
 create mode 100644 migration/iaa-ram-compress.h

diff --git a/migration/iaa-ram-compress.c b/migration/iaa-ram-compress.c
new file mode 100644
index 0000000000..da45952594
--- /dev/null
+++ b/migration/iaa-ram-compress.c
@@ -0,0 +1,152 @@
+/*
+ * QEMU IAA compression support
+ *
+ * Copyright (c) 2023 Intel Corporation
+ *  Written by:
+ *  Yuan Liu<yuan1.liu@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
+#include "migration.h"
+#include "options.h"
+#include "io/channel-null.h"
+#include "exec/target_page.h"
+#include "exec/ramblock.h"
+#include "iaa-ram-compress.h"
+#include "qpl/qpl.h"
+
+/* The IAA work queue maximum depth */
+#define IAA_JOB_NUM (512)
+
+/*
+ * Per-job state for a compression request; this is the "comp" member of the
+ * IaaJob.param union.
+ */
+typedef struct {
+    /* NOTE(review): presumably the outcome of the compression - confirm */
+    CompressResult result;
+    ram_addr_t offset; /* The offset of the compressed page in the block */
+    RAMBlock *block; /* The block of the compressed page */
+} iaa_comp_param;
+
+/*
+ * Per-job state for a decompression request; this is the "decomp" member of
+ * the IaaJob.param union.
+ */
+typedef struct {
+    uint8_t *host; /* Target address for decompression page */
+} iaa_decomp_param;
+
+/*
+ * One unit of work for the IAA device. iaa_compress_init() allocates
+ * IAA_JOB_NUM of these and stores them in iaa_job_pool.jobs[].
+ */
+typedef struct IaaJob {
+    /* List linkage; polling_queue is a QSIMPLEQ of IaaJob */
+    QSIMPLEQ_ENTRY(IaaJob) entry;
+    /* NOTE(review): presumably selects which member of param is valid */
+    bool is_compression;
+    /* NOTE(review): presumably the data lengths for in_buf/out_buf - confirm */
+    uint32_t in_len;
+    uint32_t out_len;
+    /*
+     * Slices of the pool's job_in_buf/job_out_buf, one target page apart.
+     * in_buf is only assigned by iaa_compress_init() for decompression.
+     */
+    uint8_t *in_buf;
+    uint8_t *out_buf;
+    qpl_job *qpl; /* It is used to submit (de)compression work to IAA */
+    /* Request-specific state, see iaa_comp_param and iaa_decomp_param */
+    union {
+        iaa_comp_param comp;
+        iaa_decomp_param decomp;
+    } param;
+} IaaJob;
+
+/*
+ * Fixed-size pool of IaaJob objects together with the two memory mappings
+ * their in_buf/out_buf pointers are carved from.
+ */
+typedef struct IaaJobPool {
+    /*
+     * NOTE(review): pos and cnt are not used in this patch; presumably a
+     * cursor into jobs[] and a count of jobs in use - confirm with patch 5/5.
+     */
+    uint32_t pos;
+    uint32_t cnt;
+    IaaJob *jobs[IAA_JOB_NUM]; /* Allocated by iaa_compress_init() */
+    uint8_t *job_in_buf; /* The IAA device input buffers for all IAA jobs */
+    uint8_t *job_out_buf; /* The IAA device output buffers for all IAA jobs */
+    size_t buf_size; /* Size in bytes of each of the two mappings above */
+} IaaJobPool;
+
+/* The single job pool of this file; zeroed and filled by iaa_compress_init() */
+static IaaJobPool iaa_job_pool;
+/* This is used to record jobs that have been submitted but not yet completed */
+static QSIMPLEQ_HEAD(, IaaJob) polling_queue =
+                                   QSIMPLEQ_HEAD_INITIALIZER(polling_queue);
+
+/*
+ * Tear down the global IAA job pool: finalize and free every job, then unmap
+ * the shared input/output buffers.
+ *
+ * Every released slot and buffer pointer is reset to NULL, so this function
+ * may be called more than once, and after a partially completed
+ * iaa_compress_init(), without double freeing anything.
+ */
+void iaa_compress_deinit(void)
+{
+    for (int i = 0; i < IAA_JOB_NUM; i++) {
+        IaaJob *job = iaa_job_pool.jobs[i];
+
+        if (!job) {
+            continue;
+        }
+        if (job->qpl) {
+            qpl_fini_job(job->qpl);
+            g_free(job->qpl);
+        }
+        g_free(job);
+        /* Clear the slot so a repeated deinit cannot free it again */
+        iaa_job_pool.jobs[i] = NULL;
+    }
+    /*
+     * Check for MAP_FAILED as well as NULL: a failed mmap() result is
+     * non-NULL and must never be handed to munmap().
+     */
+    if (iaa_job_pool.job_in_buf && iaa_job_pool.job_in_buf != MAP_FAILED) {
+        munmap(iaa_job_pool.job_in_buf, iaa_job_pool.buf_size);
+    }
+    iaa_job_pool.job_in_buf = NULL;
+    if (iaa_job_pool.job_out_buf && iaa_job_pool.job_out_buf != MAP_FAILED) {
+        munmap(iaa_job_pool.job_out_buf, iaa_job_pool.buf_size);
+    }
+    iaa_job_pool.job_out_buf = NULL;
+}
+
+/*
+ * Set up the global IAA job pool: map the device buffers and allocate and
+ * initialize IAA_JOB_NUM QPL hardware jobs.
+ *
+ * @is_decompression: true on the load side, where an input buffer is also
+ * mapped for every job; false on the save side, which only needs the output
+ * buffer.
+ *
+ * Returns 0 on success and -1 on failure. On failure everything allocated
+ * so far has been released and the pool holds no stale pointers.
+ */
+int iaa_compress_init(bool is_decompression)
+{
+    qpl_status status;
+    uint32_t qpl_hw_size = 0;
+    const int flags = MAP_PRIVATE | MAP_POPULATE | MAP_ANONYMOUS;
+    const size_t page_size = qemu_target_page_size();
+    const size_t buf_size = IAA_JOB_NUM * page_size;
+
+    QSIMPLEQ_INIT(&polling_queue);
+    memset(&iaa_job_pool, 0, sizeof(IaaJobPool));
+    iaa_job_pool.buf_size = buf_size;
+    iaa_job_pool.job_out_buf = mmap(NULL, buf_size, PROT_READ | PROT_WRITE,
+                                    flags, -1, 0);
+    if (iaa_job_pool.job_out_buf == MAP_FAILED) {
+        error_report("Failed to allocate iaa output buffer, error %s",
+                     strerror(errno));
+        /* Do not leave MAP_FAILED behind for a later deinit to munmap() */
+        iaa_job_pool.job_out_buf = NULL;
+        return -1;
+    }
+    /*
+     * There is no need to allocate an input buffer for the compression
+     * function, the IAA hardware can directly access the virtual machine
+     * memory through the host address through Share Virtual Memory(SVM)
+     */
+    if (is_decompression) {
+        iaa_job_pool.job_in_buf = mmap(NULL, buf_size, PROT_READ | PROT_WRITE,
+                                       flags, -1, 0);
+        if (iaa_job_pool.job_in_buf == MAP_FAILED) {
+            error_report("Failed to allocate iaa input buffer, error %s",
+                         strerror(errno));
+            /* MAP_FAILED is non-NULL; reset it so deinit skips the munmap() */
+            iaa_job_pool.job_in_buf = NULL;
+            goto init_err;
+        }
+    }
+    status = qpl_get_job_size(qpl_path_hardware, &qpl_hw_size);
+    if (status != QPL_STS_OK) {
+        error_report("Failed to initialize iaa hardware, error %d", status);
+        goto init_err;
+    }
+    for (int i = 0; i < IAA_JOB_NUM; i++) {
+        size_t buf_offset = page_size * i;
+        IaaJob *job = g_try_malloc0(sizeof(IaaJob));
+
+        if (!job) {
+            error_report("Failed to allocate iaa job memory, error %s",
+                         strerror(errno));
+            goto init_err;
+        }
+        /* Publish the job right away so the error path can free it */
+        iaa_job_pool.jobs[i] = job;
+        job->qpl = g_try_malloc0(qpl_hw_size);
+        if (!job->qpl) {
+            error_report("Failed to allocate iaa qpl memory, error %s",
+                         strerror(errno));
+            goto init_err;
+        }
+        if (is_decompression) {
+            job->in_buf = iaa_job_pool.job_in_buf + buf_offset;
+        }
+        job->out_buf = iaa_job_pool.job_out_buf + buf_offset;
+        status = qpl_init_job(qpl_path_hardware, job->qpl);
+        if (status != QPL_STS_OK) {
+            error_report("Failed to initialize iaa qpl, error %d", status);
+            /*
+             * This job was never successfully initialized, so drop it here
+             * rather than letting deinit call qpl_fini_job() on it.
+             */
+            g_free(job->qpl);
+            job->qpl = NULL;
+            goto init_err;
+        }
+    }
+    return 0;
+init_err:
+    iaa_compress_deinit();
+    return -1;
+}
diff --git a/migration/iaa-ram-compress.h b/migration/iaa-ram-compress.h
new file mode 100644
index 0000000000..27998b255b
--- /dev/null
+++ b/migration/iaa-ram-compress.h
@@ -0,0 +1,20 @@
+/*
+ * QEMU IAA compression support
+ *
+ * Copyright (c) 2023 Intel Corporation
+ *  Written by:
+ *  Yuan Liu<yuan1.liu@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_MIGRATION_IAA_COMPRESS_H
+#define QEMU_MIGRATION_IAA_COMPRESS_H
+#include "qemu-file.h"
+#include "ram-compress.h"
+
+/*
+ * Allocate the IAA job pool and its device buffers. Pass true for
+ * is_decompression on the load side and false on the save side.
+ * Returns 0 on success, -1 on failure.
+ */
+int iaa_compress_init(bool is_decompression);
+/* Free the jobs and unmap the buffers set up by iaa_compress_init() */
+void iaa_compress_deinit(void);
+#endif
diff --git a/migration/meson.build b/migration/meson.build
index 92b1cc4297..9131815420 100644
--- a/migration/meson.build
+++ b/migration/meson.build
@@ -40,6 +40,7 @@ if get_option('live_block_migration').allowed()
   system_ss.add(files('block.c'))
 endif
 system_ss.add(when: zstd, if_true: files('multifd-zstd.c'))
+system_ss.add(when: qpl, if_true: files('iaa-ram-compress.c'))
 
 specific_ss.add(when: 'CONFIG_SYSTEM_ONLY',
                 if_true: files('ram.c',
diff --git a/migration/ram-compress.c b/migration/ram-compress.c
index 47357352f7..acc511ce57 100644
--- a/migration/ram-compress.c
+++ b/migration/ram-compress.c
@@ -30,6 +30,9 @@
 #include "qemu/cutils.h"
 
 #include "ram-compress.h"
+#ifdef CONFIG_QPL
+#include "iaa-ram-compress.h"
+#endif
 
 #include "qemu/error-report.h"
 #include "migration.h"
@@ -484,10 +487,11 @@ int ram_compress_save_setup(void)
     if (!migrate_compress()) {
         return 0;
     }
+#ifdef CONFIG_QPL
     if (migrate_compress_with_iaa()) {
-        /* Implement in next patch */
-        return 0;
+        return iaa_compress_init(false);
     }
+#endif
     return compress_threads_save_setup();
 }
 
@@ -496,10 +500,12 @@ void ram_compress_save_cleanup(void)
     if (!migrate_compress()) {
         return;
     }
+#ifdef CONFIG_QPL
     if (migrate_compress_with_iaa()) {
-        /* Implement in next patch */
+        iaa_compress_deinit();
         return;
     }
+#endif
     compress_threads_save_cleanup();
 }
 
@@ -516,9 +522,11 @@ int ram_compress_load_setup(QEMUFile *f)
     if (!migrate_compress()) {
         return 0;
     }
+#ifdef CONFIG_QPL
     if (migrate_compress_with_iaa()) {
-        /* Implement in next patch */
+        return iaa_compress_init(true);
     }
+#endif
     return compress_threads_load_setup(f);
 }
 
@@ -527,8 +535,11 @@ void ram_compress_load_cleanup(void)
     if (!migrate_compress()) {
         return;
     }
+#ifdef CONFIG_QPL
     if (migrate_compress_with_iaa()) {
-        /* Implement in next patch */
+        iaa_compress_deinit();
+        return;
     }
+#endif
     compress_threads_load_cleanup();
 }
-- 
2.39.3

Re: [PATCH 4/5] migration iaa-compress: Add IAA initialization and deinitialization

Posted by Juan Quintela 2 years, 3 months ago

Yuan Liu <yuan1.liu@intel.com> wrote:
> This patch defines the structure for IAA jobs related to data
> compression and decompression, as well as the initialization and
> deinitialization processes for IAA.
>
> Signed-off-by: Yuan Liu <yuan1.liu@intel.com>
> Reviewed-by: Nanhai Zou <nanhai.zou@intel.com>

You should be using an orderfile.

$ less .git/config
...
[diff]
        orderFile = scripts/git.orderfile

That way .h files and friends come first in patches.

> diff --git a/migration/ram-compress.c b/migration/ram-compress.c
> index 47357352f7..acc511ce57 100644
> --- a/migration/ram-compress.c
> +++ b/migration/ram-compress.c
> @@ -30,6 +30,9 @@
>  #include "qemu/cutils.h"
>  
>  #include "ram-compress.h"
> +#ifdef CONFIG_QPL
> +#include "iaa-ram-compress.h"
> +#endif
>  
>  #include "qemu/error-report.h"
>  #include "migration.h"
> @@ -484,10 +487,11 @@ int ram_compress_save_setup(void)
>      if (!migrate_compress()) {
>          return 0;
>      }
> +#ifdef CONFIG_QPL
>      if (migrate_compress_with_iaa()) {
> -        /* Implement in next patch */
> -        return 0;
> +        return iaa_compress_init(false);
>      }
> +#endif
>      return compress_threads_save_setup();
>  }
>  
> @@ -496,10 +500,12 @@ void ram_compress_save_cleanup(void)
>      if (!migrate_compress()) {
>          return;
>      }
> +#ifdef CONFIG_QPL
>      if (migrate_compress_with_iaa()) {
> -        /* Implement in next patch */
> +        iaa_compress_deinit();
>          return;
>      }
> +#endif
>      compress_threads_save_cleanup();
>  }
>  
> @@ -516,9 +522,11 @@ int ram_compress_load_setup(QEMUFile *f)
>      if (!migrate_compress()) {
>          return 0;
>      }
> +#ifdef CONFIG_QPL
>      if (migrate_compress_with_iaa()) {
> -        /* Implement in next patch */
> +        return iaa_compress_init(true);
>      }
> +#endif
>      return compress_threads_load_setup(f);
>  }
>  
> @@ -527,8 +535,11 @@ void ram_compress_load_cleanup(void)
>      if (!migrate_compress()) {
>          return;
>      }
> +#ifdef CONFIG_QPL
>      if (migrate_compress_with_iaa()) {
> -        /* Implement in next patch */
> +        iaa_compress_deinit();
> +        return;
>      }
> +#endif
>      compress_threads_load_cleanup();
>  }

I think it would be easier to understand and implement if you drop
patch3, and just add at each place that there is a:

compress_threads_load_cleanup()

a

iaa_load_cleanup()

And the same for everything else.

Later, Juan.

[PATCH 1/5] configure: add qpl meson option
[PATCH 2/5] qapi/migration: Introduce compress-with-iaa migration parameter
[PATCH 3/5] ram compress: Refactor ram compression functions
[PATCH 4/5] migration iaa-compress: Add IAA initialization and deinitialization
[PATCH 5/5] migration iaa-compress: Implement IAA compression