[PATCH V10 1/8] accel/kvm: Extract common KVM vCPU {creation, parking} code

Salil Mehta via posted 8 patches 6 months, 1 week ago
Maintainers: Paolo Bonzini <pbonzini@redhat.com>, "Michael S. Tsirkin" <mst@redhat.com>, Igor Mammedov <imammedo@redhat.com>, Ani Sinha <anisinha@redhat.com>, "Alex Bennée" <alex.bennee@linaro.org>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Yanan Wang <wangyanan55@huawei.com>, Richard Henderson <richard.henderson@linaro.org>, Peter Xu <peterx@redhat.com>, David Hildenbrand <david@redhat.com>
There is a newer version of this series
[PATCH V10 1/8] accel/kvm: Extract common KVM vCPU {creation, parking} code
Posted by Salil Mehta via 6 months, 1 week ago
KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread
is spawned. This is common to all the architectures as of now.

Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the
corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't
support vCPU removal. Therefore, its representative KVM vCPU object/context in
Qemu is parked.

Refactor architecture common logic so that some APIs could be reused by vCPU
Hotplug code of some architectures like ARM, Loongson etc. Update new/old APIs
with trace events. No functional change is intended here.

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Tested-by: Xianglai Li <lixianglai@loongson.cn>
Tested-by: Miguel Luis <miguel.luis@oracle.com>
Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
Reviewed-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
---
 accel/kvm/kvm-all.c    | 97 ++++++++++++++++++++++++++++--------------
 accel/kvm/kvm-cpus.h   | 23 ++++++++++
 accel/kvm/trace-events |  5 ++-
 3 files changed, 92 insertions(+), 33 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index c0be9f5eed..a8f93078dc 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -340,14 +340,73 @@ err:
     return ret;
 }
 
+void kvm_park_vcpu(CPUState *cpu)
+{
+    struct KVMParkedVcpu *vcpu;
+
+    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+
+    vcpu = g_malloc0(sizeof(*vcpu));
+    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
+    vcpu->kvm_fd = cpu->kvm_fd;
+    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
+}
+
+int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id)
+{
+    struct KVMParkedVcpu *cpu;
+
+    QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
+        if (cpu->vcpu_id == vcpu_id) {
+            int kvm_fd;
+
+            trace_kvm_unpark_vcpu(vcpu_id);
+
+            QLIST_REMOVE(cpu, node);
+            kvm_fd = cpu->kvm_fd;
+            g_free(cpu);
+            return kvm_fd;
+        }
+    }
+
+    return -ENOENT;
+}
+
+int kvm_create_vcpu(CPUState *cpu)
+{
+    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
+    KVMState *s = kvm_state;
+    int kvm_fd;
+
+    /* check if the KVM vCPU already exists but is parked */
+    kvm_fd = kvm_unpark_vcpu(s, vcpu_id);
+    if (kvm_fd < 0) {
+        /* vCPU not parked: create a new KVM vCPU */
+        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
+        if (kvm_fd < 0) {
+            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id);
+            return kvm_fd;
+        }
+    }
+
+    trace_kvm_create_vcpu(cpu->cpu_index, vcpu_id, kvm_fd);
+
+    cpu->kvm_fd = kvm_fd;
+    cpu->kvm_state = s;
+    cpu->vcpu_dirty = true;
+    cpu->dirty_pages = 0;
+    cpu->throttle_us_per_full = 0;
+
+    return 0;
+}
+
 static int do_kvm_destroy_vcpu(CPUState *cpu)
 {
     KVMState *s = kvm_state;
     long mmap_size;
-    struct KVMParkedVcpu *vcpu = NULL;
     int ret = 0;
 
-    trace_kvm_destroy_vcpu();
+    trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
 
     ret = kvm_arch_destroy_vcpu(cpu);
     if (ret < 0) {
@@ -373,10 +432,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
         }
     }
 
-    vcpu = g_malloc0(sizeof(*vcpu));
-    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
-    vcpu->kvm_fd = cpu->kvm_fd;
-    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
+    kvm_park_vcpu(cpu);
 err:
     return ret;
 }
@@ -389,24 +445,6 @@ void kvm_destroy_vcpu(CPUState *cpu)
     }
 }
 
-static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
-{
-    struct KVMParkedVcpu *cpu;
-
-    QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
-        if (cpu->vcpu_id == vcpu_id) {
-            int kvm_fd;
-
-            QLIST_REMOVE(cpu, node);
-            kvm_fd = cpu->kvm_fd;
-            g_free(cpu);
-            return kvm_fd;
-        }
-    }
-
-    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
-}
-
 int kvm_init_vcpu(CPUState *cpu, Error **errp)
 {
     KVMState *s = kvm_state;
@@ -415,19 +453,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
 
     trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
 
-    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
+    ret = kvm_create_vcpu(cpu);
     if (ret < 0) {
-        error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
+        error_setg_errno(errp, -ret,
+                         "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
                          kvm_arch_vcpu_id(cpu));
         goto err;
     }
 
-    cpu->kvm_fd = ret;
-    cpu->kvm_state = s;
-    cpu->vcpu_dirty = true;
-    cpu->dirty_pages = 0;
-    cpu->throttle_us_per_full = 0;
-
     mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
     if (mmap_size < 0) {
         ret = mmap_size;
diff --git a/accel/kvm/kvm-cpus.h b/accel/kvm/kvm-cpus.h
index ca40add32c..2e6bb38b5d 100644
--- a/accel/kvm/kvm-cpus.h
+++ b/accel/kvm/kvm-cpus.h
@@ -22,5 +22,28 @@ bool kvm_supports_guest_debug(void);
 int kvm_insert_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len);
 int kvm_remove_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len);
 void kvm_remove_all_breakpoints(CPUState *cpu);
+/**
+ * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
+ * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created.
+ *
+ * @returns: 0 when success, errno (<0) when failed.
+ */
+int kvm_create_vcpu(CPUState *cpu);
 
+/**
+ * kvm_park_vcpu - Park QEMU KVM vCPU context
+ * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked.
+ *
+ * @returns: none
+ */
+void kvm_park_vcpu(CPUState *cpu);
+
+/**
+ * kvm_unpark_vcpu - unpark QEMU KVM vCPU context
+ * @s: KVM State
+ * @vcpu_id: Architecture vCPU ID of the parked vCPU
+ *
+ * @returns: KVM fd of the parked vCPU, -ENOENT if no parked vCPU matches
+ */
+int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id);
 #endif /* KVM_CPUS_H */
diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
index 681ccb667d..bd43a0ef26 100644
--- a/accel/kvm/trace-events
+++ b/accel/kvm/trace-events
@@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
 kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
 kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
 kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
+kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id, int kvm_fd) "index: %d, id: %lu, kvm fd: %d"
+kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
+kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
+kvm_unpark_vcpu(unsigned long arch_cpu_id) "id: %lu"
 kvm_irqchip_commit_routes(void) ""
 kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
 kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
@@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s"
 kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)"
 kvm_dirty_ring_reaper_kick(const char *reason) "%s"
 kvm_dirty_ring_flush(int finished) "%d"
-kvm_destroy_vcpu(void) ""
 kvm_failed_get_vcpu_mmap_size(void) ""
 kvm_cpu_exec(void) ""
 kvm_interrupt_exit_request(void) ""
-- 
2.34.1
Re: [PATCH V10 1/8] accel/kvm: Extract common KVM vCPU {creation,parking} code
Posted by Nicholas Piggin 6 months, 1 week ago
On Tue May 21, 2024 at 9:32 AM AEST, Salil Mehta wrote:
> KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread
> is spawned. This is common to all the architectures as of now.
>
> Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the
> corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't
> support vCPU removal. Therefore, its representative KVM vCPU object/context in
> Qemu is parked.
>
> Refactor architecture common logic so that some APIs could be reused by vCPU
> Hotplug code of some architectures likes ARM, Loongson etc. Update new/old APIs
> with trace events. No functional change is intended here.
>
> Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
> Reviewed-by: Gavin Shan <gshan@redhat.com>
> Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Tested-by: Xianglai Li <lixianglai@loongson.cn>
> Tested-by: Miguel Luis <miguel.luis@oracle.com>
> Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
> Reviewed-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
> ---
>  accel/kvm/kvm-all.c    | 97 ++++++++++++++++++++++++++++--------------
>  accel/kvm/kvm-cpus.h   | 23 ++++++++++
>  accel/kvm/trace-events |  5 ++-
>  3 files changed, 92 insertions(+), 33 deletions(-)
>
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index c0be9f5eed..a8f93078dc 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -340,14 +340,73 @@ err:
>      return ret;
>  }
>  
> +void kvm_park_vcpu(CPUState *cpu)
> +{
> +    struct KVMParkedVcpu *vcpu;
> +
> +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
> +
> +    vcpu = g_malloc0(sizeof(*vcpu));
> +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
> +    vcpu->kvm_fd = cpu->kvm_fd;
> +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> +}
> +
> +int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id)
> +{
> +    struct KVMParkedVcpu *cpu;
> +
> +    QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
> +        if (cpu->vcpu_id == vcpu_id) {
> +            int kvm_fd;
> +
> +            trace_kvm_unpark_vcpu(vcpu_id);

Just an aside, but unfortunately tracing is not entirely consistent.
Often a function-level trace point is done at the beginning of the
function regardless of the result. But I actually like this style
of tracing at the end and providing result too. OTOH you don't see
the -ENOENT case.

In any case it's nice to have something here.

The other unfortunate thing is some confusion between attaching a KVM
context to a QEMU vCPU and actually making the KVM_CREATE_VCPU
ioctl call; also, kvm_create_vcpu is not the counterpart of
kvm_destroy_vcpu, etc. It is not your fault that the existing naming
makes this a bit confusing. Fortunately it's pretty well contained
to a small amount of code.

I hate to nitpick, but since the functions are being exported,
would something like kvm_attach_vcpu() be a better name?

Just a thought, but no big deal. Either way,

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>

> +
> +            QLIST_REMOVE(cpu, node);
> +            kvm_fd = cpu->kvm_fd;
> +            g_free(cpu);
> +            return kvm_fd;
> +        }
> +    }
> +
> +    return -ENOENT;
> +}
> +
> +int kvm_create_vcpu(CPUState *cpu)
> +{
> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
> +    KVMState *s = kvm_state;
> +    int kvm_fd;
> +
> +    /* check if the KVM vCPU already exist but is parked */
> +    kvm_fd = kvm_unpark_vcpu(s, vcpu_id);
> +    if (kvm_fd < 0) {
> +        /* vCPU not parked: create a new KVM vCPU */
> +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
> +        if (kvm_fd < 0) {
> +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id);
> +            return kvm_fd;
> +        }
> +    }
> +
> +    trace_kvm_create_vcpu(cpu->cpu_index, vcpu_id, kvm_fd);
> +
> +    cpu->kvm_fd = kvm_fd;
> +    cpu->kvm_state = s;
> +    cpu->vcpu_dirty = true;
> +    cpu->dirty_pages = 0;
> +    cpu->throttle_us_per_full = 0;
> +
> +    return 0;
> +}
> +
>  static int do_kvm_destroy_vcpu(CPUState *cpu)
>  {
>      KVMState *s = kvm_state;
>      long mmap_size;
> -    struct KVMParkedVcpu *vcpu = NULL;
>      int ret = 0;
>  
> -    trace_kvm_destroy_vcpu();
> +    trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  
>      ret = kvm_arch_destroy_vcpu(cpu);
>      if (ret < 0) {
> @@ -373,10 +432,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
>          }
>      }
>  
> -    vcpu = g_malloc0(sizeof(*vcpu));
> -    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
> -    vcpu->kvm_fd = cpu->kvm_fd;
> -    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> +    kvm_park_vcpu(cpu);
>  err:
>      return ret;
>  }
> @@ -389,24 +445,6 @@ void kvm_destroy_vcpu(CPUState *cpu)
>      }
>  }
>  
> -static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
> -{
> -    struct KVMParkedVcpu *cpu;
> -
> -    QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
> -        if (cpu->vcpu_id == vcpu_id) {
> -            int kvm_fd;
> -
> -            QLIST_REMOVE(cpu, node);
> -            kvm_fd = cpu->kvm_fd;
> -            g_free(cpu);
> -            return kvm_fd;
> -        }
> -    }
> -
> -    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
> -}
> -
>  int kvm_init_vcpu(CPUState *cpu, Error **errp)
>  {
>      KVMState *s = kvm_state;
> @@ -415,19 +453,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
>  
>      trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  
> -    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
> +    ret = kvm_create_vcpu(cpu);
>      if (ret < 0) {
> -        error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
> +        error_setg_errno(errp, -ret,
> +                         "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
>                           kvm_arch_vcpu_id(cpu));
>          goto err;
>      }
>  
> -    cpu->kvm_fd = ret;
> -    cpu->kvm_state = s;
> -    cpu->vcpu_dirty = true;
> -    cpu->dirty_pages = 0;
> -    cpu->throttle_us_per_full = 0;
> -
>      mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
>      if (mmap_size < 0) {
>          ret = mmap_size;
> diff --git a/accel/kvm/kvm-cpus.h b/accel/kvm/kvm-cpus.h
> index ca40add32c..2e6bb38b5d 100644
> --- a/accel/kvm/kvm-cpus.h
> +++ b/accel/kvm/kvm-cpus.h
> @@ -22,5 +22,28 @@ bool kvm_supports_guest_debug(void);
>  int kvm_insert_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len);
>  int kvm_remove_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len);
>  void kvm_remove_all_breakpoints(CPUState *cpu);
> +/**
> + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
> + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created.
> + *
> + * @returns: 0 when success, errno (<0) when failed.
> + */
> +int kvm_create_vcpu(CPUState *cpu);
>  
> +/**
> + * kvm_park_vcpu - Park QEMU KVM vCPU context
> + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked.
> + *
> + * @returns: none
> + */
> +void kvm_park_vcpu(CPUState *cpu);
> +
> +/**
> + * kvm_unpark_vcpu - unpark QEMU KVM vCPU context
> + * @s: KVM State
> + * @cpu: Architecture vCPU ID of the parked vCPU
> + *
> + * @returns: KVM fd
> + */
> +int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id);
>  #endif /* KVM_CPUS_H */
> diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
> index 681ccb667d..bd43a0ef26 100644
> --- a/accel/kvm/trace-events
> +++ b/accel/kvm/trace-events
> @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
>  kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
>  kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
>  kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
> +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id, int kvm_fd) "index: %d, id: %lu, kvm fd: %d"
> +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
> +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
> +kvm_unpark_vcpu(unsigned long arch_cpu_id) "id: %lu"
>  kvm_irqchip_commit_routes(void) ""
>  kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
>  kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
> @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s"
>  kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)"
>  kvm_dirty_ring_reaper_kick(const char *reason) "%s"
>  kvm_dirty_ring_flush(int finished) "%d"
> -kvm_destroy_vcpu(void) ""
>  kvm_failed_get_vcpu_mmap_size(void) ""
>  kvm_cpu_exec(void) ""
>  kvm_interrupt_exit_request(void) ""
RE: [PATCH V10 1/8] accel/kvm: Extract common KVM vCPU {creation,parking} code
Posted by Salil Mehta via 6 months ago
>  From: Nicholas Piggin <npiggin@gmail.com>
>  Sent: Wednesday, May 22, 2024 2:25 AM
>  To: Salil Mehta <salil.mehta@huawei.com>; qemu-devel@nongnu.org;
>  qemu-arm@nongnu.org
>  
>  On Tue May 21, 2024 at 9:32 AM AEST, Salil Mehta wrote:
>  > KVM vCPU creation is done once during the vCPU realization when Qemu
>  > vCPU thread is spawned. This is common to all the architectures as of now.
>  >
>  > Hot-unplug of vCPU results in destruction of the vCPU object in QOM
>  > but the corresponding KVM vCPU object in the Host KVM is not destroyed
>  > as KVM doesn't support vCPU removal. Therefore, its representative KVM
>  > vCPU object/context in Qemu is parked.
>  >
>  > Refactor architecture common logic so that some APIs could be reused
>  > by vCPU Hotplug code of some architectures likes ARM, Loongson etc.
>  > Update new/old APIs with trace events. No functional change is intended
>  here.
>  >
>  > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>  > Reviewed-by: Gavin Shan <gshan@redhat.com>
>  > Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
>  > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>  > Tested-by: Xianglai Li <lixianglai@loongson.cn>
>  > Tested-by: Miguel Luis <miguel.luis@oracle.com>
>  > Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
>  > Reviewed-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
>  > ---
>  >  accel/kvm/kvm-all.c    | 97 ++++++++++++++++++++++++++++-------------
>  -
>  >  accel/kvm/kvm-cpus.h   | 23 ++++++++++
>  >  accel/kvm/trace-events |  5 ++-
>  >  3 files changed, 92 insertions(+), 33 deletions(-)
>  >
>  > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index
>  > c0be9f5eed..a8f93078dc 100644
>  > --- a/accel/kvm/kvm-all.c
>  > +++ b/accel/kvm/kvm-all.c
>  > @@ -340,14 +340,73 @@ err:
>  >      return ret;
>  >  }
>  >
>  > +void kvm_park_vcpu(CPUState *cpu)
>  > +{
>  > +    struct KVMParkedVcpu *vcpu;
>  > +
>  > +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  > +
>  > +    vcpu = g_malloc0(sizeof(*vcpu));
>  > +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>  > +    vcpu->kvm_fd = cpu->kvm_fd;
>  > +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); }
>  > +
>  > +int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id) {
>  > +    struct KVMParkedVcpu *cpu;
>  > +
>  > +    QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
>  > +        if (cpu->vcpu_id == vcpu_id) {
>  > +            int kvm_fd;
>  > +
>  > +            trace_kvm_unpark_vcpu(vcpu_id);
>  
>  Just an aside, but unfortunately tracing is not entirely consistent.
>  Often a function-level trace point is done at the beginning of the function
>  regardless of the result. But I actually like this style of tracing at the end and
>  providing result too. OTOH you don't see the -ENOENT case.
>  
>  In any case it's nice to have something here.


I can definitely move it to the end. Do you mean you would like the trace to
also cover the case where the vCPU was not found among the parked vCPUs?


>  
>  Other unforunate thing is some confusion between attaching a KVM
>  context for QEMU vCPU, and actually making the KVM_CREATE_VCPU ioctl
>  call, and kvm_create_vcpu is not the counterpart of kvm_destroy_vcpu,
>  etc.. It is not your fault the existing naming makes this a bit confusing.
>  Fortunately it's pretty well contained to small amount of code.
>  
>  I hate to nitpick it but since the functions are being exported, would it be a
>  better name somthing like kvm_attach_vcpu()?
>  
>  Just a thought, but no big deal. Either way,


Sure, I get your point, but KVM does not support destruction of KVM vCPUs
and hence, as you rightly pointed out, the creation and destruction legs in
QEMU are not symmetrical.

Can we live with the existing conventions for now? Otherwise this change
would add noise to this patch.


>  
>  Reviewed-by: Nicholas Piggin <npiggin@gmail.com>


Thank you.
Salil


>  
>  > +
>  > +            QLIST_REMOVE(cpu, node);
>  > +            kvm_fd = cpu->kvm_fd;
>  > +            g_free(cpu);
>  > +            return kvm_fd;
>  > +        }
>  > +    }
>  > +
>  > +    return -ENOENT;
>  > +}
>  > +
>  > +int kvm_create_vcpu(CPUState *cpu)
>  > +{
>  > +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
>  > +    KVMState *s = kvm_state;
>  > +    int kvm_fd;
>  > +
>  > +    /* check if the KVM vCPU already exist but is parked */
>  > +    kvm_fd = kvm_unpark_vcpu(s, vcpu_id);
>  > +    if (kvm_fd < 0) {
>  > +        /* vCPU not parked: create a new KVM vCPU */
>  > +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
>  > +        if (kvm_fd < 0) {
>  > +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu",
>  vcpu_id);
>  > +            return kvm_fd;
>  > +        }
>  > +    }
>  > +
>  > +    trace_kvm_create_vcpu(cpu->cpu_index, vcpu_id, kvm_fd);
>  > +
>  > +    cpu->kvm_fd = kvm_fd;
>  > +    cpu->kvm_state = s;
>  > +    cpu->vcpu_dirty = true;
>  > +    cpu->dirty_pages = 0;
>  > +    cpu->throttle_us_per_full = 0;
>  > +
>  > +    return 0;
>  > +}
>  > +
>  >  static int do_kvm_destroy_vcpu(CPUState *cpu)  {
>  >      KVMState *s = kvm_state;
>  >      long mmap_size;
>  > -    struct KVMParkedVcpu *vcpu = NULL;
>  >      int ret = 0;
>  >
>  > -    trace_kvm_destroy_vcpu();
>  > +    trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  >
>  >      ret = kvm_arch_destroy_vcpu(cpu);
>  >      if (ret < 0) {
>  > @@ -373,10 +432,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
>  >          }
>  >      }
>  >
>  > -    vcpu = g_malloc0(sizeof(*vcpu));
>  > -    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>  > -    vcpu->kvm_fd = cpu->kvm_fd;
>  > -    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
>  > +    kvm_park_vcpu(cpu);
>  >  err:
>  >      return ret;
>  >  }
>  > @@ -389,24 +445,6 @@ void kvm_destroy_vcpu(CPUState *cpu)
>  >      }
>  >  }
>  >
>  > -static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) -{
>  > -    struct KVMParkedVcpu *cpu;
>  > -
>  > -    QLIST_FOREACH(cpu, &s->kvm_parked_vcpus, node) {
>  > -        if (cpu->vcpu_id == vcpu_id) {
>  > -            int kvm_fd;
>  > -
>  > -            QLIST_REMOVE(cpu, node);
>  > -            kvm_fd = cpu->kvm_fd;
>  > -            g_free(cpu);
>  > -            return kvm_fd;
>  > -        }
>  > -    }
>  > -
>  > -    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
>  > -}
>  > -
>  >  int kvm_init_vcpu(CPUState *cpu, Error **errp)  {
>  >      KVMState *s = kvm_state;
>  > @@ -415,19 +453,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
>  >
>  >      trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  >
>  > -    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
>  > +    ret = kvm_create_vcpu(cpu);
>  >      if (ret < 0) {
>  > -        error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed
>  (%lu)",
>  > +        error_setg_errno(errp, -ret,
>  > +                         "kvm_init_vcpu: kvm_create_vcpu failed
>  > + (%lu)",
>  >                           kvm_arch_vcpu_id(cpu));
>  >          goto err;
>  >      }
>  >
>  > -    cpu->kvm_fd = ret;
>  > -    cpu->kvm_state = s;
>  > -    cpu->vcpu_dirty = true;
>  > -    cpu->dirty_pages = 0;
>  > -    cpu->throttle_us_per_full = 0;
>  > -
>  >      mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
>  >      if (mmap_size < 0) {
>  >          ret = mmap_size;
>  > diff --git a/accel/kvm/kvm-cpus.h b/accel/kvm/kvm-cpus.h index
>  > ca40add32c..2e6bb38b5d 100644
>  > --- a/accel/kvm/kvm-cpus.h
>  > +++ b/accel/kvm/kvm-cpus.h
>  > @@ -22,5 +22,28 @@ bool kvm_supports_guest_debug(void);  int
>  > kvm_insert_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr len);
>  > int kvm_remove_breakpoint(CPUState *cpu, int type, vaddr addr, vaddr
>  > len);  void kvm_remove_all_breakpoints(CPUState *cpu);
>  > +/**
>  > + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
>  > + * @cpu: QOM CPUState object for which KVM vCPU has to be
>  fetched/created.
>  > + *
>  > + * @returns: 0 when success, errno (<0) when failed.
>  > + */
>  > +int kvm_create_vcpu(CPUState *cpu);
>  >
>  > +/**
>  > + * kvm_park_vcpu - Park QEMU KVM vCPU context
>  > + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has
>  to be parked.
>  > + *
>  > + * @returns: none
>  > + */
>  > +void kvm_park_vcpu(CPUState *cpu);
>  > +
>  > +/**
>  > + * kvm_unpark_vcpu - unpark QEMU KVM vCPU context
>  > + * @s: KVM State
>  > + * @cpu: Architecture vCPU ID of the parked vCPU
>  > + *
>  > + * @returns: KVM fd
>  > + */
>  > +int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id);
>  >  #endif /* KVM_CPUS_H */
>  > diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index
>  > 681ccb667d..bd43a0ef26 100644
>  > --- a/accel/kvm/trace-events
>  > +++ b/accel/kvm/trace-events
>  > @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd
>  %d, type 0x%x, arg %p"
>  >  kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to
>  retrieve ONEREG %" PRIu64 " from KVM: %s"
>  >  kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to
>  set ONEREG %" PRIu64 " to KVM: %s"
>  >  kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id:
>  %lu"
>  > +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id, int
>  kvm_fd) "index: %d, id: %lu, kvm fd: %d"
>  > +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index:
>  %d id: %lu"
>  > +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d
>  id: %lu"
>  > +kvm_unpark_vcpu(unsigned long arch_cpu_id) "id: %lu"
>  >  kvm_irqchip_commit_routes(void) ""
>  >  kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s
>  vector %d virq %d"
>  >  kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
>  > @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s"
>  >  kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages
>  (took %"PRIi64" us)"
>  >  kvm_dirty_ring_reaper_kick(const char *reason) "%s"
>  >  kvm_dirty_ring_flush(int finished) "%d"
>  > -kvm_destroy_vcpu(void) ""
>  >  kvm_failed_get_vcpu_mmap_size(void) ""
>  >  kvm_cpu_exec(void) ""
>  >  kvm_interrupt_exit_request(void) ""