[PATCH] kvm: reuse per-vcpu stats fd to avoid vcpu interruption

Posted by Marcelo Tosatti 11 months, 1 week ago

A regression has been detected in latency testing of KVM guests.
More specifically, it was observed that the cyclictest
numbers inside an isolated vcpu (running on an isolated pcpu) are:

# Max Latencies: 00090 00096 00141

Where a maximum of 50us is acceptable.

The implementation of KVM_GET_STATS_FD uses run_on_cpu to query
per-vcpu statistics, which interrupts the vcpu (and is unnecessary).

To fix this, open the per-vcpu stats fd at vcpu initialization,
and read from it in QEMU's main thread.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 7679f397ae..5da2901eca 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -450,6 +450,8 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
                          "kvm_init_vcpu: kvm_arch_init_vcpu failed (%lu)",
                          kvm_arch_vcpu_id(cpu));
     }
+    cpu->kvm_vcpu_stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
+
 err:
     return ret;
 }
@@ -4007,7 +4009,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd
 
     /* Read stats header */
     kvm_stats_header = &descriptors->kvm_stats_header;
-    ret = read(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header));
+    ret = pread(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header), 0);
     if (ret != sizeof(*kvm_stats_header)) {
         error_setg(errp, "KVM stats: failed to read stats header: "
                    "expected %zu actual %zu",
@@ -4038,7 +4040,8 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd
 }
 
 static void query_stats(StatsResultList **result, StatsTarget target,
-                        strList *names, int stats_fd, Error **errp)
+                        strList *names, int stats_fd, Error **errp,
+                        CPUState *cpu)
 {
     struct kvm_stats_desc *kvm_stats_desc;
     struct kvm_stats_header *kvm_stats_header;
@@ -4096,7 +4099,7 @@ static void query_stats(StatsResultList **result, StatsTarget target,
         break;
     case STATS_TARGET_VCPU:
         add_stats_entry(result, STATS_PROVIDER_KVM,
-                        current_cpu->parent_obj.canonical_path,
+                        cpu->parent_obj.canonical_path,
                         stats_list);
         break;
     default:
@@ -4133,10 +4136,9 @@ static void query_stats_schema(StatsSchemaList **result, StatsTarget target,
     add_stats_schema(result, STATS_PROVIDER_KVM, target, stats_list);
 }
 
-static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data)
+static void query_stats_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args)
 {
-    StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr;
-    int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
+    int stats_fd = cpu->kvm_vcpu_stats_fd;
     Error *local_err = NULL;
 
     if (stats_fd == -1) {
@@ -4145,14 +4147,13 @@ static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data)
         return;
     }
     query_stats(kvm_stats_args->result.stats, STATS_TARGET_VCPU,
-                kvm_stats_args->names, stats_fd, kvm_stats_args->errp);
-    close(stats_fd);
+                kvm_stats_args->names, stats_fd, kvm_stats_args->errp,
+                cpu);
 }
 
-static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data)
+static void query_stats_schema_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args)
 {
-    StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr;
-    int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
+    int stats_fd = cpu->kvm_vcpu_stats_fd;
     Error *local_err = NULL;
 
     if (stats_fd == -1) {
@@ -4162,7 +4163,6 @@ static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data)
     }
     query_stats_schema(kvm_stats_args->result.schema, STATS_TARGET_VCPU, stats_fd,
                        kvm_stats_args->errp);
-    close(stats_fd);
 }
 
 static void query_stats_cb(StatsResultList **result, StatsTarget target,
@@ -4180,7 +4180,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target,
             error_setg_errno(errp, errno, "KVM stats: ioctl failed");
             return;
         }
-        query_stats(result, target, names, stats_fd, errp);
+        query_stats(result, target, names, stats_fd, errp, NULL);
         close(stats_fd);
         break;
     }
@@ -4194,7 +4194,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target,
             if (!apply_str_list_filter(cpu->parent_obj.canonical_path, targets)) {
                 continue;
             }
-            run_on_cpu(cpu, query_stats_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args));
+            query_stats_vcpu(cpu, &stats_args);
         }
         break;
     }
@@ -4220,6 +4220,6 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
     if (first_cpu) {
         stats_args.result.schema = result;
         stats_args.errp = errp;
-        run_on_cpu(first_cpu, query_stats_schema_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args));
+        query_stats_schema_vcpu(first_cpu, &stats_args);
     }
 }
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 383456d1b3..4f0fa70755 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -402,6 +402,7 @@ struct CPUState {
     struct kvm_dirty_gfn *kvm_dirty_gfns;
     uint32_t kvm_fetch_index;
     uint64_t dirty_pages;
+    int kvm_vcpu_stats_fd;
 
     /* Use by accel-block: CPU is executing an ioctl() */
     QemuLockCnt in_ioctl_lock;
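
A note on the read() -> pread() change above: with a freshly opened fd
the file offset is guaranteed to be 0, so a plain read() picked up the
stats header correctly. Once the fd is opened at vcpu init and reused
across queries, that guarantee is gone; pread() reads at an explicit
offset and does not disturb the fd's offset, so every query sees the
header at offset 0. A minimal standalone sketch (illustration only;
the helper name is not from the patch):

    #include <unistd.h>

    /* With read(), a second query on the same long-lived fd would
     * continue where the first one stopped, returning descriptor data
     * instead of the header. pread() makes each query independent of
     * any previous reads on the fd.
     */
    static ssize_t read_stats_header(int fd, void *hdr, size_t len)
    {
        return pread(fd, hdr, len, 0);
    }
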
Re: [PATCH] kvm: reuse per-vcpu stats fd to avoid vcpu interruption
Posted by Markus Armbruster 11 months, 1 week ago
Marcelo Tosatti <mtosatti@redhat.com> writes:

> A regression has been detected in latency testing of KVM guests.
> More specifically, it was observed that the cyclictest
> numbers inside an isolated vcpu (running on an isolated pcpu) are:
>
> # Max Latencies: 00090 00096 00141
>
> Where a maximum of 50us is acceptable.
>
> The implementation of KVM_GET_STATS_FD uses run_on_cpu to query
> per-vcpu statistics, which interrupts the vcpu (and is unnecessary).
>
> To fix this, open the per-vcpu stats fd at vcpu initialization,
> and read from it in QEMU's main thread.
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

[...]

> @@ -4038,7 +4040,8 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd
>  }
>  
>  static void query_stats(StatsResultList **result, StatsTarget target,
> -                        strList *names, int stats_fd, Error **errp)
> +                        strList *names, int stats_fd, Error **errp,
> +                        CPUState *cpu)

include/qapi/error.h:

 * - Functions that use Error to report errors have an Error **errp
 *   parameter.  It should be the last parameter, except for functions
 *   taking variable arguments.

>  {
>      struct kvm_stats_desc *kvm_stats_desc;
>      struct kvm_stats_header *kvm_stats_header;

[...]
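
Applied to this patch, the convention means the new CPUState parameter
goes before errp rather than after it; v2 below adjusts the signature
to:

    static void query_stats(StatsResultList **result, StatsTarget target,
                            strList *names, int stats_fd, CPUState *cpu,
                            Error **errp);
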
[PATCH v2] kvm: reuse per-vcpu stats fd to avoid vcpu interruption
Posted by Marcelo Tosatti 11 months ago

A regression has been detected in latency testing of KVM guests.
More specifically, it was observed that the cyclictest
numbers inside an isolated vcpu (running on an isolated pcpu) are:

# Max Latencies: 00090 00096 00141

Where a maximum of 50us is acceptable.

The implementation of KVM_GET_STATS_FD uses run_on_cpu to query
per-vcpu statistics, which interrupts the vcpu (and is unnecessary).

To fix this, open the per-vcpu stats fd at vcpu initialization,
and read from it in QEMU's main thread.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

---

v2: use convention for Error parameter order (Markus Armbruster)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 7679f397ae..9aa898db14 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -450,6 +450,8 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
                          "kvm_init_vcpu: kvm_arch_init_vcpu failed (%lu)",
                          kvm_arch_vcpu_id(cpu));
     }
+    cpu->kvm_vcpu_stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
+
 err:
     return ret;
 }
@@ -4007,7 +4009,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd
 
     /* Read stats header */
     kvm_stats_header = &descriptors->kvm_stats_header;
-    ret = read(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header));
+    ret = pread(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header), 0);
     if (ret != sizeof(*kvm_stats_header)) {
         error_setg(errp, "KVM stats: failed to read stats header: "
                    "expected %zu actual %zu",
@@ -4038,7 +4040,8 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd
 }
 
 static void query_stats(StatsResultList **result, StatsTarget target,
-                        strList *names, int stats_fd, Error **errp)
+                        strList *names, int stats_fd, CPUState *cpu,
+                        Error **errp)
 {
     struct kvm_stats_desc *kvm_stats_desc;
     struct kvm_stats_header *kvm_stats_header;
@@ -4096,7 +4099,7 @@ static void query_stats(StatsResultList **result, StatsTarget target,
         break;
     case STATS_TARGET_VCPU:
         add_stats_entry(result, STATS_PROVIDER_KVM,
-                        current_cpu->parent_obj.canonical_path,
+                        cpu->parent_obj.canonical_path,
                         stats_list);
         break;
     default:
@@ -4133,10 +4136,9 @@ static void query_stats_schema(StatsSchemaList **result, StatsTarget target,
     add_stats_schema(result, STATS_PROVIDER_KVM, target, stats_list);
 }
 
-static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data)
+static void query_stats_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args)
 {
-    StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr;
-    int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
+    int stats_fd = cpu->kvm_vcpu_stats_fd;
     Error *local_err = NULL;
 
     if (stats_fd == -1) {
@@ -4145,14 +4147,13 @@ static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data)
         return;
     }
     query_stats(kvm_stats_args->result.stats, STATS_TARGET_VCPU,
-                kvm_stats_args->names, stats_fd, kvm_stats_args->errp);
-    close(stats_fd);
+                kvm_stats_args->names, stats_fd, cpu,
+                kvm_stats_args->errp);
 }
 
-static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data)
+static void query_stats_schema_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args)
 {
-    StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr;
-    int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
+    int stats_fd = cpu->kvm_vcpu_stats_fd;
     Error *local_err = NULL;
 
     if (stats_fd == -1) {
@@ -4162,7 +4163,6 @@ static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data)
     }
     query_stats_schema(kvm_stats_args->result.schema, STATS_TARGET_VCPU, stats_fd,
                        kvm_stats_args->errp);
-    close(stats_fd);
 }
 
 static void query_stats_cb(StatsResultList **result, StatsTarget target,
@@ -4180,7 +4180,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target,
             error_setg_errno(errp, errno, "KVM stats: ioctl failed");
             return;
         }
-        query_stats(result, target, names, stats_fd, errp);
+        query_stats(result, target, names, stats_fd, NULL, errp);
         close(stats_fd);
         break;
     }
@@ -4194,7 +4194,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target,
             if (!apply_str_list_filter(cpu->parent_obj.canonical_path, targets)) {
                 continue;
             }
-            run_on_cpu(cpu, query_stats_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args));
+            query_stats_vcpu(cpu, &stats_args);
         }
         break;
     }
@@ -4220,6 +4220,6 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
     if (first_cpu) {
         stats_args.result.schema = result;
         stats_args.errp = errp;
-        run_on_cpu(first_cpu, query_stats_schema_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args));
+        query_stats_schema_vcpu(first_cpu, &stats_args);
     }
 }
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 383456d1b3..4f0fa70755 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -402,6 +402,7 @@ struct CPUState {
     struct kvm_dirty_gfn *kvm_dirty_gfns;
     uint32_t kvm_fetch_index;
     uint64_t dirty_pages;
+    int kvm_vcpu_stats_fd;
 
     /* Use by accel-block: CPU is executing an ioctl() */
     QemuLockCnt in_ioctl_lock;
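
The latency win comes from the call path, not from the read itself:
run_on_cpu() queues a work item for the target vcpu thread and kicks
the vcpu out of guest mode so the item can be serviced, which is
exactly the interruption cyclictest observes on an isolated cpu.
Condensed from the diff, the vcpu-stats path changes from

    /* before: bounce the query to the vcpu thread (guest exit) */
    run_on_cpu(cpu, query_stats_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args));

to

    /* after: the fd was opened once in kvm_init_vcpu(), so the main
     * thread reads the statistics without touching the vcpu thread */
    query_stats_vcpu(cpu, &stats_args);
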
Re: [PATCH v2] kvm: reuse per-vcpu stats fd to avoid vcpu interruption
Posted by Philippe Mathieu-Daudé 11 months ago
Hi Marcelo,

On 16/6/23 19:44, Marcelo Tosatti wrote:
>
> A regression has been detected in latency testing of KVM guests.
> More specifically, it was observed that the cyclictest
> numbers inside an isolated vcpu (running on an isolated pcpu) are:
>
> # Max Latencies: 00090 00096 00141
>
> Where a maximum of 50us is acceptable.
>
> The implementation of KVM_GET_STATS_FD uses run_on_cpu to query
> per-vcpu statistics, which interrupts the vcpu (and is unnecessary).
>
> To fix this, open the per-vcpu stats fd at vcpu initialization,
> and read from it in QEMU's main thread.
>
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>


>   static void query_stats(StatsResultList **result, StatsTarget target,
> -                        strList *names, int stats_fd, Error **errp)
> +                        strList *names, int stats_fd, CPUState *cpu,
> +                        Error **errp)
>   {
>       struct kvm_stats_desc *kvm_stats_desc;
>       struct kvm_stats_header *kvm_stats_header;
> @@ -4096,7 +4099,7 @@ static void query_stats(StatsResultList **result, StatsTarget target,
>           break;
>       case STATS_TARGET_VCPU:
>           add_stats_entry(result, STATS_PROVIDER_KVM,
> -                        current_cpu->parent_obj.canonical_path,
> +                        cpu->parent_obj.canonical_path,

Can we get a NULL deref here ...

>                           stats_list);
>           break;
>       default:


>   static void query_stats_cb(StatsResultList **result, StatsTarget target,
> @@ -4180,7 +4180,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target,
>               error_setg_errno(errp, errno, "KVM stats: ioctl failed");
>               return;
>           }
> -        query_stats(result, target, names, stats_fd, errp);
> +        query_stats(result, target, names, stats_fd, NULL, errp);

... from here?

>           close(stats_fd);
>           break;
>       }
Re: [PATCH v2] kvm: reuse per-vcpu stats fd to avoid vcpu interruption
Posted by Paolo Bonzini 11 months ago
On 6/18/23 01:00, Philippe Mathieu-Daudé wrote:
>>       case STATS_TARGET_VCPU:
>>           add_stats_entry(result, STATS_PROVIDER_KVM,
>> -                        current_cpu->parent_obj.canonical_path,
>> +                        cpu->parent_obj.canonical_path,
> 
> Can we get a NULL deref here ...
> 
>>                           stats_list);
>>           break;
>>       default:
> 
> 
>>   static void query_stats_cb(StatsResultList **result, StatsTarget target,
>> @@ -4180,7 +4180,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target,
>>               error_setg_errno(errp, errno, "KVM stats: ioctl failed");
>>               return;
>>           }
>> -        query_stats(result, target, names, stats_fd, errp);
>> +        query_stats(result, target, names, stats_fd, NULL, errp);
> 
> ... from here?

No, target is STATS_TARGET_VM here.
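
(For reference, cpu is only dereferenced in the STATS_TARGET_VCPU arm
of the switch in query_stats(); roughly, as a paraphrase rather than
the exact code:

    switch (target) {
    case STATS_TARGET_VM:
        /* cpu may be NULL here: it is never dereferenced */
        add_stats_entry(result, STATS_PROVIDER_KVM, NULL, stats_list);
        break;
    case STATS_TARGET_VCPU:
        /* only reached via query_stats_vcpu(), which passes a real cpu */
        add_stats_entry(result, STATS_PROVIDER_KVM,
                        cpu->parent_obj.canonical_path,
                        stats_list);
        break;
    default:
        break;
    }

so the NULL passed for the VM target never reaches the vcpu branch.)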

In the kernel, KVM_GET_STATS_FD could also be improved because it does
not need to take vcpu->mutex.  However, that would not be enough; it
would require QEMU changes anyway to remove run_on_cpu.  So I'm queuing
the patch, thanks!

Paolo