monitor.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-)
If a CPU selected with the "cpu" command is hot-unplugged then "info cpus"
causes QEMU to exit:
(qemu) device_del cpu1
(qemu) info cpus
qemu:qemu_cpu_kick_thread: No such process
This happens because "cpu" stores the pointer to the selected CPU into
the monitor structure. When the CPU is hot-unplugged, we end up with a
dangling pointer. The "info cpus" command then does:
hmp_info_cpus()
monitor_get_cpu_index()
mon_get_cpu()
cpu_synchronize_state() <--- called with dangling pointer
This could cause a QEMU crash as well.
This patch switches the monitor to store the QOM path instead of a
pointer to the current CPU. The path is then resolved when needed.
If the resolution fails, we assume that the CPU was removed and the
path is resetted to the default (ie, path of first_cpu).
Note that the resolution should really return a CPU object, otherwise
we have a bug. This is achieved by relying on object_resolve_path()
and CPU() instead of calling object_resolve_path_type(path, TYPE_CPU).
Suggested-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
---
monitor.c | 25 ++++++++++++++++++++-----
1 file changed, 20 insertions(+), 5 deletions(-)
diff --git a/monitor.c b/monitor.c
index fe0d1bdbb461..8489b2ad99c0 100644
--- a/monitor.c
+++ b/monitor.c
@@ -200,7 +200,7 @@ struct Monitor {
ReadLineState *rs;
MonitorQMP qmp;
- CPUState *mon_cpu;
+ gchar *mon_cpu_path;
BlockCompletionFunc *password_completion_cb;
void *password_opaque;
mon_cmd_t *cmd_table;
@@ -579,6 +579,7 @@ static void monitor_data_init(Monitor *mon)
static void monitor_data_destroy(Monitor *mon)
{
+ g_free(mon->mon_cpu_path);
qemu_chr_fe_deinit(&mon->chr, false);
if (monitor_is_qmp(mon)) {
json_message_parser_destroy(&mon->qmp.parser);
@@ -1047,20 +1048,34 @@ int monitor_set_cpu(int cpu_index)
if (cpu == NULL) {
return -1;
}
- cur_mon->mon_cpu = cpu;
+ g_free(cur_mon->mon_cpu_path);
+ cur_mon->mon_cpu_path = object_get_canonical_path(OBJECT(cpu));
return 0;
}
CPUState *mon_get_cpu(void)
{
- if (!cur_mon->mon_cpu) {
+ CPUState *cpu;
+
+ if (cur_mon->mon_cpu_path) {
+ Object *obj = object_resolve_path(cur_mon->mon_cpu_path, NULL);
+
+ if (!obj) {
+ g_free(cur_mon->mon_cpu_path);
+ cur_mon->mon_cpu_path = NULL;
+ } else {
+ cpu = CPU(obj);
+ }
+ }
+ if (!cur_mon->mon_cpu_path) {
if (!first_cpu) {
return NULL;
}
monitor_set_cpu(first_cpu->cpu_index);
+ cpu = first_cpu;
}
- cpu_synchronize_state(cur_mon->mon_cpu);
- return cur_mon->mon_cpu;
+ cpu_synchronize_state(cpu);
+ return cpu;
}
CPUArchState *mon_get_cpu_env(void)
On Fri, 13 Oct 2017 13:47:51 +0200 Greg Kurz <groug@kaod.org> wrote: > If a CPU selected with the "cpu" command is hot-unplugged then "info cpus" > causes QEMU to exit: > > (qemu) device_del cpu1 > (qemu) info cpus > qemu:qemu_cpu_kick_thread: No such process > > This happens because "cpu" stores the pointer to the selected CPU into > the monitor structure. When the CPU is hot-unplugged, we end up with a > dangling pointer. The "info cpus" command then does: > > hmp_info_cpus() > monitor_get_cpu_index() > mon_get_cpu() > cpu_synchronize_state() <--- called with dangling pointer > > This could cause a QEMU crash as well. > > This patch switches the monitor to store the QOM path instead of a > pointer to the current CPU. The path is then resolved when needed. > If the resolution fails, we assume that the CPU was removed and the > path is resetted to the default (ie, path of first_cpu). > > Note that the resolution should really return a CPU object, otherwise > we have a bug. This is achieved by relying on object_resolve_path() > and CPU() instead of calling object_resolve_path_type(path, TYPE_CPU). > > Suggested-by: Igor Mammedov <imammedo@redhat.com> > Signed-off-by: Greg Kurz <groug@kaod.org> > --- Whoever merges this patch, please add: Reported-by: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com> Thanks! > monitor.c | 25 ++++++++++++++++++++----- > 1 file changed, 20 insertions(+), 5 deletions(-) > > diff --git a/monitor.c b/monitor.c > index fe0d1bdbb461..8489b2ad99c0 100644 > --- a/monitor.c > +++ b/monitor.c > @@ -200,7 +200,7 @@ struct Monitor { > > ReadLineState *rs; > MonitorQMP qmp; > - CPUState *mon_cpu; > + gchar *mon_cpu_path; > BlockCompletionFunc *password_completion_cb; > void *password_opaque; > mon_cmd_t *cmd_table; > @@ -579,6 +579,7 @@ static void monitor_data_init(Monitor *mon) > > static void monitor_data_destroy(Monitor *mon) > { > + g_free(mon->mon_cpu_path); > qemu_chr_fe_deinit(&mon->chr, false); > if (monitor_is_qmp(mon)) { > json_message_parser_destroy(&mon->qmp.parser); > @@ -1047,20 +1048,34 @@ int monitor_set_cpu(int cpu_index) > if (cpu == NULL) { > return -1; > } > - cur_mon->mon_cpu = cpu; > + g_free(cur_mon->mon_cpu_path); > + cur_mon->mon_cpu_path = object_get_canonical_path(OBJECT(cpu)); > return 0; > } > > CPUState *mon_get_cpu(void) > { > - if (!cur_mon->mon_cpu) { > + CPUState *cpu; > + > + if (cur_mon->mon_cpu_path) { > + Object *obj = object_resolve_path(cur_mon->mon_cpu_path, NULL); > + > + if (!obj) { > + g_free(cur_mon->mon_cpu_path); > + cur_mon->mon_cpu_path = NULL; > + } else { > + cpu = CPU(obj); > + } > + } > + if (!cur_mon->mon_cpu_path) { > if (!first_cpu) { > return NULL; > } > monitor_set_cpu(first_cpu->cpu_index); > + cpu = first_cpu; > } > - cpu_synchronize_state(cur_mon->mon_cpu); > - return cur_mon->mon_cpu; > + cpu_synchronize_state(cpu); > + return cpu; > } > > CPUArchState *mon_get_cpu_env(void) > >
On Fri, 13 Oct 2017 13:47:51 +0200 Greg Kurz <groug@kaod.org> wrote: [...] > Note that the resolution should really return a CPU object, otherwise > we have a bug. This is achieved by relying on object_resolve_path() > and CPU() instead of calling object_resolve_path_type(path, TYPE_CPU). I'm not sure what you are trying to say here, could you explain a bit more why CPU(object_resolve_path()) is chosen vs object_resolve_path_type(path, TYPE_CPU) > Suggested-by: Igor Mammedov <imammedo@redhat.com> > Signed-off-by: Greg Kurz <groug@kaod.org> > --- > monitor.c | 25 ++++++++++++++++++++----- > 1 file changed, 20 insertions(+), 5 deletions(-) > > diff --git a/monitor.c b/monitor.c > index fe0d1bdbb461..8489b2ad99c0 100644 > --- a/monitor.c > +++ b/monitor.c > @@ -200,7 +200,7 @@ struct Monitor { > > ReadLineState *rs; > MonitorQMP qmp; > - CPUState *mon_cpu; > + gchar *mon_cpu_path; > BlockCompletionFunc *password_completion_cb; > void *password_opaque; > mon_cmd_t *cmd_table; > @@ -579,6 +579,7 @@ static void monitor_data_init(Monitor *mon) > > static void monitor_data_destroy(Monitor *mon) > { > + g_free(mon->mon_cpu_path); > qemu_chr_fe_deinit(&mon->chr, false); > if (monitor_is_qmp(mon)) { > json_message_parser_destroy(&mon->qmp.parser); > @@ -1047,20 +1048,34 @@ int monitor_set_cpu(int cpu_index) > if (cpu == NULL) { > return -1; > } > - cur_mon->mon_cpu = cpu; > + g_free(cur_mon->mon_cpu_path); > + cur_mon->mon_cpu_path = object_get_canonical_path(OBJECT(cpu)); > return 0; > } > > CPUState *mon_get_cpu(void) > { > - if (!cur_mon->mon_cpu) { > + CPUState *cpu; > + > + if (cur_mon->mon_cpu_path) { > + Object *obj = object_resolve_path(cur_mon->mon_cpu_path, NULL); > + > + if (!obj) { > + g_free(cur_mon->mon_cpu_path); > + cur_mon->mon_cpu_path = NULL; > + } else { > + cpu = CPU(obj); this potentially might abort if obj couldn't be cast to TYPE_CPU > + } > + } > + if (!cur_mon->mon_cpu_path) { > if (!first_cpu) { > return NULL; > } > monitor_set_cpu(first_cpu->cpu_index); > + cpu = first_cpu; > } > - cpu_synchronize_state(cur_mon->mon_cpu); > - return cur_mon->mon_cpu; > + cpu_synchronize_state(cpu); > + return cpu; > } > > CPUArchState *mon_get_cpu_env(void) > >
On Mon, 16 Oct 2017 10:41:39 +0200 Igor Mammedov <imammedo@redhat.com> wrote: > On Fri, 13 Oct 2017 13:47:51 +0200 > Greg Kurz <groug@kaod.org> wrote: > > [...] > > > Note that the resolution should really return a CPU object, otherwise > > we have a bug. This is achieved by relying on object_resolve_path() > > and CPU() instead of calling object_resolve_path_type(path, TYPE_CPU). > I'm not sure what you are trying to say here, could you explain > a bit more why CPU(object_resolve_path()) is chosen vs > object_resolve_path_type(path, TYPE_CPU) > IIUC, if we use object_resolve_path_type(path, TYPE_CPU) and path doesn't point to a CPU object, it will return NULL (see object_resolve_abs_path()) just like if the CPU got hot-unplugged. My point is that the path we got from object_get_canonical_path() did point to a CPU: if later on this path resolves to an object that isn't a CPU, then some code somewhere used the same QOM path for some unrelated object. I tend to think this is a bug in QEMU and we shouldn't silently ignore it. Makes sense ? > > > Suggested-by: Igor Mammedov <imammedo@redhat.com> > > Signed-off-by: Greg Kurz <groug@kaod.org> > > --- > > monitor.c | 25 ++++++++++++++++++++----- > > 1 file changed, 20 insertions(+), 5 deletions(-) > > > > diff --git a/monitor.c b/monitor.c > > index fe0d1bdbb461..8489b2ad99c0 100644 > > --- a/monitor.c > > +++ b/monitor.c > > @@ -200,7 +200,7 @@ struct Monitor { > > > > ReadLineState *rs; > > MonitorQMP qmp; > > - CPUState *mon_cpu; > > + gchar *mon_cpu_path; > > BlockCompletionFunc *password_completion_cb; > > void *password_opaque; > > mon_cmd_t *cmd_table; > > @@ -579,6 +579,7 @@ static void monitor_data_init(Monitor *mon) > > > > static void monitor_data_destroy(Monitor *mon) > > { > > + g_free(mon->mon_cpu_path); > > qemu_chr_fe_deinit(&mon->chr, false); > > if (monitor_is_qmp(mon)) { > > json_message_parser_destroy(&mon->qmp.parser); > > @@ -1047,20 +1048,34 @@ int monitor_set_cpu(int cpu_index) > > if (cpu == NULL) { > > return -1; > > } > > - cur_mon->mon_cpu = cpu; > > + g_free(cur_mon->mon_cpu_path); > > + cur_mon->mon_cpu_path = object_get_canonical_path(OBJECT(cpu)); > > return 0; > > } > > > > CPUState *mon_get_cpu(void) > > { > > - if (!cur_mon->mon_cpu) { > > + CPUState *cpu; > > + > > + if (cur_mon->mon_cpu_path) { > > + Object *obj = object_resolve_path(cur_mon->mon_cpu_path, NULL); > > + > > + if (!obj) { > > + g_free(cur_mon->mon_cpu_path); > > + cur_mon->mon_cpu_path = NULL; > > + } else { > > + cpu = CPU(obj); > this potentially might abort if obj couldn't be cast to TYPE_CPU > This is deliberate... is there a case where cur_mon->mon_cpu_path would legitimately point to something that isn't of type TYPE_CPU ? > > > + } > > + } > > + if (!cur_mon->mon_cpu_path) { > > if (!first_cpu) { > > return NULL; > > } > > monitor_set_cpu(first_cpu->cpu_index); > > + cpu = first_cpu; > > } > > - cpu_synchronize_state(cur_mon->mon_cpu); > > - return cur_mon->mon_cpu; > > + cpu_synchronize_state(cpu); > > + return cpu; > > } > > > > CPUArchState *mon_get_cpu_env(void) > > > > >
On Mon, 16 Oct 2017 13:24:20 +0200 Greg Kurz <groug@kaod.org> wrote: > On Mon, 16 Oct 2017 10:41:39 +0200 > Igor Mammedov <imammedo@redhat.com> wrote: > > > On Fri, 13 Oct 2017 13:47:51 +0200 > > Greg Kurz <groug@kaod.org> wrote: > > > > [...] > > > > > Note that the resolution should really return a CPU object, otherwise > > > we have a bug. This is achieved by relying on object_resolve_path() > > > and CPU() instead of calling object_resolve_path_type(path, TYPE_CPU). > > I'm not sure what you are trying to say here, could you explain > > a bit more why CPU(object_resolve_path()) is chosen vs > > object_resolve_path_type(path, TYPE_CPU) > > > > IIUC, if we use object_resolve_path_type(path, TYPE_CPU) and path doesn't > point to a CPU object, it will return NULL (see object_resolve_abs_path()) > just like if the CPU got hot-unplugged. > > My point is that the path we got from object_get_canonical_path() did > point to a CPU: if later on this path resolves to an object that isn't > a CPU, then some code somewhere used the same QOM path for some unrelated > object. I tend to think this is a bug in QEMU and we shouldn't silently > ignore it. sometimes QOM path to a cpu object could be: 1. arbitrary if set explicitly by board 2. /unattached/device[X] or /peripheral-anon/device[X] potentially might end-up with another device at the same path in case of hot-remove + migration 3. /peripheral hot-remove named (i.e. non null 'id') cpu and then plug a non cpu device with the same 'id', which is legitimate thing to do I'd use object_resolve_path_type(path, TYPE_CPU) and gracefully error out in case it returns NULL. > Makes sense ? > > > > > > Suggested-by: Igor Mammedov <imammedo@redhat.com> > > > Signed-off-by: Greg Kurz <groug@kaod.org> > > > --- > > > monitor.c | 25 ++++++++++++++++++++----- > > > 1 file changed, 20 insertions(+), 5 deletions(-) > > > > > > diff --git a/monitor.c b/monitor.c > > > index fe0d1bdbb461..8489b2ad99c0 100644 > > > --- a/monitor.c > > > +++ b/monitor.c > > > @@ -200,7 +200,7 @@ struct Monitor { > > > > > > ReadLineState *rs; > > > MonitorQMP qmp; > > > - CPUState *mon_cpu; > > > + gchar *mon_cpu_path; > > > BlockCompletionFunc *password_completion_cb; > > > void *password_opaque; > > > mon_cmd_t *cmd_table; > > > @@ -579,6 +579,7 @@ static void monitor_data_init(Monitor *mon) > > > > > > static void monitor_data_destroy(Monitor *mon) > > > { > > > + g_free(mon->mon_cpu_path); > > > qemu_chr_fe_deinit(&mon->chr, false); > > > if (monitor_is_qmp(mon)) { > > > json_message_parser_destroy(&mon->qmp.parser); > > > @@ -1047,20 +1048,34 @@ int monitor_set_cpu(int cpu_index) > > > if (cpu == NULL) { > > > return -1; > > > } > > > - cur_mon->mon_cpu = cpu; > > > + g_free(cur_mon->mon_cpu_path); > > > + cur_mon->mon_cpu_path = object_get_canonical_path(OBJECT(cpu)); > > > return 0; > > > } > > > > > > CPUState *mon_get_cpu(void) > > > { > > > - if (!cur_mon->mon_cpu) { > > > + CPUState *cpu; > > > + > > > + if (cur_mon->mon_cpu_path) { > > > + Object *obj = object_resolve_path(cur_mon->mon_cpu_path, NULL); > > > + > > > + if (!obj) { > > > + g_free(cur_mon->mon_cpu_path); > > > + cur_mon->mon_cpu_path = NULL; > > > + } else { > > > + cpu = CPU(obj); > > this potentially might abort if obj couldn't be cast to TYPE_CPU > > > > This is deliberate... is there a case where cur_mon->mon_cpu_path would > legitimately point to something that isn't of type TYPE_CPU ? > > > > > > + } > > > + } > > > + if (!cur_mon->mon_cpu_path) { > > > if (!first_cpu) { > > > return NULL; > > > } > > > monitor_set_cpu(first_cpu->cpu_index); > > > + cpu = first_cpu; > > > } > > > - cpu_synchronize_state(cur_mon->mon_cpu); > > > - return cur_mon->mon_cpu; > > > + cpu_synchronize_state(cpu); > > > + return cpu; > > > } > > > > > > CPUArchState *mon_get_cpu_env(void) > > > > > > > > > >
On Mon, 16 Oct 2017 16:31:07 +0200 Igor Mammedov <imammedo@redhat.com> wrote: > On Mon, 16 Oct 2017 13:24:20 +0200 > Greg Kurz <groug@kaod.org> wrote: > > > On Mon, 16 Oct 2017 10:41:39 +0200 > > Igor Mammedov <imammedo@redhat.com> wrote: > > > > > On Fri, 13 Oct 2017 13:47:51 +0200 > > > Greg Kurz <groug@kaod.org> wrote: > > > > > > [...] > > > > > > > Note that the resolution should really return a CPU object, otherwise > > > > we have a bug. This is achieved by relying on object_resolve_path() > > > > and CPU() instead of calling object_resolve_path_type(path, TYPE_CPU). > > > I'm not sure what you are trying to say here, could you explain > > > a bit more why CPU(object_resolve_path()) is chosen vs > > > object_resolve_path_type(path, TYPE_CPU) > > > > > > > IIUC, if we use object_resolve_path_type(path, TYPE_CPU) and path doesn't > > point to a CPU object, it will return NULL (see object_resolve_abs_path()) > > just like if the CPU got hot-unplugged. > > > > My point is that the path we got from object_get_canonical_path() did > > point to a CPU: if later on this path resolves to an object that isn't > > a CPU, then some code somewhere used the same QOM path for some unrelated > > object. I tend to think this is a bug in QEMU and we shouldn't silently > > ignore it. > sometimes QOM path to a cpu object could be: > 1. arbitrary if set explicitly by board > 2. /unattached/device[X] or /peripheral-anon/device[X] > potentially might end-up with another device at the same path > in case of hot-remove + migration > 3. /peripheral > hot-remove named (i.e. non null 'id') cpu and then plug a non cpu device with the same 'id', > which is legitimate thing to do > Ok, I didn't think of these cases... > I'd use object_resolve_path_type(path, TYPE_CPU) and > gracefully error out in case it returns NULL. > I'll do that. Thanks for the detailed explanation. > > Makes sense ? > > > > > > > > > Suggested-by: Igor Mammedov <imammedo@redhat.com> > > > > Signed-off-by: Greg Kurz <groug@kaod.org> > > > > --- > > > > monitor.c | 25 ++++++++++++++++++++----- > > > > 1 file changed, 20 insertions(+), 5 deletions(-) > > > > > > > > diff --git a/monitor.c b/monitor.c > > > > index fe0d1bdbb461..8489b2ad99c0 100644 > > > > --- a/monitor.c > > > > +++ b/monitor.c > > > > @@ -200,7 +200,7 @@ struct Monitor { > > > > > > > > ReadLineState *rs; > > > > MonitorQMP qmp; > > > > - CPUState *mon_cpu; > > > > + gchar *mon_cpu_path; > > > > BlockCompletionFunc *password_completion_cb; > > > > void *password_opaque; > > > > mon_cmd_t *cmd_table; > > > > @@ -579,6 +579,7 @@ static void monitor_data_init(Monitor *mon) > > > > > > > > static void monitor_data_destroy(Monitor *mon) > > > > { > > > > + g_free(mon->mon_cpu_path); > > > > qemu_chr_fe_deinit(&mon->chr, false); > > > > if (monitor_is_qmp(mon)) { > > > > json_message_parser_destroy(&mon->qmp.parser); > > > > @@ -1047,20 +1048,34 @@ int monitor_set_cpu(int cpu_index) > > > > if (cpu == NULL) { > > > > return -1; > > > > } > > > > - cur_mon->mon_cpu = cpu; > > > > + g_free(cur_mon->mon_cpu_path); > > > > + cur_mon->mon_cpu_path = object_get_canonical_path(OBJECT(cpu)); > > > > return 0; > > > > } > > > > > > > > CPUState *mon_get_cpu(void) > > > > { > > > > - if (!cur_mon->mon_cpu) { > > > > + CPUState *cpu; > > > > + > > > > + if (cur_mon->mon_cpu_path) { > > > > + Object *obj = object_resolve_path(cur_mon->mon_cpu_path, NULL); > > > > + > > > > + if (!obj) { > > > > + g_free(cur_mon->mon_cpu_path); > > > > + cur_mon->mon_cpu_path = NULL; > > > > + } else { > > > > + cpu = CPU(obj); > > > this potentially might abort if obj couldn't be cast to TYPE_CPU > > > > > > > This is deliberate... is there a case where cur_mon->mon_cpu_path would > > legitimately point to something that isn't of type TYPE_CPU ? > > > > > > > > > + } > > > > + } > > > > + if (!cur_mon->mon_cpu_path) { > > > > if (!first_cpu) { > > > > return NULL; > > > > } > > > > monitor_set_cpu(first_cpu->cpu_index); > > > > + cpu = first_cpu; > > > > } > > > > - cpu_synchronize_state(cur_mon->mon_cpu); > > > > - return cur_mon->mon_cpu; > > > > + cpu_synchronize_state(cpu); > > > > + return cpu; > > > > } > > > > > > > > CPUArchState *mon_get_cpu_env(void) > > > > > > > > > > > > > > > >
© 2016 - 2024 Red Hat, Inc.