Make qemu_thread_create() return a Boolean to indicate if it succeeds
rather than failing with an error. And add an Error parameter to hold
the error message and let the callers handle it.
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Daniel P. Berrangé <berrange@redhat.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Fei Li <fli@suse.com>
---
cpus.c | 45 ++++++++++++++++++++++++-------------
dump.c | 6 +++--
hw/misc/edu.c | 6 +++--
hw/ppc/spapr_hcall.c | 10 +++++++--
hw/rdma/rdma_backend.c | 4 +++-
hw/usb/ccid-card-emulated.c | 16 ++++++++++----
include/qemu/thread.h | 4 ++--
io/task.c | 3 ++-
iothread.c | 16 +++++++++-----
migration/migration.c | 54 +++++++++++++++++++++++++++++----------------
migration/postcopy-ram.c | 14 ++++++++++--
migration/ram.c | 40 ++++++++++++++++++++++++---------
migration/savevm.c | 11 ++++++---
tests/atomic_add-bench.c | 3 ++-
tests/iothread.c | 2 +-
tests/qht-bench.c | 3 ++-
tests/rcutorture.c | 3 ++-
tests/test-aio.c | 2 +-
tests/test-rcu-list.c | 3 ++-
ui/vnc-jobs.c | 17 +++++++++-----
ui/vnc-jobs.h | 2 +-
ui/vnc.c | 4 +++-
util/compatfd.c | 12 ++++++++--
util/oslib-posix.c | 17 ++++++++++----
util/qemu-thread-posix.c | 24 +++++++++++++-------
util/qemu-thread-win32.c | 16 ++++++++++----
util/rcu.c | 3 ++-
util/thread-pool.c | 4 +++-
28 files changed, 243 insertions(+), 101 deletions(-)
diff --git a/cpus.c b/cpus.c
index 7b091bda53..e8450e518a 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1961,15 +1961,20 @@ static void qemu_tcg_init_vcpu(CPUState *cpu, Error **errp)
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
cpu->cpu_index);
- qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
- cpu, QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(cpu->thread, thread_name,
+ qemu_tcg_cpu_thread_fn, cpu,
+ QEMU_THREAD_JOINABLE, errp)) {
+ return;
+ }
} else {
/* share a single thread for all cpus with TCG */
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
- qemu_thread_create(cpu->thread, thread_name,
- qemu_tcg_rr_cpu_thread_fn,
- cpu, QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(cpu->thread, thread_name,
+ qemu_tcg_rr_cpu_thread_fn, cpu,
+ QEMU_THREAD_JOINABLE, errp)) {
+ return;
+ }
single_tcg_halt_cond = cpu->halt_cond;
single_tcg_cpu_thread = cpu->thread;
@@ -1997,8 +2002,10 @@ static void qemu_hax_start_vcpu(CPUState *cpu, Error **errp)
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
cpu->cpu_index);
- qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
- cpu, QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
+ cpu, QEMU_THREAD_JOINABLE, errp)) {
+ return;
+ }
#ifdef _WIN32
cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
@@ -2013,8 +2020,10 @@ static void qemu_kvm_start_vcpu(CPUState *cpu, Error **errp)
qemu_cond_init(cpu->halt_cond);
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
cpu->cpu_index);
- qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
- cpu, QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
+ cpu, QEMU_THREAD_JOINABLE, errp)) {
+ /* keep 'if' here in case there is further error handling logic */
+ }
}
static void qemu_hvf_start_vcpu(CPUState *cpu, Error **errp)
@@ -2031,8 +2040,10 @@ static void qemu_hvf_start_vcpu(CPUState *cpu, Error **errp)
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
cpu->cpu_index);
- qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
- cpu, QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
+ cpu, QEMU_THREAD_JOINABLE, errp)) {
+ /* keep 'if' here in case there is further error handling logic */
+ }
}
static void qemu_whpx_start_vcpu(CPUState *cpu, Error **errp)
@@ -2044,8 +2055,10 @@ static void qemu_whpx_start_vcpu(CPUState *cpu, Error **errp)
qemu_cond_init(cpu->halt_cond);
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
cpu->cpu_index);
- qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
- cpu, QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
+ cpu, QEMU_THREAD_JOINABLE, errp)) {
+ return;
+ }
#ifdef _WIN32
cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
@@ -2060,8 +2073,10 @@ static void qemu_dummy_start_vcpu(CPUState *cpu, Error **errp)
qemu_cond_init(cpu->halt_cond);
snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
cpu->cpu_index);
- qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
- QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn,
+ cpu, QEMU_THREAD_JOINABLE, errp)) {
+ /* keep 'if' here in case there is further error handling logic */
+ }
}
bool qemu_init_vcpu(CPUState *cpu, Error **errp)
diff --git a/dump.c b/dump.c
index 4ec94c5e25..1f003aff9a 100644
--- a/dump.c
+++ b/dump.c
@@ -2020,8 +2020,10 @@ void qmp_dump_guest_memory(bool paging, const char *file,
if (detach_p) {
/* detached dump */
s->detached = true;
- qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
- s, QEMU_THREAD_DETACHED);
+ if (!qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
+ s, QEMU_THREAD_DETACHED, errp)) {
+ /* keep 'if' here in case there is further error handling logic */
+ }
} else {
/* sync dump */
dump_process(s, errp);
diff --git a/hw/misc/edu.c b/hw/misc/edu.c
index cdcf550dd7..6684c60a96 100644
--- a/hw/misc/edu.c
+++ b/hw/misc/edu.c
@@ -355,8 +355,10 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp)
qemu_mutex_init(&edu->thr_mutex);
qemu_cond_init(&edu->thr_cond);
- qemu_thread_create(&edu->thread, "edu", edu_fact_thread,
- edu, QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(&edu->thread, "edu", edu_fact_thread,
+ edu, QEMU_THREAD_JOINABLE, errp)) {
+ return;
+ }
memory_region_init_io(&edu->mmio, OBJECT(edu), &edu_mmio_ops, edu,
"edu-mmio", 1 * MiB);
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index ae913d070f..7c16ade04a 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -478,6 +478,7 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
sPAPRPendingHPT *pending = spapr->pending_hpt;
uint64_t current_ram_size;
int rc;
+ Error *local_err = NULL;
if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
return H_AUTHORITY;
@@ -538,8 +539,13 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
pending->shift = shift;
pending->ret = H_HARDWARE;
- qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
- hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
+ if (!qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
+ hpt_prepare_thread, pending,
+ QEMU_THREAD_DETACHED, &local_err)) {
+ error_reportf_err(local_err, "failed to create hpt_prepare_thread: ");
+ g_free(pending);
+ return H_RESOURCE;
+ }
spapr->pending_hpt = pending;
diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index d7a4bbd91f..53a2bd0d85 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -164,8 +164,10 @@ static void start_comp_thread(RdmaBackendDev *backend_dev)
snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
ibv_get_device_name(backend_dev->ib_dev));
backend_dev->comp_thread.run = true;
+ /* FIXME: let the further caller handle the error instead of abort() here */
qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
- comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
+ comp_handler_thread, backend_dev,
+ QEMU_THREAD_DETACHED, &error_abort);
}
void rdma_backend_register_comp_handler(void (*handler)(int status,
diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c
index 25976ed84f..c6783f124a 100644
--- a/hw/usb/ccid-card-emulated.c
+++ b/hw/usb/ccid-card-emulated.c
@@ -33,6 +33,7 @@
#include "qemu/main-loop.h"
#include "ccid.h"
#include "qapi/error.h"
+#include "qemu/error-report.h"
#define DPRINTF(card, lvl, fmt, ...) \
do {\
@@ -544,10 +545,17 @@ static void emulated_realize(CCIDCardState *base, Error **errp)
error_setg(errp, "%s: failed to initialize vcard", TYPE_EMULATED_CCID);
goto out2;
}
- qemu_thread_create(&card->event_thread_id, "ccid/event", event_thread,
- card, QEMU_THREAD_JOINABLE);
- qemu_thread_create(&card->apdu_thread_id, "ccid/apdu", handle_apdu_thread,
- card, QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(&card->event_thread_id, "ccid/event", event_thread,
+ card, QEMU_THREAD_JOINABLE, errp)) {
+ error_report("failed to create event_thread");
+ goto out2;
+ }
+ if (!qemu_thread_create(&card->apdu_thread_id, "ccid/apdu",
+ handle_apdu_thread, card,
+ QEMU_THREAD_JOINABLE, errp)) {
+ error_report("failed to create handle_apdu_thread");
+ goto out2;
+ }
out2:
clean_event_notifier(card);
diff --git a/include/qemu/thread.h b/include/qemu/thread.h
index 55d83a907c..12291f4ccd 100644
--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -152,9 +152,9 @@ void qemu_event_reset(QemuEvent *ev);
void qemu_event_wait(QemuEvent *ev);
void qemu_event_destroy(QemuEvent *ev);
-void qemu_thread_create(QemuThread *thread, const char *name,
+bool qemu_thread_create(QemuThread *thread, const char *name,
void *(*start_routine)(void *),
- void *arg, int mode);
+ void *arg, int mode, Error **errp);
void *qemu_thread_join(QemuThread *thread);
void qemu_thread_get_self(QemuThread *thread);
bool qemu_thread_is_self(QemuThread *thread);
diff --git a/io/task.c b/io/task.c
index 2886a2c1bc..6d3a18ab80 100644
--- a/io/task.c
+++ b/io/task.c
@@ -149,7 +149,8 @@ void qio_task_run_in_thread(QIOTask *task,
"io-task-worker",
qio_task_thread_worker,
data,
- QEMU_THREAD_DETACHED);
+ QEMU_THREAD_DETACHED,
+ &error_abort);
}
diff --git a/iothread.c b/iothread.c
index 2fb1cdf55d..7335dacf0b 100644
--- a/iothread.c
+++ b/iothread.c
@@ -164,9 +164,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
&local_error);
if (local_error) {
error_propagate(errp, local_error);
- aio_context_unref(iothread->ctx);
- iothread->ctx = NULL;
- return;
+ goto fail;
}
qemu_mutex_init(&iothread->init_done_lock);
@@ -178,8 +176,12 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
*/
name = object_get_canonical_path_component(OBJECT(obj));
thread_name = g_strdup_printf("IO %s", name);
- qemu_thread_create(&iothread->thread, thread_name, iothread_run,
- iothread, QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(&iothread->thread, thread_name, iothread_run,
+ iothread, QEMU_THREAD_JOINABLE, errp)) {
+ g_free(thread_name);
+ g_free(name);
+ goto fail;
+ }
g_free(thread_name);
g_free(name);
@@ -190,6 +192,10 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
&iothread->init_done_lock);
}
qemu_mutex_unlock(&iothread->init_done_lock);
+ return;
+fail:
+ aio_context_unref(iothread->ctx);
+ iothread->ctx = NULL;
}
typedef struct {
diff --git a/migration/migration.c b/migration/migration.c
index 0537fc0c26..af6c72ac5d 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -438,19 +438,22 @@ static void process_incoming_migration_co(void *opaque)
/* Make sure all file formats flush their mutable metadata */
bdrv_invalidate_cache_all(&local_err);
if (local_err) {
- migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
- MIGRATION_STATUS_FAILED);
error_report_err(local_err);
- exit(EXIT_FAILURE);
+ goto fail;
}
if (colo_init_ram_cache() < 0) {
error_report("Init ram cache failed");
- exit(EXIT_FAILURE);
+ goto fail;
}
- qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
- colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
+ colo_process_incoming_thread, mis,
+ QEMU_THREAD_JOINABLE, &local_err)) {
+ error_reportf_err(local_err, "failed to create "
+ "colo_process_incoming_thread: ");
+ goto fail;
+ }
mis->have_colo_incoming_thread = true;
qemu_coroutine_yield();
@@ -461,20 +464,22 @@ static void process_incoming_migration_co(void *opaque)
}
if (ret < 0) {
- Error *local_err = NULL;
-
- migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
- MIGRATION_STATUS_FAILED);
error_report("load of migration failed: %s", strerror(-ret));
- qemu_fclose(mis->from_src_file);
- if (multifd_load_cleanup(&local_err) != 0) {
- error_report_err(local_err);
- }
- exit(EXIT_FAILURE);
+ goto fail;
}
mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
qemu_bh_schedule(mis->bh);
mis->migration_incoming_co = NULL;
+ return;
+fail:
+ local_err = NULL;
+ migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
+ MIGRATION_STATUS_FAILED);
+ qemu_fclose(mis->from_src_file);
+ if (multifd_load_cleanup(&local_err) != 0) {
+ error_report_err(local_err);
+ }
+ exit(EXIT_FAILURE);
}
static void migration_incoming_setup(QEMUFile *f)
@@ -2345,6 +2350,7 @@ out:
static int open_return_path_on_source(MigrationState *ms,
bool create_thread)
{
+ Error *local_err = NULL;
ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
if (!ms->rp_state.from_dst_file) {
@@ -2358,8 +2364,13 @@ static int open_return_path_on_source(MigrationState *ms,
return 0;
}
- qemu_thread_create(&ms->rp_state.rp_thread, "return path",
- source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(&ms->rp_state.rp_thread, "return path",
+ source_return_path_thread, ms,
+ QEMU_THREAD_JOINABLE, &local_err)) {
+ error_reportf_err(local_err,
+ "failed to create source_return_path_thread: ");
+ return -1;
+ }
trace_open_return_path_on_source_continue();
@@ -3189,8 +3200,13 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
migrate_fd_cleanup(s);
return;
}
- qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
- QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(&s->thread, "live_migration", migration_thread,
+ s, QEMU_THREAD_JOINABLE, &error_in)) {
+ error_reportf_err(error_in, "failed to create migration_thread: ");
+ migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+ migrate_fd_cleanup(s);
+ return;
+ }
s->migration_thread_running = true;
}
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index fa09dba534..80bfa9c4a2 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -1083,6 +1083,8 @@ retry:
int postcopy_ram_enable_notify(MigrationIncomingState *mis)
{
+ Error *local_err = NULL;
+
/* Open the fd for the kernel to give us userfaults */
mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
if (mis->userfault_fd == -1) {
@@ -1109,8 +1111,16 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
}
qemu_sem_init(&mis->fault_thread_sem, 0);
- qemu_thread_create(&mis->fault_thread, "postcopy/fault",
- postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(&mis->fault_thread, "postcopy/fault",
+ postcopy_ram_fault_thread, mis,
+ QEMU_THREAD_JOINABLE, &local_err)) {
+ error_reportf_err(local_err,
+ "failed to create postcopy_ram_fault_thread: ");
+ close(mis->userfault_event_fd);
+ close(mis->userfault_fd);
+ qemu_sem_destroy(&mis->fault_thread_sem);
+ return -1;
+ }
qemu_sem_wait(&mis->fault_thread_sem);
qemu_sem_destroy(&mis->fault_thread_sem);
mis->have_fault_thread = true;
diff --git a/migration/ram.c b/migration/ram.c
index 658dfa88a3..6e0cccf066 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -473,6 +473,7 @@ static void compress_threads_save_cleanup(void)
static int compress_threads_save_setup(void)
{
int i, thread_count;
+ Error *local_err = NULL;
if (!migrate_use_compression()) {
return 0;
@@ -502,9 +503,12 @@ static int compress_threads_save_setup(void)
comp_param[i].quit = false;
qemu_mutex_init(&comp_param[i].mutex);
qemu_cond_init(&comp_param[i].cond);
- qemu_thread_create(compress_threads + i, "compress",
- do_data_compress, comp_param + i,
- QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(compress_threads + i, "compress",
+ do_data_compress, comp_param + i,
+ QEMU_THREAD_JOINABLE, &local_err)) {
+ error_reportf_err(local_err, "failed to create do_data_compress: ");
+ goto exit;
+ }
}
return 0;
@@ -1075,8 +1079,14 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
p->c = QIO_CHANNEL(sioc);
qio_channel_set_delay(p->c, false);
p->running = true;
- qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
- QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
+ QEMU_THREAD_JOINABLE, &local_err)) {
+ migrate_set_error(migrate_get_current(), local_err);
+ error_reportf_err(local_err,
+ "failed to create multifd_send_thread: ");
+ multifd_save_cleanup();
+ return;
+ }
atomic_inc(&multifd_send_state->count);
}
@@ -1350,8 +1360,13 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
p->num_packets = 1;
p->running = true;
- qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
- QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(&p->thread, p->name, multifd_recv_thread,
+ p, QEMU_THREAD_JOINABLE, &local_err)) {
+ error_propagate_prepend(errp, local_err,
+ "failed to create multifd_recv_thread: ");
+ multifd_recv_terminate_threads(local_err);
+ return false;
+ }
atomic_inc(&multifd_recv_state->count);
return atomic_read(&multifd_recv_state->count) ==
migrate_multifd_channels();
@@ -3617,6 +3632,7 @@ static void compress_threads_load_cleanup(void)
static int compress_threads_load_setup(QEMUFile *f)
{
int i, thread_count;
+ Error *local_err = NULL;
if (!migrate_use_compression()) {
return 0;
@@ -3638,9 +3654,13 @@ static int compress_threads_load_setup(QEMUFile *f)
qemu_cond_init(&decomp_param[i].cond);
decomp_param[i].done = true;
decomp_param[i].quit = false;
- qemu_thread_create(decompress_threads + i, "decompress",
- do_data_decompress, decomp_param + i,
- QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(decompress_threads + i, "decompress",
+ do_data_decompress, decomp_param + i,
+ QEMU_THREAD_JOINABLE, &local_err)) {
+ error_reportf_err(local_err,
+ "failed to create do_data_decompress: ");
+ goto exit;
+ }
}
return 0;
exit:
diff --git a/migration/savevm.c b/migration/savevm.c
index d784e8aa40..b8bdcde5d8 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1747,9 +1747,14 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
mis->have_listen_thread = true;
/* Start up the listening thread and wait for it to signal ready */
qemu_sem_init(&mis->listen_thread_sem, 0);
- qemu_thread_create(&mis->listen_thread, "postcopy/listen",
- postcopy_ram_listen_thread, NULL,
- QEMU_THREAD_DETACHED);
+ if (!qemu_thread_create(&mis->listen_thread, "postcopy/listen",
+ postcopy_ram_listen_thread, NULL,
+ QEMU_THREAD_DETACHED, &local_err)) {
+ error_reportf_err(local_err,
+ "failed to create postcopy_ram_listen_thread: ");
+ qemu_sem_destroy(&mis->listen_thread_sem);
+ return -1;
+ }
qemu_sem_wait(&mis->listen_thread_sem);
qemu_sem_destroy(&mis->listen_thread_sem);
diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c
index 2f6c72f63a..338b9563e3 100644
--- a/tests/atomic_add-bench.c
+++ b/tests/atomic_add-bench.c
@@ -2,6 +2,7 @@
#include "qemu/thread.h"
#include "qemu/host-utils.h"
#include "qemu/processor.h"
+#include "qapi/error.h"
struct thread_info {
uint64_t r;
@@ -110,7 +111,7 @@ static void create_threads(void)
info->r = (i + 1) ^ time(NULL);
qemu_thread_create(&threads[i], NULL, thread_func, info,
- QEMU_THREAD_JOINABLE);
+ QEMU_THREAD_JOINABLE, &error_abort);
}
}
diff --git a/tests/iothread.c b/tests/iothread.c
index 777d9eea46..f4ad992e61 100644
--- a/tests/iothread.c
+++ b/tests/iothread.c
@@ -73,7 +73,7 @@ IOThread *iothread_new(void)
qemu_mutex_init(&iothread->init_done_lock);
qemu_cond_init(&iothread->init_done_cond);
qemu_thread_create(&iothread->thread, NULL, iothread_run,
- iothread, QEMU_THREAD_JOINABLE);
+ iothread, QEMU_THREAD_JOINABLE, &error_abort);
/* Wait for initialization to complete */
qemu_mutex_lock(&iothread->init_done_lock);
diff --git a/tests/qht-bench.c b/tests/qht-bench.c
index 2089e2bed1..71df567ea2 100644
--- a/tests/qht-bench.c
+++ b/tests/qht-bench.c
@@ -9,6 +9,7 @@
#include "qemu/atomic.h"
#include "qemu/qht.h"
#include "qemu/rcu.h"
+#include "qapi/error.h"
#include "exec/tb-hash-xx.h"
struct thread_stats {
@@ -247,7 +248,7 @@ th_create_n(QemuThread **threads, struct thread_info **infos, const char *name,
prepare_thread_info(&info[i], offset + i);
info[i].func = func;
qemu_thread_create(&th[i], name, thread_func, &info[i],
- QEMU_THREAD_JOINABLE);
+ QEMU_THREAD_JOINABLE, &error_abort);
}
}
diff --git a/tests/rcutorture.c b/tests/rcutorture.c
index 49311c82ea..0e799ff256 100644
--- a/tests/rcutorture.c
+++ b/tests/rcutorture.c
@@ -64,6 +64,7 @@
#include "qemu/atomic.h"
#include "qemu/rcu.h"
#include "qemu/thread.h"
+#include "qapi/error.h"
long long n_reads = 0LL;
long n_updates = 0L;
@@ -90,7 +91,7 @@ static void create_thread(void *(*func)(void *))
exit(-1);
}
qemu_thread_create(&threads[n_threads], "test", func, &data[n_threads],
- QEMU_THREAD_JOINABLE);
+ QEMU_THREAD_JOINABLE, &error_abort);
n_threads++;
}
diff --git a/tests/test-aio.c b/tests/test-aio.c
index 86fb73b3d5..b3ac261724 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -154,7 +154,7 @@ static void test_acquire(void)
qemu_thread_create(&thread, "test_acquire_thread",
test_acquire_thread,
- &data, QEMU_THREAD_JOINABLE);
+ &data, QEMU_THREAD_JOINABLE, &error_abort);
/* Block in aio_poll(), let other thread kick us and acquire context */
aio_context_acquire(ctx);
diff --git a/tests/test-rcu-list.c b/tests/test-rcu-list.c
index 2e6f70bd59..0f7da81291 100644
--- a/tests/test-rcu-list.c
+++ b/tests/test-rcu-list.c
@@ -25,6 +25,7 @@
#include "qemu/rcu.h"
#include "qemu/thread.h"
#include "qemu/rcu_queue.h"
+#include "qapi/error.h"
/*
* Test variables.
@@ -68,7 +69,7 @@ static void create_thread(void *(*func)(void *))
exit(-1);
}
qemu_thread_create(&threads[n_threads], "test", func, &data[n_threads],
- QEMU_THREAD_JOINABLE);
+ QEMU_THREAD_JOINABLE, &error_abort);
n_threads++;
}
diff --git a/ui/vnc-jobs.c b/ui/vnc-jobs.c
index 929391f85d..35a652d1fd 100644
--- a/ui/vnc-jobs.c
+++ b/ui/vnc-jobs.c
@@ -31,6 +31,7 @@
#include "vnc-jobs.h"
#include "qemu/sockets.h"
#include "qemu/main-loop.h"
+#include "qapi/error.h"
#include "block/aio.h"
/*
@@ -331,15 +332,21 @@ static bool vnc_worker_thread_running(void)
return queue; /* Check global queue */
}
-void vnc_start_worker_thread(void)
+bool vnc_start_worker_thread(Error **errp)
{
VncJobQueue *q;
- if (vnc_worker_thread_running())
- return ;
+ if (vnc_worker_thread_running()) {
+ goto out;
+ }
q = vnc_queue_init();
- qemu_thread_create(&q->thread, "vnc_worker", vnc_worker_thread, q,
- QEMU_THREAD_DETACHED);
+ if (!qemu_thread_create(&q->thread, "vnc_worker", vnc_worker_thread,
+ q, QEMU_THREAD_DETACHED, errp)) {
+ vnc_queue_clear(q);
+ return false;
+ }
queue = q; /* Set global queue */
+out:
+ return true;
}
diff --git a/ui/vnc-jobs.h b/ui/vnc-jobs.h
index 59f66bcc35..14640593db 100644
--- a/ui/vnc-jobs.h
+++ b/ui/vnc-jobs.h
@@ -37,7 +37,7 @@ void vnc_job_push(VncJob *job);
void vnc_jobs_join(VncState *vs);
void vnc_jobs_consume_buffer(VncState *vs);
-void vnc_start_worker_thread(void);
+bool vnc_start_worker_thread(Error **errp);
/* Locks */
static inline int vnc_trylock_display(VncDisplay *vd)
diff --git a/ui/vnc.c b/ui/vnc.c
index 0c1b477425..0ffe9e6a5d 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3236,7 +3236,9 @@ void vnc_display_init(const char *id, Error **errp)
vd->connections_limit = 32;
qemu_mutex_init(&vd->mutex);
- vnc_start_worker_thread();
+ if (!vnc_start_worker_thread(errp)) {
+ return;
+ }
vd->dcl.ops = &dcl_ops;
register_displaychangelistener(&vd->dcl);
diff --git a/util/compatfd.c b/util/compatfd.c
index 980bd33e52..886aa249f9 100644
--- a/util/compatfd.c
+++ b/util/compatfd.c
@@ -16,6 +16,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/thread.h"
+#include "qapi/error.h"
#include <sys/syscall.h>
@@ -70,6 +71,7 @@ static int qemu_signalfd_compat(const sigset_t *mask)
struct sigfd_compat_info *info;
QemuThread thread;
int fds[2];
+ Error *local_err = NULL;
info = malloc(sizeof(*info));
if (info == NULL) {
@@ -88,8 +90,14 @@ static int qemu_signalfd_compat(const sigset_t *mask)
memcpy(&info->mask, mask, sizeof(*mask));
info->fd = fds[1];
- qemu_thread_create(&thread, "signalfd_compat", sigwait_compat, info,
- QEMU_THREAD_DETACHED);
+ if (!qemu_thread_create(&thread, "signalfd_compat", sigwait_compat,
+ info, QEMU_THREAD_DETACHED, &local_err)) {
+ error_reportf_err(local_err, "failed to create sigwait_compat: ");
+ close(fds[0]);
+ close(fds[1]);
+ free(info);
+ return -1;
+ }
return fds[0];
}
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index c1bee2a581..2c779fd634 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -437,9 +437,12 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
size_t size_per_thread;
char *addr = area;
int i = 0;
+ int started_thread = 0;
+ Error *local_err = NULL;
memset_thread_failed = false;
memset_num_threads = get_memset_num_threads(smp_cpus);
+ started_thread = memset_num_threads;
memset_thread = g_new0(MemsetThread, memset_num_threads);
numpages_per_thread = (numpages / memset_num_threads);
size_per_thread = (hpagesize * numpages_per_thread);
@@ -448,13 +451,19 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
memset_thread[i].numpages = (i == (memset_num_threads - 1)) ?
numpages : numpages_per_thread;
memset_thread[i].hpagesize = hpagesize;
- qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
- do_touch_pages, &memset_thread[i],
- QEMU_THREAD_JOINABLE);
+ if (!qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
+ do_touch_pages, &memset_thread[i],
+ QEMU_THREAD_JOINABLE, &local_err)) {
+ error_reportf_err(local_err, "failed to create do_touch_pages: ");
+ memset_thread_failed = true;
+ started_thread = i;
+ goto out;
+ }
addr += size_per_thread;
numpages -= numpages_per_thread;
}
- for (i = 0; i < memset_num_threads; i++) {
+out:
+ for (i = 0; i < started_thread; i++) {
qemu_thread_join(&memset_thread[i].pgthread);
}
g_free(memset_thread);
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index 865e476df5..81b40a1ece 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -15,6 +15,7 @@
#include "qemu/atomic.h"
#include "qemu/notify.h"
#include "qemu-thread-common.h"
+#include "qapi/error.h"
static bool name_threads;
@@ -500,9 +501,9 @@ static void *qemu_thread_start(void *args)
return r;
}
-void qemu_thread_create(QemuThread *thread, const char *name,
- void *(*start_routine)(void*),
- void *arg, int mode)
+bool qemu_thread_create(QemuThread *thread, const char *name,
+ void *(*start_routine)(void *),
+ void *arg, int mode, Error **errp)
{
sigset_t set, oldset;
int err;
@@ -511,7 +512,9 @@ void qemu_thread_create(QemuThread *thread, const char *name,
err = pthread_attr_init(&attr);
if (err) {
- error_exit(err, __func__);
+ error_setg_errno(errp, -err, "pthread_attr_init failed: %s",
+ strerror(err));
+ return false;
}
if (mode == QEMU_THREAD_DETACHED) {
@@ -526,16 +529,21 @@ void qemu_thread_create(QemuThread *thread, const char *name,
qemu_thread_args->name = g_strdup(name);
qemu_thread_args->start_routine = start_routine;
qemu_thread_args->arg = arg;
-
err = pthread_create(&thread->thread, &attr,
qemu_thread_start, qemu_thread_args);
-
- if (err)
- error_exit(err, __func__);
+ if (err) {
+ error_setg_errno(errp, -err, "pthread_create failed: %s",
+ strerror(err));
+ pthread_attr_destroy(&attr);
+ g_free(qemu_thread_args->name);
+ g_free(qemu_thread_args);
+ return false;
+ }
pthread_sigmask(SIG_SETMASK, &oldset, NULL);
pthread_attr_destroy(&attr);
+ return true;
}
void qemu_thread_get_self(QemuThread *thread)
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index 4a363ca675..57b1143e97 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -20,6 +20,7 @@
#include "qemu/thread.h"
#include "qemu/notify.h"
#include "qemu-thread-common.h"
+#include "qapi/error.h"
#include <process.h>
static bool name_threads;
@@ -388,9 +389,9 @@ void *qemu_thread_join(QemuThread *thread)
return ret;
}
-void qemu_thread_create(QemuThread *thread, const char *name,
- void *(*start_routine)(void *),
- void *arg, int mode)
+bool qemu_thread_create(QemuThread *thread, const char *name,
+ void *(*start_routine)(void *),
+ void *arg, int mode, Error **errp)
{
HANDLE hThread;
struct QemuThreadData *data;
@@ -409,10 +410,17 @@ void qemu_thread_create(QemuThread *thread, const char *name,
hThread = (HANDLE) _beginthreadex(NULL, 0, win32_start_routine,
data, 0, &thread->tid);
if (!hThread) {
- error_exit(GetLastError(), __func__);
+ if (data->mode != QEMU_THREAD_DETACHED) {
+ DeleteCriticalSection(&data->cs);
+ }
+ error_setg_errno(errp, errno,
+ "failed to create win32_start_routine");
+ g_free(data);
+ return false;
}
CloseHandle(hThread);
thread->data = data;
+ return true;
}
void qemu_thread_get_self(QemuThread *thread)
diff --git a/util/rcu.c b/util/rcu.c
index 5676c22bd1..145dcdb0c6 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -32,6 +32,7 @@
#include "qemu/atomic.h"
#include "qemu/thread.h"
#include "qemu/main-loop.h"
+#include "qapi/error.h"
#if defined(CONFIG_MALLOC_TRIM)
#include <malloc.h>
#endif
@@ -325,7 +326,7 @@ static void rcu_init_complete(void)
* must have been quiescent even after forking, just recreate it.
*/
qemu_thread_create(&thread, "call_rcu", call_rcu_thread,
- NULL, QEMU_THREAD_DETACHED);
+ NULL, QEMU_THREAD_DETACHED, &error_abort);
rcu_register_thread();
}
diff --git a/util/thread-pool.c b/util/thread-pool.c
index 610646d131..ad0f980783 100644
--- a/util/thread-pool.c
+++ b/util/thread-pool.c
@@ -22,6 +22,7 @@
#include "trace.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
+#include "qapi/error.h"
static void do_spawn_thread(ThreadPool *pool);
@@ -132,7 +133,8 @@ static void do_spawn_thread(ThreadPool *pool)
pool->new_threads--;
pool->pending_threads++;
- qemu_thread_create(&t, "worker", worker_thread, pool, QEMU_THREAD_DETACHED);
+ qemu_thread_create(&t, "worker", worker_thread, pool,
+ QEMU_THREAD_DETACHED, &error_abort);
}
static void spawn_thread_bh_fn(void *opaque)
--
2.13.7
There's a question for David Gibson inline. Please search for /ppc/.
Fei Li <fli@suse.com> writes:
> Make qemu_thread_create() return a Boolean to indicate if it succeeds
> rather than failing with an error. And add an Error parameter to hold
> the error message and let the callers handle it.
The "rather than failing with an error" is misleading. Before the
patch, we report to stderr and abort(). What about:
qemu-thread: Make qemu_thread_create() handle errors properly
qemu_thread_create() abort()s on error. Not nice. Give it a
return value and an Error ** argument, so it can return success /
failure.
Still missing from the commit message then: how you update the callers.
Let's see below.
>
> Cc: Markus Armbruster <armbru@redhat.com>
> Cc: Daniel P. Berrangé <berrange@redhat.com>
> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Signed-off-by: Fei Li <fli@suse.com>
> ---
> cpus.c | 45 ++++++++++++++++++++++++-------------
> dump.c | 6 +++--
> hw/misc/edu.c | 6 +++--
> hw/ppc/spapr_hcall.c | 10 +++++++--
> hw/rdma/rdma_backend.c | 4 +++-
> hw/usb/ccid-card-emulated.c | 16 ++++++++++----
> include/qemu/thread.h | 4 ++--
> io/task.c | 3 ++-
> iothread.c | 16 +++++++++-----
> migration/migration.c | 54 +++++++++++++++++++++++++++++----------------
> migration/postcopy-ram.c | 14 ++++++++++--
> migration/ram.c | 40 ++++++++++++++++++++++++---------
> migration/savevm.c | 11 ++++++---
> tests/atomic_add-bench.c | 3 ++-
> tests/iothread.c | 2 +-
> tests/qht-bench.c | 3 ++-
> tests/rcutorture.c | 3 ++-
> tests/test-aio.c | 2 +-
> tests/test-rcu-list.c | 3 ++-
> ui/vnc-jobs.c | 17 +++++++++-----
> ui/vnc-jobs.h | 2 +-
> ui/vnc.c | 4 +++-
> util/compatfd.c | 12 ++++++++--
> util/oslib-posix.c | 17 ++++++++++----
> util/qemu-thread-posix.c | 24 +++++++++++++-------
> util/qemu-thread-win32.c | 16 ++++++++++----
> util/rcu.c | 3 ++-
> util/thread-pool.c | 4 +++-
> 28 files changed, 243 insertions(+), 101 deletions(-)
>
> diff --git a/cpus.c b/cpus.c
> index 7b091bda53..e8450e518a 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -1961,15 +1961,20 @@ static void qemu_tcg_init_vcpu(CPUState *cpu, Error **errp)
> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
> cpu->cpu_index);
>
> - qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
> - cpu, QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(cpu->thread, thread_name,
> + qemu_tcg_cpu_thread_fn, cpu,
> + QEMU_THREAD_JOINABLE, errp)) {
> + return;
> + }
>
> } else {
> /* share a single thread for all cpus with TCG */
> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
> - qemu_thread_create(cpu->thread, thread_name,
> - qemu_tcg_rr_cpu_thread_fn,
> - cpu, QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(cpu->thread, thread_name,
> + qemu_tcg_rr_cpu_thread_fn, cpu,
> + QEMU_THREAD_JOINABLE, errp)) {
> + return;
> + }
>
> single_tcg_halt_cond = cpu->halt_cond;
> single_tcg_cpu_thread = cpu->thread;
This is a caller that sets an error on failure. You make it set an
error on qemu_thread_create() failure. Makes sense.
> @@ -1997,8 +2002,10 @@ static void qemu_hax_start_vcpu(CPUState *cpu, Error **errp)
>
> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
> cpu->cpu_index);
> - qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
> - cpu, QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
> + cpu, QEMU_THREAD_JOINABLE, errp)) {
> + return;
> + }
> #ifdef _WIN32
> cpu->hThread = qemu_thread_get_handle(cpu->thread);
> #endif
Likewise. I'll stop commenting on this pattern now.
> @@ -2013,8 +2020,10 @@ static void qemu_kvm_start_vcpu(CPUState *cpu, Error **errp)
> qemu_cond_init(cpu->halt_cond);
> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
> cpu->cpu_index);
> - qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
> - cpu, QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
> + cpu, QEMU_THREAD_JOINABLE, errp)) {
> + /* keep 'if' here in case there is further error handling logic */
> + }
> }
>
> static void qemu_hvf_start_vcpu(CPUState *cpu, Error **errp)
> @@ -2031,8 +2040,10 @@ static void qemu_hvf_start_vcpu(CPUState *cpu, Error **errp)
>
> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
> cpu->cpu_index);
> - qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
> - cpu, QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
> + cpu, QEMU_THREAD_JOINABLE, errp)) {
> + /* keep 'if' here in case there is further error handling logic */
> + }
> }
>
> static void qemu_whpx_start_vcpu(CPUState *cpu, Error **errp)
> @@ -2044,8 +2055,10 @@ static void qemu_whpx_start_vcpu(CPUState *cpu, Error **errp)
> qemu_cond_init(cpu->halt_cond);
> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
> cpu->cpu_index);
> - qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
> - cpu, QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
> + cpu, QEMU_THREAD_JOINABLE, errp)) {
> + return;
> + }
> #ifdef _WIN32
> cpu->hThread = qemu_thread_get_handle(cpu->thread);
> #endif
> @@ -2060,8 +2073,10 @@ static void qemu_dummy_start_vcpu(CPUState *cpu, Error **errp)
> qemu_cond_init(cpu->halt_cond);
> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
> cpu->cpu_index);
> - qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
> - QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn,
> + cpu, QEMU_THREAD_JOINABLE, errp)) {
> + /* keep 'if' here in case there is further error handling logic */
> + }
> }
>
> bool qemu_init_vcpu(CPUState *cpu, Error **errp)
> diff --git a/dump.c b/dump.c
> index 4ec94c5e25..1f003aff9a 100644
> --- a/dump.c
> +++ b/dump.c
> @@ -2020,8 +2020,10 @@ void qmp_dump_guest_memory(bool paging, const char *file,
> if (detach_p) {
> /* detached dump */
> s->detached = true;
> - qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
> - s, QEMU_THREAD_DETACHED);
> + if (!qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
> + s, QEMU_THREAD_DETACHED, errp)) {
> + /* keep 'if' here in case there is further error handling logic */
> + }
> } else {
> /* sync dump */
> dump_process(s, errp);
> diff --git a/hw/misc/edu.c b/hw/misc/edu.c
> index cdcf550dd7..6684c60a96 100644
> --- a/hw/misc/edu.c
> +++ b/hw/misc/edu.c
> @@ -355,8 +355,10 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp)
>
> qemu_mutex_init(&edu->thr_mutex);
> qemu_cond_init(&edu->thr_cond);
> - qemu_thread_create(&edu->thread, "edu", edu_fact_thread,
> - edu, QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(&edu->thread, "edu", edu_fact_thread,
> + edu, QEMU_THREAD_JOINABLE, errp)) {
> + return;
> + }
>
> memory_region_init_io(&edu->mmio, OBJECT(edu), &edu_mmio_ops, edu,
> "edu-mmio", 1 * MiB);
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index ae913d070f..7c16ade04a 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -478,6 +478,7 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
> sPAPRPendingHPT *pending = spapr->pending_hpt;
> uint64_t current_ram_size;
> int rc;
> + Error *local_err = NULL;
>
> if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
> return H_AUTHORITY;
> @@ -538,8 +539,13 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
> pending->shift = shift;
> pending->ret = H_HARDWARE;
>
> - qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
> - hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
> + if (!qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
> + hpt_prepare_thread, pending,
> + QEMU_THREAD_DETACHED, &local_err)) {
> + error_reportf_err(local_err, "failed to create hpt_prepare_thread: ");
> + g_free(pending);
> + return H_RESOURCE;
> + }
>
> spapr->pending_hpt = pending;
>
This is a caller that returns an error code on failure. You change it
to report the error, then return failure. The return failure part looks
fine. Whether reporting the error is appropriate I can't say for sure.
No other failure mode reports anything. David, what do you think?
Fei Li, you could pass &error_abort to side-step this question for now.
> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> index d7a4bbd91f..53a2bd0d85 100644
> --- a/hw/rdma/rdma_backend.c
> +++ b/hw/rdma/rdma_backend.c
> @@ -164,8 +164,10 @@ static void start_comp_thread(RdmaBackendDev *backend_dev)
> snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
> ibv_get_device_name(backend_dev->ib_dev));
> backend_dev->comp_thread.run = true;
> + /* FIXME: let the further caller handle the error instead of abort() here */
> qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
> - comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
> + comp_handler_thread, backend_dev,
> + QEMU_THREAD_DETACHED, &error_abort);
> }
>
This is a caller that can't return failure. You pass &error_abort. No
behavioral change.
I think I'd mark the spot TODO, not FIXME. Matter of taste, I guess.
> void rdma_backend_register_comp_handler(void (*handler)(int status,
> diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c
> index 25976ed84f..c6783f124a 100644
> --- a/hw/usb/ccid-card-emulated.c
> +++ b/hw/usb/ccid-card-emulated.c
> @@ -33,6 +33,7 @@
> #include "qemu/main-loop.h"
> #include "ccid.h"
> #include "qapi/error.h"
> +#include "qemu/error-report.h"
>
> #define DPRINTF(card, lvl, fmt, ...) \
> do {\
> @@ -544,10 +545,17 @@ static void emulated_realize(CCIDCardState *base, Error **errp)
> error_setg(errp, "%s: failed to initialize vcard", TYPE_EMULATED_CCID);
> goto out2;
> }
> - qemu_thread_create(&card->event_thread_id, "ccid/event", event_thread,
> - card, QEMU_THREAD_JOINABLE);
> - qemu_thread_create(&card->apdu_thread_id, "ccid/apdu", handle_apdu_thread,
> - card, QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(&card->event_thread_id, "ccid/event", event_thread,
> + card, QEMU_THREAD_JOINABLE, errp)) {
> + error_report("failed to create event_thread");
> + goto out2;
> + }
> + if (!qemu_thread_create(&card->apdu_thread_id, "ccid/apdu",
> + handle_apdu_thread, card,
> + QEMU_THREAD_JOINABLE, errp)) {
> + error_report("failed to create handle_apdu_thread");
> + goto out2;
> + }
>
> out2:
> clean_event_notifier(card);
error_report() in a realize() method is almost certainly wrong.
> diff --git a/include/qemu/thread.h b/include/qemu/thread.h
> index 55d83a907c..12291f4ccd 100644
> --- a/include/qemu/thread.h
> +++ b/include/qemu/thread.h
> @@ -152,9 +152,9 @@ void qemu_event_reset(QemuEvent *ev);
> void qemu_event_wait(QemuEvent *ev);
> void qemu_event_destroy(QemuEvent *ev);
>
> -void qemu_thread_create(QemuThread *thread, const char *name,
> +bool qemu_thread_create(QemuThread *thread, const char *name,
> void *(*start_routine)(void *),
> - void *arg, int mode);
> + void *arg, int mode, Error **errp);
> void *qemu_thread_join(QemuThread *thread);
> void qemu_thread_get_self(QemuThread *thread);
> bool qemu_thread_is_self(QemuThread *thread);
> diff --git a/io/task.c b/io/task.c
> index 2886a2c1bc..6d3a18ab80 100644
> --- a/io/task.c
> +++ b/io/task.c
> @@ -149,7 +149,8 @@ void qio_task_run_in_thread(QIOTask *task,
> "io-task-worker",
> qio_task_thread_worker,
> data,
> - QEMU_THREAD_DETACHED);
> + QEMU_THREAD_DETACHED,
> + &error_abort);
> }
>
>
This is a caller that can't return failure. You pass &error_abort. No
behavioral change. Unlike above, you don't mark this spot FIXME. Any
particular reason for marking one, but not the other?
I'll stop commenting on this pattern now.
> diff --git a/iothread.c b/iothread.c
> index 2fb1cdf55d..7335dacf0b 100644
> --- a/iothread.c
> +++ b/iothread.c
> @@ -164,9 +164,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
> &local_error);
> if (local_error) {
> error_propagate(errp, local_error);
> - aio_context_unref(iothread->ctx);
> - iothread->ctx = NULL;
> - return;
> + goto fail;
> }
>
> qemu_mutex_init(&iothread->init_done_lock);
> @@ -178,8 +176,12 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
> */
> name = object_get_canonical_path_component(OBJECT(obj));
> thread_name = g_strdup_printf("IO %s", name);
> - qemu_thread_create(&iothread->thread, thread_name, iothread_run,
> - iothread, QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(&iothread->thread, thread_name, iothread_run,
> + iothread, QEMU_THREAD_JOINABLE, errp)) {
> + g_free(thread_name);
> + g_free(name);
> + goto fail;
> + }
> g_free(thread_name);
> g_free(name);
>
> @@ -190,6 +192,10 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
> &iothread->init_done_lock);
> }
> qemu_mutex_unlock(&iothread->init_done_lock);
> + return;
> +fail:
> + aio_context_unref(iothread->ctx);
> + iothread->ctx = NULL;
> }
>
> typedef struct {
> diff --git a/migration/migration.c b/migration/migration.c
> index 0537fc0c26..af6c72ac5d 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -438,19 +438,22 @@ static void process_incoming_migration_co(void *opaque)
> /* Make sure all file formats flush their mutable metadata */
> bdrv_invalidate_cache_all(&local_err);
> if (local_err) {
> - migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
> - MIGRATION_STATUS_FAILED);
> error_report_err(local_err);
> - exit(EXIT_FAILURE);
> + goto fail;
> }
>
> if (colo_init_ram_cache() < 0) {
> error_report("Init ram cache failed");
> - exit(EXIT_FAILURE);
> + goto fail;
> }
>
> - qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
> - colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
> + colo_process_incoming_thread, mis,
> + QEMU_THREAD_JOINABLE, &local_err)) {
> + error_reportf_err(local_err, "failed to create "
> + "colo_process_incoming_thread: ");
> + goto fail;
> + }
> mis->have_colo_incoming_thread = true;
> qemu_coroutine_yield();
>
> @@ -461,20 +464,22 @@ static void process_incoming_migration_co(void *opaque)
> }
>
> if (ret < 0) {
> - Error *local_err = NULL;
> -
> - migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
> - MIGRATION_STATUS_FAILED);
> error_report("load of migration failed: %s", strerror(-ret));
> - qemu_fclose(mis->from_src_file);
> - if (multifd_load_cleanup(&local_err) != 0) {
> - error_report_err(local_err);
> - }
> - exit(EXIT_FAILURE);
> + goto fail;
> }
> mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
> qemu_bh_schedule(mis->bh);
> mis->migration_incoming_co = NULL;
> + return;
> +fail:
> + local_err = NULL;
> + migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
> + MIGRATION_STATUS_FAILED);
> + qemu_fclose(mis->from_src_file);
> + if (multifd_load_cleanup(&local_err) != 0) {
> + error_report_err(local_err);
> + }
> + exit(EXIT_FAILURE);
> }
You change handling of errors other than qemu_thread_create(). Separate
patch, please. I'd put it before this one.
>
> static void migration_incoming_setup(QEMUFile *f)
> @@ -2345,6 +2350,7 @@ out:
> static int open_return_path_on_source(MigrationState *ms,
> bool create_thread)
> {
> + Error *local_err = NULL;
>
> ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
> if (!ms->rp_state.from_dst_file) {
> @@ -2358,8 +2364,13 @@ static int open_return_path_on_source(MigrationState *ms,
> return 0;
> }
>
> - qemu_thread_create(&ms->rp_state.rp_thread, "return path",
> - source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(&ms->rp_state.rp_thread, "return path",
> + source_return_path_thread, ms,
> + QEMU_THREAD_JOINABLE, &local_err)) {
> + error_reportf_err(local_err,
> + "failed to create source_return_path_thread: ");
> + return -1;
> + }
>
> trace_open_return_path_on_source_continue();
>
This is a caller that returns an error code on failure. You change it
to report the error, then return failure. This is okay, because its
sole caller also reports errors that way.
> @@ -3189,8 +3200,13 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
> migrate_fd_cleanup(s);
> return;
> }
> - qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
> - QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(&s->thread, "live_migration", migration_thread,
> + s, QEMU_THREAD_JOINABLE, &error_in)) {
> + error_reportf_err(error_in, "failed to create migration_thread: ");
> + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
> + migrate_fd_cleanup(s);
> + return;
> + }
> s->migration_thread_running = true;
> }
This is a caller that reports errors. You make it handle
qemu_thread_create() the same way. Good.
>
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index fa09dba534..80bfa9c4a2 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -1083,6 +1083,8 @@ retry:
>
> int postcopy_ram_enable_notify(MigrationIncomingState *mis)
> {
> + Error *local_err = NULL;
> +
> /* Open the fd for the kernel to give us userfaults */
> mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
> if (mis->userfault_fd == -1) {
> @@ -1109,8 +1111,16 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
> }
>
> qemu_sem_init(&mis->fault_thread_sem, 0);
> - qemu_thread_create(&mis->fault_thread, "postcopy/fault",
> - postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(&mis->fault_thread, "postcopy/fault",
> + postcopy_ram_fault_thread, mis,
> + QEMU_THREAD_JOINABLE, &local_err)) {
> + error_reportf_err(local_err,
> + "failed to create postcopy_ram_fault_thread: ");
> + close(mis->userfault_event_fd);
> + close(mis->userfault_fd);
> + qemu_sem_destroy(&mis->fault_thread_sem);
> + return -1;
> + }
> qemu_sem_wait(&mis->fault_thread_sem);
> qemu_sem_destroy(&mis->fault_thread_sem);
> mis->have_fault_thread = true;
This is a caller that reports errors, then returns failure. You make it
handle qemu_thread_create() the same way. Good.
Not related to this patch, just spotted while reviewing it:
/* Mark so that we get notified of accesses to unwritten areas */
if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
error_report("ram_block_enable_notify failed");
return -1;
}
Do we leak mis->userfault_fd, mis->userfault_event_fd,
mis->fault_thread_sem here?
> diff --git a/migration/ram.c b/migration/ram.c
> index 658dfa88a3..6e0cccf066 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -473,6 +473,7 @@ static void compress_threads_save_cleanup(void)
> static int compress_threads_save_setup(void)
> {
> int i, thread_count;
> + Error *local_err = NULL;
>
> if (!migrate_use_compression()) {
> return 0;
> @@ -502,9 +503,12 @@ static int compress_threads_save_setup(void)
> comp_param[i].quit = false;
> qemu_mutex_init(&comp_param[i].mutex);
> qemu_cond_init(&comp_param[i].cond);
> - qemu_thread_create(compress_threads + i, "compress",
> - do_data_compress, comp_param + i,
> - QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(compress_threads + i, "compress",
> + do_data_compress, comp_param + i,
> + QEMU_THREAD_JOINABLE, &local_err)) {
> + error_reportf_err(local_err, "failed to create do_data_compress: ");
> + goto exit;
> + }
> }
> return 0;
>
Reviewing the migration changes is getting tiresome... Is reporting the
error appropriate here, and why?
> @@ -1075,8 +1079,14 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
> p->c = QIO_CHANNEL(sioc);
> qio_channel_set_delay(p->c, false);
> p->running = true;
> - qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
> - QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
> + QEMU_THREAD_JOINABLE, &local_err)) {
> + migrate_set_error(migrate_get_current(), local_err);
> + error_reportf_err(local_err,
> + "failed to create multifd_send_thread: ");
> + multifd_save_cleanup();
> + return;
> + }
>
> atomic_inc(&multifd_send_state->count);
> }
Same question.
> @@ -1350,8 +1360,13 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
> p->num_packets = 1;
>
> p->running = true;
> - qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
> - QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(&p->thread, p->name, multifd_recv_thread,
> + p, QEMU_THREAD_JOINABLE, &local_err)) {
> + error_propagate_prepend(errp, local_err,
> + "failed to create multifd_recv_thread: ");
> + multifd_recv_terminate_threads(local_err);
> + return false;
> + }
> atomic_inc(&multifd_recv_state->count);
> return atomic_read(&multifd_recv_state->count) ==
> migrate_multifd_channels();
> @@ -3617,6 +3632,7 @@ static void compress_threads_load_cleanup(void)
> static int compress_threads_load_setup(QEMUFile *f)
> {
> int i, thread_count;
> + Error *local_err = NULL;
>
> if (!migrate_use_compression()) {
> return 0;
> @@ -3638,9 +3654,13 @@ static int compress_threads_load_setup(QEMUFile *f)
> qemu_cond_init(&decomp_param[i].cond);
> decomp_param[i].done = true;
> decomp_param[i].quit = false;
> - qemu_thread_create(decompress_threads + i, "decompress",
> - do_data_decompress, decomp_param + i,
> - QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(decompress_threads + i, "decompress",
> + do_data_decompress, decomp_param + i,
> + QEMU_THREAD_JOINABLE, &local_err)) {
> + error_reportf_err(local_err,
> + "failed to create do_data_decompress: ");
> + goto exit;
> + }
> }
> return 0;
> exit:
Same question.
> diff --git a/migration/savevm.c b/migration/savevm.c
> index d784e8aa40..b8bdcde5d8 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1747,9 +1747,14 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
> mis->have_listen_thread = true;
> /* Start up the listening thread and wait for it to signal ready */
> qemu_sem_init(&mis->listen_thread_sem, 0);
> - qemu_thread_create(&mis->listen_thread, "postcopy/listen",
> - postcopy_ram_listen_thread, NULL,
> - QEMU_THREAD_DETACHED);
> + if (!qemu_thread_create(&mis->listen_thread, "postcopy/listen",
> + postcopy_ram_listen_thread, NULL,
> + QEMU_THREAD_DETACHED, &local_err)) {
> + error_reportf_err(local_err,
> + "failed to create postcopy_ram_listen_thread: ");
> + qemu_sem_destroy(&mis->listen_thread_sem);
> + return -1;
> + }
> qemu_sem_wait(&mis->listen_thread_sem);
> qemu_sem_destroy(&mis->listen_thread_sem);
>
This is a caller that reports errors, then returns failure. You make it
handle qemu_thread_create() the same way. Good.
I'll stop commenting on this pattern now.
> diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c
> index 2f6c72f63a..338b9563e3 100644
> --- a/tests/atomic_add-bench.c
> +++ b/tests/atomic_add-bench.c
> @@ -2,6 +2,7 @@
> #include "qemu/thread.h"
> #include "qemu/host-utils.h"
> #include "qemu/processor.h"
> +#include "qapi/error.h"
>
> struct thread_info {
> uint64_t r;
> @@ -110,7 +111,7 @@ static void create_threads(void)
>
> info->r = (i + 1) ^ time(NULL);
> qemu_thread_create(&threads[i], NULL, thread_func, info,
> - QEMU_THREAD_JOINABLE);
> + QEMU_THREAD_JOINABLE, &error_abort);
> }
> }
>
> diff --git a/tests/iothread.c b/tests/iothread.c
> index 777d9eea46..f4ad992e61 100644
> --- a/tests/iothread.c
> +++ b/tests/iothread.c
> @@ -73,7 +73,7 @@ IOThread *iothread_new(void)
> qemu_mutex_init(&iothread->init_done_lock);
> qemu_cond_init(&iothread->init_done_cond);
> qemu_thread_create(&iothread->thread, NULL, iothread_run,
> - iothread, QEMU_THREAD_JOINABLE);
> + iothread, QEMU_THREAD_JOINABLE, &error_abort);
>
> /* Wait for initialization to complete */
> qemu_mutex_lock(&iothread->init_done_lock);
> diff --git a/tests/qht-bench.c b/tests/qht-bench.c
> index 2089e2bed1..71df567ea2 100644
> --- a/tests/qht-bench.c
> +++ b/tests/qht-bench.c
> @@ -9,6 +9,7 @@
> #include "qemu/atomic.h"
> #include "qemu/qht.h"
> #include "qemu/rcu.h"
> +#include "qapi/error.h"
> #include "exec/tb-hash-xx.h"
>
> struct thread_stats {
> @@ -247,7 +248,7 @@ th_create_n(QemuThread **threads, struct thread_info **infos, const char *name,
> prepare_thread_info(&info[i], offset + i);
> info[i].func = func;
> qemu_thread_create(&th[i], name, thread_func, &info[i],
> - QEMU_THREAD_JOINABLE);
> + QEMU_THREAD_JOINABLE, &error_abort);
> }
> }
>
> diff --git a/tests/rcutorture.c b/tests/rcutorture.c
> index 49311c82ea..0e799ff256 100644
> --- a/tests/rcutorture.c
> +++ b/tests/rcutorture.c
> @@ -64,6 +64,7 @@
> #include "qemu/atomic.h"
> #include "qemu/rcu.h"
> #include "qemu/thread.h"
> +#include "qapi/error.h"
>
> long long n_reads = 0LL;
> long n_updates = 0L;
> @@ -90,7 +91,7 @@ static void create_thread(void *(*func)(void *))
> exit(-1);
> }
> qemu_thread_create(&threads[n_threads], "test", func, &data[n_threads],
> - QEMU_THREAD_JOINABLE);
> + QEMU_THREAD_JOINABLE, &error_abort);
> n_threads++;
> }
>
> diff --git a/tests/test-aio.c b/tests/test-aio.c
> index 86fb73b3d5..b3ac261724 100644
> --- a/tests/test-aio.c
> +++ b/tests/test-aio.c
> @@ -154,7 +154,7 @@ static void test_acquire(void)
>
> qemu_thread_create(&thread, "test_acquire_thread",
> test_acquire_thread,
> - &data, QEMU_THREAD_JOINABLE);
> + &data, QEMU_THREAD_JOINABLE, &error_abort);
>
> /* Block in aio_poll(), let other thread kick us and acquire context */
> aio_context_acquire(ctx);
> diff --git a/tests/test-rcu-list.c b/tests/test-rcu-list.c
> index 2e6f70bd59..0f7da81291 100644
> --- a/tests/test-rcu-list.c
> +++ b/tests/test-rcu-list.c
> @@ -25,6 +25,7 @@
> #include "qemu/rcu.h"
> #include "qemu/thread.h"
> #include "qemu/rcu_queue.h"
> +#include "qapi/error.h"
>
> /*
> * Test variables.
> @@ -68,7 +69,7 @@ static void create_thread(void *(*func)(void *))
> exit(-1);
> }
> qemu_thread_create(&threads[n_threads], "test", func, &data[n_threads],
> - QEMU_THREAD_JOINABLE);
> + QEMU_THREAD_JOINABLE, &error_abort);
> n_threads++;
> }
>
> diff --git a/ui/vnc-jobs.c b/ui/vnc-jobs.c
> index 929391f85d..35a652d1fd 100644
> --- a/ui/vnc-jobs.c
> +++ b/ui/vnc-jobs.c
> @@ -31,6 +31,7 @@
> #include "vnc-jobs.h"
> #include "qemu/sockets.h"
> #include "qemu/main-loop.h"
> +#include "qapi/error.h"
> #include "block/aio.h"
>
> /*
> @@ -331,15 +332,21 @@ static bool vnc_worker_thread_running(void)
> return queue; /* Check global queue */
> }
>
> -void vnc_start_worker_thread(void)
> +bool vnc_start_worker_thread(Error **errp)
> {
> VncJobQueue *q;
>
> - if (vnc_worker_thread_running())
> - return ;
> + if (vnc_worker_thread_running()) {
> + goto out;
> + }
>
> q = vnc_queue_init();
> - qemu_thread_create(&q->thread, "vnc_worker", vnc_worker_thread, q,
> - QEMU_THREAD_DETACHED);
> + if (!qemu_thread_create(&q->thread, "vnc_worker", vnc_worker_thread,
> + q, QEMU_THREAD_DETACHED, errp)) {
> + vnc_queue_clear(q);
> + return false;
> + }
> queue = q; /* Set global queue */
> +out:
> + return true;
> }
I recommend to pass &error_abort to qemu_thread_create() in this patch,
then convert vnc_start_worker_thread() to Error in a subsequent patch.
> diff --git a/ui/vnc-jobs.h b/ui/vnc-jobs.h
> index 59f66bcc35..14640593db 100644
> --- a/ui/vnc-jobs.h
> +++ b/ui/vnc-jobs.h
> @@ -37,7 +37,7 @@ void vnc_job_push(VncJob *job);
> void vnc_jobs_join(VncState *vs);
>
> void vnc_jobs_consume_buffer(VncState *vs);
> -void vnc_start_worker_thread(void);
> +bool vnc_start_worker_thread(Error **errp);
>
> /* Locks */
> static inline int vnc_trylock_display(VncDisplay *vd)
> diff --git a/ui/vnc.c b/ui/vnc.c
> index 0c1b477425..0ffe9e6a5d 100644
> --- a/ui/vnc.c
> +++ b/ui/vnc.c
> @@ -3236,7 +3236,9 @@ void vnc_display_init(const char *id, Error **errp)
> vd->connections_limit = 32;
>
> qemu_mutex_init(&vd->mutex);
> - vnc_start_worker_thread();
> + if (!vnc_start_worker_thread(errp)) {
> + return;
> + }
>
> vd->dcl.ops = &dcl_ops;
> register_displaychangelistener(&vd->dcl);
These two hunks then also go into the subsequent patch.
> diff --git a/util/compatfd.c b/util/compatfd.c
> index 980bd33e52..886aa249f9 100644
> --- a/util/compatfd.c
> +++ b/util/compatfd.c
> @@ -16,6 +16,7 @@
> #include "qemu/osdep.h"
> #include "qemu-common.h"
> #include "qemu/thread.h"
> +#include "qapi/error.h"
>
> #include <sys/syscall.h>
>
> @@ -70,6 +71,7 @@ static int qemu_signalfd_compat(const sigset_t *mask)
> struct sigfd_compat_info *info;
> QemuThread thread;
> int fds[2];
> + Error *local_err = NULL;
>
> info = malloc(sizeof(*info));
> if (info == NULL) {
> @@ -88,8 +90,14 @@ static int qemu_signalfd_compat(const sigset_t *mask)
> memcpy(&info->mask, mask, sizeof(*mask));
> info->fd = fds[1];
>
> - qemu_thread_create(&thread, "signalfd_compat", sigwait_compat, info,
> - QEMU_THREAD_DETACHED);
> + if (!qemu_thread_create(&thread, "signalfd_compat", sigwait_compat,
> + info, QEMU_THREAD_DETACHED, &local_err)) {
> + error_reportf_err(local_err, "failed to create sigwait_compat: ");
> + close(fds[0]);
> + close(fds[1]);
> + free(info);
> + return -1;
> + }
>
> return fds[0];
> }
This function is implements signalfd() when the kernel doesn't provide
it.
signalfd() sets errno on failure. The replacement's existing failure
modes set errno. You add a failure mode that doesn't set errno. That's
a bug. To fix it, you can either make qemu_thread_create() set errno,
or you can make it return a value you can use to set errno. The common
way to do the latter is returning a *negated* errno value.
signalfd() doesn't print anything on failure. The replacement's
existing failure modes don't print anything. You add a failure mode
that does print. I think it shouldn't.
> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
> index c1bee2a581..2c779fd634 100644
> --- a/util/oslib-posix.c
> +++ b/util/oslib-posix.c
> @@ -437,9 +437,12 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
> size_t size_per_thread;
> char *addr = area;
> int i = 0;
> + int started_thread = 0;
> + Error *local_err = NULL;
>
> memset_thread_failed = false;
> memset_num_threads = get_memset_num_threads(smp_cpus);
> + started_thread = memset_num_threads;
> memset_thread = g_new0(MemsetThread, memset_num_threads);
> numpages_per_thread = (numpages / memset_num_threads);
> size_per_thread = (hpagesize * numpages_per_thread);
> @@ -448,13 +451,19 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
> memset_thread[i].numpages = (i == (memset_num_threads - 1)) ?
> numpages : numpages_per_thread;
> memset_thread[i].hpagesize = hpagesize;
> - qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
> - do_touch_pages, &memset_thread[i],
> - QEMU_THREAD_JOINABLE);
> + if (!qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
> + do_touch_pages, &memset_thread[i],
> + QEMU_THREAD_JOINABLE, &local_err)) {
> + error_reportf_err(local_err, "failed to create do_touch_pages: ");
> + memset_thread_failed = true;
> + started_thread = i;
> + goto out;
> + }
> addr += size_per_thread;
> numpages -= numpages_per_thread;
> }
> - for (i = 0; i < memset_num_threads; i++) {
> +out:
> + for (i = 0; i < started_thread; i++) {
> qemu_thread_join(&memset_thread[i].pgthread);
> }
> g_free(memset_thread);
You need to convert this function to Error instead, because its caller
os_mem_prealloc() sets an error on failure. I recommend to pass
&error_abort in this patch, and convert to Error in a subsequent patch.
> diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
> index 865e476df5..81b40a1ece 100644
> --- a/util/qemu-thread-posix.c
> +++ b/util/qemu-thread-posix.c
> @@ -15,6 +15,7 @@
> #include "qemu/atomic.h"
> #include "qemu/notify.h"
> #include "qemu-thread-common.h"
> +#include "qapi/error.h"
>
> static bool name_threads;
>
> @@ -500,9 +501,9 @@ static void *qemu_thread_start(void *args)
> return r;
> }
>
> -void qemu_thread_create(QemuThread *thread, const char *name,
> - void *(*start_routine)(void*),
> - void *arg, int mode)
> +bool qemu_thread_create(QemuThread *thread, const char *name,
> + void *(*start_routine)(void *),
> + void *arg, int mode, Error **errp)
> {
> sigset_t set, oldset;
> int err;
> @@ -511,7 +512,9 @@ void qemu_thread_create(QemuThread *thread, const char *name,
>
> err = pthread_attr_init(&attr);
> if (err) {
> - error_exit(err, __func__);
> + error_setg_errno(errp, -err, "pthread_attr_init failed: %s",
> + strerror(err));
> + return false;
> }
>
> if (mode == QEMU_THREAD_DETACHED) {
> @@ -526,16 +529,21 @@ void qemu_thread_create(QemuThread *thread, const char *name,
> qemu_thread_args->name = g_strdup(name);
> qemu_thread_args->start_routine = start_routine;
> qemu_thread_args->arg = arg;
> -
Let's keep the blank line.
> err = pthread_create(&thread->thread, &attr,
> qemu_thread_start, qemu_thread_args);
> -
> - if (err)
> - error_exit(err, __func__);
> + if (err) {
> + error_setg_errno(errp, -err, "pthread_create failed: %s",
> + strerror(err));
> + pthread_attr_destroy(&attr);
> + g_free(qemu_thread_args->name);
> + g_free(qemu_thread_args);
> + return false;
> + }
>
> pthread_sigmask(SIG_SETMASK, &oldset, NULL);
>
> pthread_attr_destroy(&attr);
> + return true;
> }
>
> void qemu_thread_get_self(QemuThread *thread)
> diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
> index 4a363ca675..57b1143e97 100644
> --- a/util/qemu-thread-win32.c
> +++ b/util/qemu-thread-win32.c
> @@ -20,6 +20,7 @@
> #include "qemu/thread.h"
> #include "qemu/notify.h"
> #include "qemu-thread-common.h"
> +#include "qapi/error.h"
> #include <process.h>
>
> static bool name_threads;
> @@ -388,9 +389,9 @@ void *qemu_thread_join(QemuThread *thread)
> return ret;
> }
>
> -void qemu_thread_create(QemuThread *thread, const char *name,
> - void *(*start_routine)(void *),
> - void *arg, int mode)
> +bool qemu_thread_create(QemuThread *thread, const char *name,
> + void *(*start_routine)(void *),
> + void *arg, int mode, Error **errp)
> {
> HANDLE hThread;
> struct QemuThreadData *data;
> @@ -409,10 +410,17 @@ void qemu_thread_create(QemuThread *thread, const char *name,
> hThread = (HANDLE) _beginthreadex(NULL, 0, win32_start_routine,
> data, 0, &thread->tid);
> if (!hThread) {
> - error_exit(GetLastError(), __func__);
> + if (data->mode != QEMU_THREAD_DETACHED) {
> + DeleteCriticalSection(&data->cs);
> + }
> + error_setg_errno(errp, errno,
> + "failed to create win32_start_routine");
> + g_free(data);
> + return false;
> }
> CloseHandle(hThread);
> thread->data = data;
> + return true;
> }
>
> void qemu_thread_get_self(QemuThread *thread)
> diff --git a/util/rcu.c b/util/rcu.c
> index 5676c22bd1..145dcdb0c6 100644
> --- a/util/rcu.c
> +++ b/util/rcu.c
> @@ -32,6 +32,7 @@
> #include "qemu/atomic.h"
> #include "qemu/thread.h"
> #include "qemu/main-loop.h"
> +#include "qapi/error.h"
> #if defined(CONFIG_MALLOC_TRIM)
> #include <malloc.h>
> #endif
> @@ -325,7 +326,7 @@ static void rcu_init_complete(void)
> * must have been quiescent even after forking, just recreate it.
> */
> qemu_thread_create(&thread, "call_rcu", call_rcu_thread,
> - NULL, QEMU_THREAD_DETACHED);
> + NULL, QEMU_THREAD_DETACHED, &error_abort);
>
> rcu_register_thread();
> }
> diff --git a/util/thread-pool.c b/util/thread-pool.c
> index 610646d131..ad0f980783 100644
> --- a/util/thread-pool.c
> +++ b/util/thread-pool.c
> @@ -22,6 +22,7 @@
> #include "trace.h"
> #include "block/thread-pool.h"
> #include "qemu/main-loop.h"
> +#include "qapi/error.h"
>
> static void do_spawn_thread(ThreadPool *pool);
>
> @@ -132,7 +133,8 @@ static void do_spawn_thread(ThreadPool *pool)
> pool->new_threads--;
> pool->pending_threads++;
>
> - qemu_thread_create(&t, "worker", worker_thread, pool, QEMU_THREAD_DETACHED);
> + qemu_thread_create(&t, "worker", worker_thread, pool,
> + QEMU_THREAD_DETACHED, &error_abort);
> }
>
> static void spawn_thread_bh_fn(void *opaque)
On Thu, 13 Dec 2018 08:26:48 +0100
Markus Armbruster <armbru@redhat.com> wrote:
> There's a question for David Gibson inline. Please search for /ppc/.
>
> Fei Li <fli@suse.com> writes:
>
> > Make qemu_thread_create() return a Boolean to indicate if it succeeds
> > rather than failing with an error. And add an Error parameter to hold
> > the error message and let the callers handle it.
>
> The "rather than failing with an error" is misleading. Before the
> patch, we report to stderr and abort(). What about:
>
> qemu-thread: Make qemu_thread_create() handle errors properly
>
> qemu_thread_create() abort()s on error. Not nice. Give it a
> return value and an Error ** argument, so it can return success /
> failure.
>
> Still missing from the commit message then: how you update the callers.
> Let's see below.
[snip]
> > --- a/hw/ppc/spapr_hcall.c
> > +++ b/hw/ppc/spapr_hcall.c
> > @@ -478,6 +478,7 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
> > sPAPRPendingHPT *pending = spapr->pending_hpt;
> > uint64_t current_ram_size;
> > int rc;
> > + Error *local_err = NULL;
> >
> > if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
> > return H_AUTHORITY;
> > @@ -538,8 +539,13 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
> > pending->shift = shift;
> > pending->ret = H_HARDWARE;
> >
> > - qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
> > - hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
> > + if (!qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
> > + hpt_prepare_thread, pending,
> > + QEMU_THREAD_DETACHED, &local_err)) {
> > + error_reportf_err(local_err, "failed to create hpt_prepare_thread: ");
> > + g_free(pending);
> > + return H_RESOURCE;
> > + }
> >
> > spapr->pending_hpt = pending;
> >
>
> This is a caller that returns an error code on failure. You change it
> to report the error, then return failure. The return failure part looks
> fine. Whether reporting the error is appropriate I can't say for sure.
> No other failure mode reports anything. David, what do you think?
I think it's reasonable here. In this context error returns and
reported errors are for different audiences. The error returns are for
the guest, the reported errors are for the guest administrator or
management layers. This particularly failure is essentially a host
side fault that is mostly relevant to the VM management. We have to
say *something* to the guest to explain that the action couldn't go
forward and H_RESOURCE makes as much sense as anything.
--
David Gibson <dgibson@redhat.com>
Principal Software Engineer, Virtualization, Red Hat
David Gibson <dgibson@redhat.com> writes:
> On Thu, 13 Dec 2018 08:26:48 +0100
> Markus Armbruster <armbru@redhat.com> wrote:
>
>> There's a question for David Gibson inline. Please search for /ppc/.
>>
>> Fei Li <fli@suse.com> writes:
>>
>> > Make qemu_thread_create() return a Boolean to indicate if it succeeds
>> > rather than failing with an error. And add an Error parameter to hold
>> > the error message and let the callers handle it.
>>
>> The "rather than failing with an error" is misleading. Before the
>> patch, we report to stderr and abort(). What about:
>>
>> qemu-thread: Make qemu_thread_create() handle errors properly
>>
>> qemu_thread_create() abort()s on error. Not nice. Give it a
>> return value and an Error ** argument, so it can return success /
>> failure.
>>
>> Still missing from the commit message then: how you update the callers.
>> Let's see below.
>
> [snip]
>> > --- a/hw/ppc/spapr_hcall.c
>> > +++ b/hw/ppc/spapr_hcall.c
>> > @@ -478,6 +478,7 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
>> > sPAPRPendingHPT *pending = spapr->pending_hpt;
>> > uint64_t current_ram_size;
>> > int rc;
>> > + Error *local_err = NULL;
>> >
>> > if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
>> > return H_AUTHORITY;
>> > @@ -538,8 +539,13 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
>> > pending->shift = shift;
>> > pending->ret = H_HARDWARE;
>> >
>> > - qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
>> > - hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
>> > + if (!qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
>> > + hpt_prepare_thread, pending,
>> > + QEMU_THREAD_DETACHED, &local_err)) {
>> > + error_reportf_err(local_err, "failed to create hpt_prepare_thread: ");
>> > + g_free(pending);
>> > + return H_RESOURCE;
>> > + }
>> >
>> > spapr->pending_hpt = pending;
>> >
>>
>> This is a caller that returns an error code on failure. You change it
>> to report the error, then return failure. The return failure part looks
>> fine. Whether reporting the error is appropriate I can't say for sure.
>> No other failure mode reports anything. David, what do you think?
>
> I think it's reasonable here. In this context error returns and
> reported errors are for different audiences. The error returns are for
> the guest, the reported errors are for the guest administrator or
> management layers. This particularly failure is essentially a host
> side fault that is mostly relevant to the VM management. We have to
> say *something* to the guest to explain that the action couldn't go
> forward and H_RESOURCE makes as much sense as anything.
Double-checking: is it okay to report some failures of this function
(one of two H_RESOURCE failures, to be precise), but not others?
On Wed, 19 Dec 2018 10:29:41 +0100 Markus Armbruster <armbru@redhat.com> wrote: > David Gibson <dgibson@redhat.com> writes: > > [...] > [...] > [...] > [...] > [...] > [...] > [...] > [...] > > Double-checking: is it okay to report some failures of this function > (one of two H_RESOURCE failures, to be precise), but not others? Yes. The distinction is whether the failure is likely to be of relevance to the *host* administrator, or just to the guest. Although.. come to think of it possibly that reported failure should be H_HARDWARE to the guest, rather than H_RESOURCE. -- David Gibson <dgibson@redhat.com> Principal Software Engineer, Virtualization, Red Hat
On 12/13/2018 03:26 PM, Markus Armbruster wrote:
> There's a question for David Gibson inline. Please search for /ppc/.
>
> Fei Li <fli@suse.com> writes:
>
>> Make qemu_thread_create() return a Boolean to indicate if it succeeds
>> rather than failing with an error. And add an Error parameter to hold
>> the error message and let the callers handle it.
> The "rather than failing with an error" is misleading. Before the
> patch, we report to stderr and abort(). What about:
>
> qemu-thread: Make qemu_thread_create() handle errors properly
>
> qemu_thread_create() abort()s on error. Not nice. Give it a
> return value and an Error ** argument, so it can return success /
> failure.
A nice commit-amend! Thanks!
> Still missing from the commit message then: how you update the callers.
Yes, agree. I think the-how should also be noted here, like
- propagating the err to callers whose call trace already have the Error
paramater;
- just add an &error_abort for qemu_thread_create() and make it a "TODO:
xxx";
> Let's see below.
>
>> Cc: Markus Armbruster <armbru@redhat.com>
>> Cc: Daniel P. Berrangé <berrange@redhat.com>
>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> Signed-off-by: Fei Li <fli@suse.com>
>> ---
>> cpus.c | 45 ++++++++++++++++++++++++-------------
>> dump.c | 6 +++--
>> hw/misc/edu.c | 6 +++--
>> hw/ppc/spapr_hcall.c | 10 +++++++--
>> hw/rdma/rdma_backend.c | 4 +++-
>> hw/usb/ccid-card-emulated.c | 16 ++++++++++----
>> include/qemu/thread.h | 4 ++--
>> io/task.c | 3 ++-
>> iothread.c | 16 +++++++++-----
>> migration/migration.c | 54 +++++++++++++++++++++++++++++----------------
>> migration/postcopy-ram.c | 14 ++++++++++--
>> migration/ram.c | 40 ++++++++++++++++++++++++---------
>> migration/savevm.c | 11 ++++++---
>> tests/atomic_add-bench.c | 3 ++-
>> tests/iothread.c | 2 +-
>> tests/qht-bench.c | 3 ++-
>> tests/rcutorture.c | 3 ++-
>> tests/test-aio.c | 2 +-
>> tests/test-rcu-list.c | 3 ++-
>> ui/vnc-jobs.c | 17 +++++++++-----
>> ui/vnc-jobs.h | 2 +-
>> ui/vnc.c | 4 +++-
>> util/compatfd.c | 12 ++++++++--
>> util/oslib-posix.c | 17 ++++++++++----
>> util/qemu-thread-posix.c | 24 +++++++++++++-------
>> util/qemu-thread-win32.c | 16 ++++++++++----
>> util/rcu.c | 3 ++-
>> util/thread-pool.c | 4 +++-
>> 28 files changed, 243 insertions(+), 101 deletions(-)
>>
>> diff --git a/cpus.c b/cpus.c
>> index 7b091bda53..e8450e518a 100644
>> --- a/cpus.c
>> +++ b/cpus.c
>> @@ -1961,15 +1961,20 @@ static void qemu_tcg_init_vcpu(CPUState *cpu, Error **errp)
>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
>> cpu->cpu_index);
>>
>> - qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
>> - cpu, QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(cpu->thread, thread_name,
>> + qemu_tcg_cpu_thread_fn, cpu,
>> + QEMU_THREAD_JOINABLE, errp)) {
>> + return;
>> + }
>>
>> } else {
>> /* share a single thread for all cpus with TCG */
>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
>> - qemu_thread_create(cpu->thread, thread_name,
>> - qemu_tcg_rr_cpu_thread_fn,
>> - cpu, QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(cpu->thread, thread_name,
>> + qemu_tcg_rr_cpu_thread_fn, cpu,
>> + QEMU_THREAD_JOINABLE, errp)) {
>> + return;
>> + }
>>
>> single_tcg_halt_cond = cpu->halt_cond;
>> single_tcg_cpu_thread = cpu->thread;
> This is a caller that sets an error on failure. You make it set an
> error on qemu_thread_create() failure. Makes sense.
Thanks for the comment!
>> @@ -1997,8 +2002,10 @@ static void qemu_hax_start_vcpu(CPUState *cpu, Error **errp)
>>
>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
>> cpu->cpu_index);
>> - qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
>> - cpu, QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
>> + cpu, QEMU_THREAD_JOINABLE, errp)) {
>> + return;
>> + }
>> #ifdef _WIN32
>> cpu->hThread = qemu_thread_get_handle(cpu->thread);
>> #endif
> Likewise. I'll stop commenting on this pattern now.
>
>> @@ -2013,8 +2020,10 @@ static void qemu_kvm_start_vcpu(CPUState *cpu, Error **errp)
>> qemu_cond_init(cpu->halt_cond);
>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
>> cpu->cpu_index);
>> - qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
>> - cpu, QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
>> + cpu, QEMU_THREAD_JOINABLE, errp)) {
>> + /* keep 'if' here in case there is further error handling logic */
>> + }
>> }
>>
>> static void qemu_hvf_start_vcpu(CPUState *cpu, Error **errp)
>> @@ -2031,8 +2040,10 @@ static void qemu_hvf_start_vcpu(CPUState *cpu, Error **errp)
>>
>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
>> cpu->cpu_index);
>> - qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
>> - cpu, QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
>> + cpu, QEMU_THREAD_JOINABLE, errp)) {
>> + /* keep 'if' here in case there is further error handling logic */
>> + }
>> }
>>
>> static void qemu_whpx_start_vcpu(CPUState *cpu, Error **errp)
>> @@ -2044,8 +2055,10 @@ static void qemu_whpx_start_vcpu(CPUState *cpu, Error **errp)
>> qemu_cond_init(cpu->halt_cond);
>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
>> cpu->cpu_index);
>> - qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
>> - cpu, QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
>> + cpu, QEMU_THREAD_JOINABLE, errp)) {
>> + return;
>> + }
>> #ifdef _WIN32
>> cpu->hThread = qemu_thread_get_handle(cpu->thread);
>> #endif
>> @@ -2060,8 +2073,10 @@ static void qemu_dummy_start_vcpu(CPUState *cpu, Error **errp)
>> qemu_cond_init(cpu->halt_cond);
>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
>> cpu->cpu_index);
>> - qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
>> - QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn,
>> + cpu, QEMU_THREAD_JOINABLE, errp)) {
>> + /* keep 'if' here in case there is further error handling logic */
>> + }
>> }
>>
>> bool qemu_init_vcpu(CPUState *cpu, Error **errp)
>> diff --git a/dump.c b/dump.c
>> index 4ec94c5e25..1f003aff9a 100644
>> --- a/dump.c
>> +++ b/dump.c
>> @@ -2020,8 +2020,10 @@ void qmp_dump_guest_memory(bool paging, const char *file,
>> if (detach_p) {
>> /* detached dump */
>> s->detached = true;
>> - qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
>> - s, QEMU_THREAD_DETACHED);
>> + if (!qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
>> + s, QEMU_THREAD_DETACHED, errp)) {
>> + /* keep 'if' here in case there is further error handling logic */
>> + }
>> } else {
>> /* sync dump */
>> dump_process(s, errp);
>> diff --git a/hw/misc/edu.c b/hw/misc/edu.c
>> index cdcf550dd7..6684c60a96 100644
>> --- a/hw/misc/edu.c
>> +++ b/hw/misc/edu.c
>> @@ -355,8 +355,10 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp)
>>
>> qemu_mutex_init(&edu->thr_mutex);
>> qemu_cond_init(&edu->thr_cond);
>> - qemu_thread_create(&edu->thread, "edu", edu_fact_thread,
>> - edu, QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(&edu->thread, "edu", edu_fact_thread,
>> + edu, QEMU_THREAD_JOINABLE, errp)) {
>> + return;
>> + }
>>
>> memory_region_init_io(&edu->mmio, OBJECT(edu), &edu_mmio_ops, edu,
>> "edu-mmio", 1 * MiB);
>> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
>> index ae913d070f..7c16ade04a 100644
>> --- a/hw/ppc/spapr_hcall.c
>> +++ b/hw/ppc/spapr_hcall.c
>> @@ -478,6 +478,7 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
>> sPAPRPendingHPT *pending = spapr->pending_hpt;
>> uint64_t current_ram_size;
>> int rc;
>> + Error *local_err = NULL;
>>
>> if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
>> return H_AUTHORITY;
>> @@ -538,8 +539,13 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
>> pending->shift = shift;
>> pending->ret = H_HARDWARE;
>>
>> - qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
>> - hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
>> + if (!qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
>> + hpt_prepare_thread, pending,
>> + QEMU_THREAD_DETACHED, &local_err)) {
>> + error_reportf_err(local_err, "failed to create hpt_prepare_thread: ");
>> + g_free(pending);
>> + return H_RESOURCE;
>> + }
>>
>> spapr->pending_hpt = pending;
>>
> This is a caller that returns an error code on failure. You change it
> to report the error, then return failure. The return failure part looks
> fine. Whether reporting the error is appropriate I can't say for sure.
> No other failure mode reports anything. David, what do you think?
Just as David explains. :)
> Fei Li, you could pass &error_abort to side-step this question for now.
>
>> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
>> index d7a4bbd91f..53a2bd0d85 100644
>> --- a/hw/rdma/rdma_backend.c
>> +++ b/hw/rdma/rdma_backend.c
>> @@ -164,8 +164,10 @@ static void start_comp_thread(RdmaBackendDev *backend_dev)
>> snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
>> ibv_get_device_name(backend_dev->ib_dev));
>> backend_dev->comp_thread.run = true;
>> + /* FIXME: let the further caller handle the error instead of abort() here */
>> qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
>> - comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
>> + comp_handler_thread, backend_dev,
>> + QEMU_THREAD_DETACHED, &error_abort);
>> }
>>
> This is a caller that can't return failure. You pass &error_abort. No
> behavioral change.
Actually, yes..The reason why I did not do some change is that I am not
quite
sure about how to fix for the rdma device, esp. setting certain value
for the
dev->regs_data[idx] when it fails.
> I think I'd mark the spot TODO, not FIXME. Matter of taste, I guess.
Sounds good, thanks!
>> void rdma_backend_register_comp_handler(void (*handler)(int status,
>> diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c
>> index 25976ed84f..c6783f124a 100644
>> --- a/hw/usb/ccid-card-emulated.c
>> +++ b/hw/usb/ccid-card-emulated.c
>> @@ -33,6 +33,7 @@
>> #include "qemu/main-loop.h"
>> #include "ccid.h"
>> #include "qapi/error.h"
>> +#include "qemu/error-report.h"
>>
>> #define DPRINTF(card, lvl, fmt, ...) \
>> do {\
>> @@ -544,10 +545,17 @@ static void emulated_realize(CCIDCardState *base, Error **errp)
>> error_setg(errp, "%s: failed to initialize vcard", TYPE_EMULATED_CCID);
>> goto out2;
>> }
>> - qemu_thread_create(&card->event_thread_id, "ccid/event", event_thread,
>> - card, QEMU_THREAD_JOINABLE);
>> - qemu_thread_create(&card->apdu_thread_id, "ccid/apdu", handle_apdu_thread,
>> - card, QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(&card->event_thread_id, "ccid/event", event_thread,
>> + card, QEMU_THREAD_JOINABLE, errp)) {
>> + error_report("failed to create event_thread");
>> + goto out2;
>> + }
>> + if (!qemu_thread_create(&card->apdu_thread_id, "ccid/apdu",
>> + handle_apdu_thread, card,
>> + QEMU_THREAD_JOINABLE, errp)) {
>> + error_report("failed to create handle_apdu_thread");
>> + goto out2;
>> + }
>>
>> out2:
>> clean_event_notifier(card);
> error_report() in a realize() method is almost certainly wrong.
Ok, I will remove these two.
>> diff --git a/include/qemu/thread.h b/include/qemu/thread.h
>> index 55d83a907c..12291f4ccd 100644
>> --- a/include/qemu/thread.h
>> +++ b/include/qemu/thread.h
>> @@ -152,9 +152,9 @@ void qemu_event_reset(QemuEvent *ev);
>> void qemu_event_wait(QemuEvent *ev);
>> void qemu_event_destroy(QemuEvent *ev);
>>
>> -void qemu_thread_create(QemuThread *thread, const char *name,
>> +bool qemu_thread_create(QemuThread *thread, const char *name,
>> void *(*start_routine)(void *),
>> - void *arg, int mode);
>> + void *arg, int mode, Error **errp);
>> void *qemu_thread_join(QemuThread *thread);
>> void qemu_thread_get_self(QemuThread *thread);
>> bool qemu_thread_is_self(QemuThread *thread);
>> diff --git a/io/task.c b/io/task.c
>> index 2886a2c1bc..6d3a18ab80 100644
>> --- a/io/task.c
>> +++ b/io/task.c
>> @@ -149,7 +149,8 @@ void qio_task_run_in_thread(QIOTask *task,
>> "io-task-worker",
>> qio_task_thread_worker,
>> data,
>> - QEMU_THREAD_DETACHED);
>> + QEMU_THREAD_DETACHED,
>> + &error_abort);
>> }
>>
>>
> This is a caller that can't return failure. You pass &error_abort. No
> behavioral change. Unlike above, you don't mark this spot FIXME. Any
> particular reason for marking one, but not the other?
Emm, it is a little difficult to add a Error parameter for its callers and
the callers seem does not need the Error. Thus I think passing
&error_abort in this function instead of its further callers is more
direct. :)
The same reasons for the several below.
But just as you mentioned, maybe we should add a "TODO: xxxx" for the direct
&error_abort case in case the callers need the Error parameter in future.
> I'll stop commenting on this pattern now.
>
>> diff --git a/iothread.c b/iothread.c
>> index 2fb1cdf55d..7335dacf0b 100644
>> --- a/iothread.c
>> +++ b/iothread.c
>> @@ -164,9 +164,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
>> &local_error);
>> if (local_error) {
>> error_propagate(errp, local_error);
>> - aio_context_unref(iothread->ctx);
>> - iothread->ctx = NULL;
>> - return;
>> + goto fail;
>> }
>>
>> qemu_mutex_init(&iothread->init_done_lock);
>> @@ -178,8 +176,12 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
>> */
>> name = object_get_canonical_path_component(OBJECT(obj));
>> thread_name = g_strdup_printf("IO %s", name);
>> - qemu_thread_create(&iothread->thread, thread_name, iothread_run,
>> - iothread, QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(&iothread->thread, thread_name, iothread_run,
>> + iothread, QEMU_THREAD_JOINABLE, errp)) {
>> + g_free(thread_name);
>> + g_free(name);
>> + goto fail;
>> + }
>> g_free(thread_name);
>> g_free(name);
>>
>> @@ -190,6 +192,10 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
>> &iothread->init_done_lock);
>> }
>> qemu_mutex_unlock(&iothread->init_done_lock);
>> + return;
>> +fail:
>> + aio_context_unref(iothread->ctx);
>> + iothread->ctx = NULL;
>> }
>>
>> typedef struct {
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 0537fc0c26..af6c72ac5d 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -438,19 +438,22 @@ static void process_incoming_migration_co(void *opaque)
>> /* Make sure all file formats flush their mutable metadata */
>> bdrv_invalidate_cache_all(&local_err);
>> if (local_err) {
>> - migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>> - MIGRATION_STATUS_FAILED);
>> error_report_err(local_err);
>> - exit(EXIT_FAILURE);
>> + goto fail;
>> }
>>
>> if (colo_init_ram_cache() < 0) {
>> error_report("Init ram cache failed");
>> - exit(EXIT_FAILURE);
>> + goto fail;
>> }
>>
>> - qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
>> - colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
>> + colo_process_incoming_thread, mis,
>> + QEMU_THREAD_JOINABLE, &local_err)) {
>> + error_reportf_err(local_err, "failed to create "
>> + "colo_process_incoming_thread: ");
>> + goto fail;
>> + }
>> mis->have_colo_incoming_thread = true;
>> qemu_coroutine_yield();
>>
>> @@ -461,20 +464,22 @@ static void process_incoming_migration_co(void *opaque)
>> }
>>
>> if (ret < 0) {
>> - Error *local_err = NULL;
>> -
>> - migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>> - MIGRATION_STATUS_FAILED);
>> error_report("load of migration failed: %s", strerror(-ret));
>> - qemu_fclose(mis->from_src_file);
>> - if (multifd_load_cleanup(&local_err) != 0) {
>> - error_report_err(local_err);
>> - }
>> - exit(EXIT_FAILURE);
>> + goto fail;
>> }
>> mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
>> qemu_bh_schedule(mis->bh);
>> mis->migration_incoming_co = NULL;
>> + return;
>> +fail:
>> + local_err = NULL;
>> + migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>> + MIGRATION_STATUS_FAILED);
>> + qemu_fclose(mis->from_src_file);
>> + if (multifd_load_cleanup(&local_err) != 0) {
>> + error_report_err(local_err);
>> + }
>> + exit(EXIT_FAILURE);
>> }
> You change handling of errors other than qemu_thread_create(). Separate
> patch, please. I'd put it before this one.
Ok, thanks for the reminder. Will update in the next version.
>>
>> static void migration_incoming_setup(QEMUFile *f)
>> @@ -2345,6 +2350,7 @@ out:
>> static int open_return_path_on_source(MigrationState *ms,
>> bool create_thread)
>> {
>> + Error *local_err = NULL;
>>
>> ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
>> if (!ms->rp_state.from_dst_file) {
>> @@ -2358,8 +2364,13 @@ static int open_return_path_on_source(MigrationState *ms,
>> return 0;
>> }
>>
>> - qemu_thread_create(&ms->rp_state.rp_thread, "return path",
>> - source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(&ms->rp_state.rp_thread, "return path",
>> + source_return_path_thread, ms,
>> + QEMU_THREAD_JOINABLE, &local_err)) {
>> + error_reportf_err(local_err,
>> + "failed to create source_return_path_thread: ");
>> + return -1;
>> + }
>>
>> trace_open_return_path_on_source_continue();
>>
> This is a caller that returns an error code on failure. You change it
> to report the error, then return failure. This is okay, because its
> sole caller also reports errors that way.
Thanks.
>> @@ -3189,8 +3200,13 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
>> migrate_fd_cleanup(s);
>> return;
>> }
>> - qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
>> - QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(&s->thread, "live_migration", migration_thread,
>> + s, QEMU_THREAD_JOINABLE, &error_in)) {
>> + error_reportf_err(error_in, "failed to create migration_thread: ");
>> + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
>> + migrate_fd_cleanup(s);
>> + return;
>> + }
>> s->migration_thread_running = true;
>> }
> This is a caller that reports errors. You make it handle
> qemu_thread_create() the same way. Good.
Thanks!
>>
>> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
>> index fa09dba534..80bfa9c4a2 100644
>> --- a/migration/postcopy-ram.c
>> +++ b/migration/postcopy-ram.c
>> @@ -1083,6 +1083,8 @@ retry:
>>
>> int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>> {
>> + Error *local_err = NULL;
>> +
>> /* Open the fd for the kernel to give us userfaults */
>> mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
>> if (mis->userfault_fd == -1) {
>> @@ -1109,8 +1111,16 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>> }
>>
>> qemu_sem_init(&mis->fault_thread_sem, 0);
>> - qemu_thread_create(&mis->fault_thread, "postcopy/fault",
>> - postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(&mis->fault_thread, "postcopy/fault",
>> + postcopy_ram_fault_thread, mis,
>> + QEMU_THREAD_JOINABLE, &local_err)) {
>> + error_reportf_err(local_err,
>> + "failed to create postcopy_ram_fault_thread: ");
>> + close(mis->userfault_event_fd);
>> + close(mis->userfault_fd);
>> + qemu_sem_destroy(&mis->fault_thread_sem);
>> + return -1;
>> + }
>> qemu_sem_wait(&mis->fault_thread_sem);
>> qemu_sem_destroy(&mis->fault_thread_sem);
>> mis->have_fault_thread = true;
> This is a caller that reports errors, then returns failure. You make it
> handle qemu_thread_create() the same way. Good.
>
> Not related to this patch, just spotted while reviewing it:
>
> /* Mark so that we get notified of accesses to unwritten areas */
> if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
> error_report("ram_block_enable_notify failed");
> return -1;
> }
>
> Do we leak mis->userfault_fd, mis->userfault_event_fd,
> mis->fault_thread_sem here?
Actually the patch 5/7 fixes this: we leave the cleanup() handling to
postcopy_ram_incoming_cleanup() when failing to notify here.
Looking back to the history, I falsely did close(these_fds) just here but
David corrected me, and the following is quoted from his earlier comment:
"
I don't think these close() calls are safe. This code is just after
starting the fault thread, and the fault thread has a poll() call on
these fd's, so we can't close them until we've instructed that thread
to exit.
We should fall out through postcopy_ram_incoming_cleanup, and because
the thread exists it should do a notify to the thread, a join and then
only later do the close calls.
"
>> diff --git a/migration/ram.c b/migration/ram.c
>> index 658dfa88a3..6e0cccf066 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -473,6 +473,7 @@ static void compress_threads_save_cleanup(void)
>> static int compress_threads_save_setup(void)
>> {
>> int i, thread_count;
>> + Error *local_err = NULL;
>>
>> if (!migrate_use_compression()) {
>> return 0;
>> @@ -502,9 +503,12 @@ static int compress_threads_save_setup(void)
>> comp_param[i].quit = false;
>> qemu_mutex_init(&comp_param[i].mutex);
>> qemu_cond_init(&comp_param[i].cond);
>> - qemu_thread_create(compress_threads + i, "compress",
>> - do_data_compress, comp_param + i,
>> - QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(compress_threads + i, "compress",
>> + do_data_compress, comp_param + i,
>> + QEMU_THREAD_JOINABLE, &local_err)) {
>> + error_reportf_err(local_err, "failed to create do_data_compress: ");
>> + goto exit;
>> + }
>> }
>> return 0;
>>
> Reviewing the migration changes is getting tiresome...
Yes, indeed, the migration involves a lot! Thanks so much for helping to
review!
> Is reporting the
> error appropriate here, and why?
I think the qemu monitor should display the obvious and exact failing
reason for administrators, esp considering that qemu_thread_create()
itself does not print any message thus we have no idea which direct
function fails if gdb is not enabled.
IOW, I think David's answer to that ppc's error_reportf_err() also apply
here:
"The error returns are for the guest, the reported errors are for the
guest administrator or management layers."
>
>> @@ -1075,8 +1079,14 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
>> p->c = QIO_CHANNEL(sioc);
>> qio_channel_set_delay(p->c, false);
>> p->running = true;
>> - qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
>> - QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
>> + QEMU_THREAD_JOINABLE, &local_err)) {
>> + migrate_set_error(migrate_get_current(), local_err);
>> + error_reportf_err(local_err,
>> + "failed to create multifd_send_thread: ");
>> + multifd_save_cleanup();
>> + return;
>> + }
>>
>> atomic_inc(&multifd_send_state->count);
>> }
> Same question.
>
>> @@ -1350,8 +1360,13 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
>> p->num_packets = 1;
>>
>> p->running = true;
>> - qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
>> - QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(&p->thread, p->name, multifd_recv_thread,
>> + p, QEMU_THREAD_JOINABLE, &local_err)) {
>> + error_propagate_prepend(errp, local_err,
>> + "failed to create multifd_recv_thread: ");
>> + multifd_recv_terminate_threads(local_err);
>> + return false;
>> + }
>> atomic_inc(&multifd_recv_state->count);
>> return atomic_read(&multifd_recv_state->count) ==
>> migrate_multifd_channels();
>> @@ -3617,6 +3632,7 @@ static void compress_threads_load_cleanup(void)
>> static int compress_threads_load_setup(QEMUFile *f)
>> {
>> int i, thread_count;
>> + Error *local_err = NULL;
>>
>> if (!migrate_use_compression()) {
>> return 0;
>> @@ -3638,9 +3654,13 @@ static int compress_threads_load_setup(QEMUFile *f)
>> qemu_cond_init(&decomp_param[i].cond);
>> decomp_param[i].done = true;
>> decomp_param[i].quit = false;
>> - qemu_thread_create(decompress_threads + i, "decompress",
>> - do_data_decompress, decomp_param + i,
>> - QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(decompress_threads + i, "decompress",
>> + do_data_decompress, decomp_param + i,
>> + QEMU_THREAD_JOINABLE, &local_err)) {
>> + error_reportf_err(local_err,
>> + "failed to create do_data_decompress: ");
>> + goto exit;
>> + }
>> }
>> return 0;
>> exit:
> Same question.
>
>> diff --git a/migration/savevm.c b/migration/savevm.c
>> index d784e8aa40..b8bdcde5d8 100644
>> --- a/migration/savevm.c
>> +++ b/migration/savevm.c
>> @@ -1747,9 +1747,14 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
>> mis->have_listen_thread = true;
>> /* Start up the listening thread and wait for it to signal ready */
>> qemu_sem_init(&mis->listen_thread_sem, 0);
>> - qemu_thread_create(&mis->listen_thread, "postcopy/listen",
>> - postcopy_ram_listen_thread, NULL,
>> - QEMU_THREAD_DETACHED);
>> + if (!qemu_thread_create(&mis->listen_thread, "postcopy/listen",
>> + postcopy_ram_listen_thread, NULL,
>> + QEMU_THREAD_DETACHED, &local_err)) {
>> + error_reportf_err(local_err,
>> + "failed to create postcopy_ram_listen_thread: ");
>> + qemu_sem_destroy(&mis->listen_thread_sem);
>> + return -1;
>> + }
>> qemu_sem_wait(&mis->listen_thread_sem);
>> qemu_sem_destroy(&mis->listen_thread_sem);
>>
> This is a caller that reports errors, then returns failure. You make it
> handle qemu_thread_create() the same way. Good.
>
> I'll stop commenting on this pattern now.
Thanks.
>> diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c
>> index 2f6c72f63a..338b9563e3 100644
>> --- a/tests/atomic_add-bench.c
>> +++ b/tests/atomic_add-bench.c
>> @@ -2,6 +2,7 @@
>> #include "qemu/thread.h"
>> #include "qemu/host-utils.h"
>> #include "qemu/processor.h"
>> +#include "qapi/error.h"
>>
>> struct thread_info {
>> uint64_t r;
>> @@ -110,7 +111,7 @@ static void create_threads(void)
>>
>> info->r = (i + 1) ^ time(NULL);
>> qemu_thread_create(&threads[i], NULL, thread_func, info,
>> - QEMU_THREAD_JOINABLE);
>> + QEMU_THREAD_JOINABLE, &error_abort);
>> }
>> }
... snip for all tests/xxx.c as all the passed parameter is &error_abort ...
>>
>> diff --git a/ui/vnc-jobs.c b/ui/vnc-jobs.c
>> index 929391f85d..35a652d1fd 100644
>> --- a/ui/vnc-jobs.c
>> +++ b/ui/vnc-jobs.c
>> @@ -31,6 +31,7 @@
>> #include "vnc-jobs.h"
>> #include "qemu/sockets.h"
>> #include "qemu/main-loop.h"
>> +#include "qapi/error.h"
>> #include "block/aio.h"
>>
>> /*
>> @@ -331,15 +332,21 @@ static bool vnc_worker_thread_running(void)
>> return queue; /* Check global queue */
>> }
>>
>> -void vnc_start_worker_thread(void)
>> +bool vnc_start_worker_thread(Error **errp)
>> {
>> VncJobQueue *q;
>>
>> - if (vnc_worker_thread_running())
>> - return ;
>> + if (vnc_worker_thread_running()) {
>> + goto out;
>> + }
>>
>> q = vnc_queue_init();
>> - qemu_thread_create(&q->thread, "vnc_worker", vnc_worker_thread, q,
>> - QEMU_THREAD_DETACHED);
>> + if (!qemu_thread_create(&q->thread, "vnc_worker", vnc_worker_thread,
>> + q, QEMU_THREAD_DETACHED, errp)) {
>> + vnc_queue_clear(q);
>> + return false;
>> + }
>> queue = q; /* Set global queue */
>> +out:
>> + return true;
>> }
> I recommend to pass &error_abort to qemu_thread_create() in this patch,
> then convert vnc_start_worker_thread() to Error in a subsequent patch.
Ok, thanks! This makes this patch shorter. :)
BTW, would it be better by adding a "TODO: xxx" comment before the
&error_abort in this patch, and remove it in the subsequent patch?
If it is ok, I will do the same adding for the latter touch_all_pages().
>> diff --git a/ui/vnc-jobs.h b/ui/vnc-jobs.h
>> index 59f66bcc35..14640593db 100644
>> --- a/ui/vnc-jobs.h
>> +++ b/ui/vnc-jobs.h
>> @@ -37,7 +37,7 @@ void vnc_job_push(VncJob *job);
>> void vnc_jobs_join(VncState *vs);
>>
>> void vnc_jobs_consume_buffer(VncState *vs);
>> -void vnc_start_worker_thread(void);
>> +bool vnc_start_worker_thread(Error **errp);
>>
>> /* Locks */
>> static inline int vnc_trylock_display(VncDisplay *vd)
>> diff --git a/ui/vnc.c b/ui/vnc.c
>> index 0c1b477425..0ffe9e6a5d 100644
>> --- a/ui/vnc.c
>> +++ b/ui/vnc.c
>> @@ -3236,7 +3236,9 @@ void vnc_display_init(const char *id, Error **errp)
>> vd->connections_limit = 32;
>>
>> qemu_mutex_init(&vd->mutex);
>> - vnc_start_worker_thread();
>> + if (!vnc_start_worker_thread(errp)) {
>> + return;
>> + }
>>
>> vd->dcl.ops = &dcl_ops;
>> register_displaychangelistener(&vd->dcl);
> These two hunks then also go into the subsequent patch.
Ok.
>> diff --git a/util/compatfd.c b/util/compatfd.c
>> index 980bd33e52..886aa249f9 100644
>> --- a/util/compatfd.c
>> +++ b/util/compatfd.c
>> @@ -16,6 +16,7 @@
>> #include "qemu/osdep.h"
>> #include "qemu-common.h"
>> #include "qemu/thread.h"
>> +#include "qapi/error.h"
>>
>> #include <sys/syscall.h>
>>
>> @@ -70,6 +71,7 @@ static int qemu_signalfd_compat(const sigset_t *mask)
>> struct sigfd_compat_info *info;
>> QemuThread thread;
>> int fds[2];
>> + Error *local_err = NULL;
>>
>> info = malloc(sizeof(*info));
>> if (info == NULL) {
>> @@ -88,8 +90,14 @@ static int qemu_signalfd_compat(const sigset_t *mask)
>> memcpy(&info->mask, mask, sizeof(*mask));
>> info->fd = fds[1];
>>
>> - qemu_thread_create(&thread, "signalfd_compat", sigwait_compat, info,
>> - QEMU_THREAD_DETACHED);
>> + if (!qemu_thread_create(&thread, "signalfd_compat", sigwait_compat,
>> + info, QEMU_THREAD_DETACHED, &local_err)) {
>> + error_reportf_err(local_err, "failed to create sigwait_compat: ");
>> + close(fds[0]);
>> + close(fds[1]);
>> + free(info);
>> + return -1;
>> + }
>>
>> return fds[0];
>> }
> This function is implements signalfd() when the kernel doesn't provide
> it.
>
> signalfd() sets errno on failure. The replacement's existing failure
> modes set errno. You add a failure mode that doesn't set errno. That's
> a bug. To fix it, you can either make qemu_thread_create() set errno,
> or you can make it return a value you can use to set errno. The common
> way to do the latter is returning a *negated* errno value.
Oops, I forgot setting the errno for Linux implementation! My fault..
I will set errno inside qemu_thread_create() as follows:
err = pthread_attr_init(&attr);
if (err) {
- error_setg_errno(errp, -err, "pthread_attr_init failed: %s",
- strerror(err));
+ errno = err;
+ error_setg_errno(errp, errno, "pthread_attr_init failed");
return false;
}
> signalfd() doesn't print anything on failure. The replacement's
> existing failure modes don't print anything. You add a failure mode
> that does print. I think it shouldn't.
Ok, I will remove it. Thanks!
>> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
>> index c1bee2a581..2c779fd634 100644
>> --- a/util/oslib-posix.c
>> +++ b/util/oslib-posix.c
>> @@ -437,9 +437,12 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
>> size_t size_per_thread;
>> char *addr = area;
>> int i = 0;
>> + int started_thread = 0;
>> + Error *local_err = NULL;
>>
>> memset_thread_failed = false;
>> memset_num_threads = get_memset_num_threads(smp_cpus);
>> + started_thread = memset_num_threads;
>> memset_thread = g_new0(MemsetThread, memset_num_threads);
>> numpages_per_thread = (numpages / memset_num_threads);
>> size_per_thread = (hpagesize * numpages_per_thread);
>> @@ -448,13 +451,19 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
>> memset_thread[i].numpages = (i == (memset_num_threads - 1)) ?
>> numpages : numpages_per_thread;
>> memset_thread[i].hpagesize = hpagesize;
>> - qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
>> - do_touch_pages, &memset_thread[i],
>> - QEMU_THREAD_JOINABLE);
>> + if (!qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
>> + do_touch_pages, &memset_thread[i],
>> + QEMU_THREAD_JOINABLE, &local_err)) {
>> + error_reportf_err(local_err, "failed to create do_touch_pages: ");
>> + memset_thread_failed = true;
>> + started_thread = i;
>> + goto out;
>> + }
>> addr += size_per_thread;
>> numpages -= numpages_per_thread;
>> }
>> - for (i = 0; i < memset_num_threads; i++) {
>> +out:
>> + for (i = 0; i < started_thread; i++) {
>> qemu_thread_join(&memset_thread[i].pgthread);
>> }
>> g_free(memset_thread);
> You need to convert this function to Error instead, because its caller
> os_mem_prealloc() sets an error on failure. I recommend to pass
> &error_abort in this patch, and convert to Error in a subsequent patch.
Ok, thanks for the advice.
>> diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
>> index 865e476df5..81b40a1ece 100644
>> --- a/util/qemu-thread-posix.c
>> +++ b/util/qemu-thread-posix.c
>> @@ -15,6 +15,7 @@
>> #include "qemu/atomic.h"
>> #include "qemu/notify.h"
>> #include "qemu-thread-common.h"
>> +#include "qapi/error.h"
>>
>> static bool name_threads;
>>
>> @@ -500,9 +501,9 @@ static void *qemu_thread_start(void *args)
>> return r;
>> }
>>
>> -void qemu_thread_create(QemuThread *thread, const char *name,
>> - void *(*start_routine)(void*),
>> - void *arg, int mode)
>> +bool qemu_thread_create(QemuThread *thread, const char *name,
>> + void *(*start_routine)(void *),
>> + void *arg, int mode, Error **errp)
>> {
>> sigset_t set, oldset;
>> int err;
>> @@ -511,7 +512,9 @@ void qemu_thread_create(QemuThread *thread, const char *name,
>>
>> err = pthread_attr_init(&attr);
>> if (err) {
>> - error_exit(err, __func__);
>> + error_setg_errno(errp, -err, "pthread_attr_init failed: %s",
>> + strerror(err));
>> + return false;
>> }
>>
>> if (mode == QEMU_THREAD_DETACHED) {
>> @@ -526,16 +529,21 @@ void qemu_thread_create(QemuThread *thread, const char *name,
>> qemu_thread_args->name = g_strdup(name);
>> qemu_thread_args->start_routine = start_routine;
>> qemu_thread_args->arg = arg;
>> -
> Let's keep the blank line.
ok.
Thanks so much for the review! Have a nice day. :)
Fei
>> err = pthread_create(&thread->thread, &attr,
>> qemu_thread_start, qemu_thread_args);
>> -
>> - if (err)
>> - error_exit(err, __func__);
>> + if (err) {
>> + error_setg_errno(errp, -err, "pthread_create failed: %s",
>> + strerror(err));
>> + pthread_attr_destroy(&attr);
>> + g_free(qemu_thread_args->name);
>> + g_free(qemu_thread_args);
>> + return false;
>> + }
>>
>> pthread_sigmask(SIG_SETMASK, &oldset, NULL);
>>
>> pthread_attr_destroy(&attr);
>> + return true;
>> }
>>
>> void qemu_thread_get_self(QemuThread *thread)
>> diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
>> index 4a363ca675..57b1143e97 100644
>> --- a/util/qemu-thread-win32.c
>> +++ b/util/qemu-thread-win32.c
>> @@ -20,6 +20,7 @@
>> #include "qemu/thread.h"
>> #include "qemu/notify.h"
>> #include "qemu-thread-common.h"
>> +#include "qapi/error.h"
>> #include <process.h>
>>
>> static bool name_threads;
>> @@ -388,9 +389,9 @@ void *qemu_thread_join(QemuThread *thread)
>> return ret;
>> }
>>
>> -void qemu_thread_create(QemuThread *thread, const char *name,
>> - void *(*start_routine)(void *),
>> - void *arg, int mode)
>> +bool qemu_thread_create(QemuThread *thread, const char *name,
>> + void *(*start_routine)(void *),
>> + void *arg, int mode, Error **errp)
>> {
>> HANDLE hThread;
>> struct QemuThreadData *data;
>> @@ -409,10 +410,17 @@ void qemu_thread_create(QemuThread *thread, const char *name,
>> hThread = (HANDLE) _beginthreadex(NULL, 0, win32_start_routine,
>> data, 0, &thread->tid);
>> if (!hThread) {
>> - error_exit(GetLastError(), __func__);
>> + if (data->mode != QEMU_THREAD_DETACHED) {
>> + DeleteCriticalSection(&data->cs);
>> + }
>> + error_setg_errno(errp, errno,
>> + "failed to create win32_start_routine");
>> + g_free(data);
>> + return false;
>> }
>> CloseHandle(hThread);
>> thread->data = data;
>> + return true;
>> }
>>
>> void qemu_thread_get_self(QemuThread *thread)
>> diff --git a/util/rcu.c b/util/rcu.c
>> index 5676c22bd1..145dcdb0c6 100644
>> --- a/util/rcu.c
>> +++ b/util/rcu.c
>> @@ -32,6 +32,7 @@
>> #include "qemu/atomic.h"
>> #include "qemu/thread.h"
>> #include "qemu/main-loop.h"
>> +#include "qapi/error.h"
>> #if defined(CONFIG_MALLOC_TRIM)
>> #include <malloc.h>
>> #endif
>> @@ -325,7 +326,7 @@ static void rcu_init_complete(void)
>> * must have been quiescent even after forking, just recreate it.
>> */
>> qemu_thread_create(&thread, "call_rcu", call_rcu_thread,
>> - NULL, QEMU_THREAD_DETACHED);
>> + NULL, QEMU_THREAD_DETACHED, &error_abort);
>>
>> rcu_register_thread();
>> }
>> diff --git a/util/thread-pool.c b/util/thread-pool.c
>> index 610646d131..ad0f980783 100644
>> --- a/util/thread-pool.c
>> +++ b/util/thread-pool.c
>> @@ -22,6 +22,7 @@
>> #include "trace.h"
>> #include "block/thread-pool.h"
>> #include "qemu/main-loop.h"
>> +#include "qapi/error.h"
>>
>> static void do_spawn_thread(ThreadPool *pool);
>>
>> @@ -132,7 +133,8 @@ static void do_spawn_thread(ThreadPool *pool)
>> pool->new_threads--;
>> pool->pending_threads++;
>>
>> - qemu_thread_create(&t, "worker", worker_thread, pool, QEMU_THREAD_DETACHED);
>> + qemu_thread_create(&t, "worker", worker_thread, pool,
>> + QEMU_THREAD_DETACHED, &error_abort);
>> }
>>
>> static void spawn_thread_bh_fn(void *opaque)
>
On 12/17/2018 03:29 PM, Fei Li wrote: > > > On 12/13/2018 03:26 PM, Markus Armbruster wrote: >> There's a question for David Gibson inline. Please search for /ppc/. >> >> Fei Li <fli@suse.com> writes: >> >>> Make qemu_thread_create() return a Boolean to indicate if it succeeds >>> rather than failing with an error. And add an Error parameter to hold >>> the error message and let the callers handle it. >> The "rather than failing with an error" is misleading. Before the >> patch, we report to stderr and abort(). What about: >> >> qemu-thread: Make qemu_thread_create() handle errors properly >> >> qemu_thread_create() abort()s on error. Not nice. Give it a >> return value and an Error ** argument, so it can return success / >> failure. > A nice commit-amend! Thanks! >> Still missing from the commit message then: how you update the callers. > Yes, agree. I think the-how should also be noted here, like > - propagating the err to callers whose call trace already have the > Error paramater; > - just add an &error_abort for qemu_thread_create() and make it a > "TODO: xxx"; >> Let's see below. According to your below comment and suggestion, I make a summary for the second paragraph for the commit message, please help to review, thanks. :) /* ...The first paragraph and the middle blank... */ And let's update qemu_thread_create()'s callers by - setting an error on qemu_thread_create() failure for callers that set an error on failure; - reporting the error and returning failure for callers that return an error code on failure; - reporting the error and setting some state for callers that just report errors and choose not to continue on. - passing &error_abort for qemu_thread_create() for callers that can't return failure, and marking a "TODO: " for further change. Have a nice day Fei
Fei Li <fli@suse.com> writes: > On 12/17/2018 03:29 PM, Fei Li wrote: >> >> >> On 12/13/2018 03:26 PM, Markus Armbruster wrote: >>> There's a question for David Gibson inline. Please search for /ppc/. >>> >>> Fei Li <fli@suse.com> writes: >>> >>>> Make qemu_thread_create() return a Boolean to indicate if it succeeds >>>> rather than failing with an error. And add an Error parameter to hold >>>> the error message and let the callers handle it. >>> The "rather than failing with an error" is misleading. Before the >>> patch, we report to stderr and abort(). What about: >>> >>> qemu-thread: Make qemu_thread_create() handle errors properly >>> >>> qemu_thread_create() abort()s on error. Not nice. Give it a >>> return value and an Error ** argument, so it can return success / >>> failure. >> A nice commit-amend! Thanks! >>> Still missing from the commit message then: how you update the callers. >> Yes, agree. I think the-how should also be noted here, like >> - propagating the err to callers whose call trace already have the >> Error paramater; >> - just add an &error_abort for qemu_thread_create() and make it a >> "TODO: xxx"; >>> Let's see below. > According to your below comment and suggestion, I make a summary for > the second paragraph for the commit message, please help to review, > thanks. :) > > /* ...The first paragraph and the middle blank... */ > And let's update qemu_thread_create()'s callers by > - setting an error on qemu_thread_create() failure for callers that > set an error on failure; > - reporting the error and returning failure for callers that return > an error code on failure; > - reporting the error and setting some state for callers that just > report errors and choose not to continue on. > - passing &error_abort for qemu_thread_create() for callers that > can't return failure, and marking a "TODO: " for further change. > > Have a nice day > Fei If you split the patch so that the first part makes all callers pass &error_abort, the first part's commit message becomes much simpler, and the subsequent parts' commit messages should be pretty simple to write, too.
Fei Li <fli@suse.com> writes:
> On 12/13/2018 03:26 PM, Markus Armbruster wrote:
>> There's a question for David Gibson inline. Please search for /ppc/.
>>
>> Fei Li <fli@suse.com> writes:
>>
>>> Make qemu_thread_create() return a Boolean to indicate if it succeeds
>>> rather than failing with an error. And add an Error parameter to hold
>>> the error message and let the callers handle it.
>> The "rather than failing with an error" is misleading. Before the
>> patch, we report to stderr and abort(). What about:
>>
>> qemu-thread: Make qemu_thread_create() handle errors properly
>>
>> qemu_thread_create() abort()s on error. Not nice. Give it a
>> return value and an Error ** argument, so it can return success /
>> failure.
> A nice commit-amend! Thanks!
>> Still missing from the commit message then: how you update the callers.
> Yes, agree. I think the-how should also be noted here, like
> - propagating the err to callers whose call trace already have the
> Error paramater;
> - just add an &error_abort for qemu_thread_create() and make it a
> "TODO: xxx";
>> Let's see below.
>>
>>> Cc: Markus Armbruster <armbru@redhat.com>
>>> Cc: Daniel P. Berrangé <berrange@redhat.com>
>>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>> Signed-off-by: Fei Li <fli@suse.com>
>>> ---
>>> cpus.c | 45 ++++++++++++++++++++++++-------------
>>> dump.c | 6 +++--
>>> hw/misc/edu.c | 6 +++--
>>> hw/ppc/spapr_hcall.c | 10 +++++++--
>>> hw/rdma/rdma_backend.c | 4 +++-
>>> hw/usb/ccid-card-emulated.c | 16 ++++++++++----
>>> include/qemu/thread.h | 4 ++--
>>> io/task.c | 3 ++-
>>> iothread.c | 16 +++++++++-----
>>> migration/migration.c | 54 +++++++++++++++++++++++++++++----------------
>>> migration/postcopy-ram.c | 14 ++++++++++--
>>> migration/ram.c | 40 ++++++++++++++++++++++++---------
>>> migration/savevm.c | 11 ++++++---
>>> tests/atomic_add-bench.c | 3 ++-
>>> tests/iothread.c | 2 +-
>>> tests/qht-bench.c | 3 ++-
>>> tests/rcutorture.c | 3 ++-
>>> tests/test-aio.c | 2 +-
>>> tests/test-rcu-list.c | 3 ++-
>>> ui/vnc-jobs.c | 17 +++++++++-----
>>> ui/vnc-jobs.h | 2 +-
>>> ui/vnc.c | 4 +++-
>>> util/compatfd.c | 12 ++++++++--
>>> util/oslib-posix.c | 17 ++++++++++----
>>> util/qemu-thread-posix.c | 24 +++++++++++++-------
>>> util/qemu-thread-win32.c | 16 ++++++++++----
>>> util/rcu.c | 3 ++-
>>> util/thread-pool.c | 4 +++-
>>> 28 files changed, 243 insertions(+), 101 deletions(-)
>>>
>>> diff --git a/cpus.c b/cpus.c
>>> index 7b091bda53..e8450e518a 100644
>>> --- a/cpus.c
>>> +++ b/cpus.c
>>> @@ -1961,15 +1961,20 @@ static void qemu_tcg_init_vcpu(CPUState *cpu, Error **errp)
>>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
>>> cpu->cpu_index);
>>> - qemu_thread_create(cpu->thread, thread_name,
>>> qemu_tcg_cpu_thread_fn,
>>> - cpu, QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(cpu->thread, thread_name,
>>> + qemu_tcg_cpu_thread_fn, cpu,
>>> + QEMU_THREAD_JOINABLE, errp)) {
>>> + return;
>>> + }
>>> } else {
>>> /* share a single thread for all cpus with TCG */
>>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
>>> - qemu_thread_create(cpu->thread, thread_name,
>>> - qemu_tcg_rr_cpu_thread_fn,
>>> - cpu, QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(cpu->thread, thread_name,
>>> + qemu_tcg_rr_cpu_thread_fn, cpu,
>>> + QEMU_THREAD_JOINABLE, errp)) {
>>> + return;
>>> + }
>>> single_tcg_halt_cond = cpu->halt_cond;
>>> single_tcg_cpu_thread = cpu->thread;
>> This is a caller that sets an error on failure. You make it set an
>> error on qemu_thread_create() failure. Makes sense.
> Thanks for the comment!
>>> @@ -1997,8 +2002,10 @@ static void qemu_hax_start_vcpu(CPUState *cpu, Error **errp)
>>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
>>> cpu->cpu_index);
>>> - qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
>>> - cpu, QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
>>> + cpu, QEMU_THREAD_JOINABLE, errp)) {
>>> + return;
>>> + }
>>> #ifdef _WIN32
>>> cpu->hThread = qemu_thread_get_handle(cpu->thread);
>>> #endif
>> Likewise. I'll stop commenting on this pattern now.
>>
>>> @@ -2013,8 +2020,10 @@ static void qemu_kvm_start_vcpu(CPUState *cpu, Error **errp)
>>> qemu_cond_init(cpu->halt_cond);
>>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
>>> cpu->cpu_index);
>>> - qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
>>> - cpu, QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
>>> + cpu, QEMU_THREAD_JOINABLE, errp)) {
>>> + /* keep 'if' here in case there is further error handling logic */
>>> + }
>>> }
>>> static void qemu_hvf_start_vcpu(CPUState *cpu, Error **errp)
>>> @@ -2031,8 +2040,10 @@ static void qemu_hvf_start_vcpu(CPUState *cpu, Error **errp)
>>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
>>> cpu->cpu_index);
>>> - qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
>>> - cpu, QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
>>> + cpu, QEMU_THREAD_JOINABLE, errp)) {
>>> + /* keep 'if' here in case there is further error handling logic */
>>> + }
>>> }
>>> static void qemu_whpx_start_vcpu(CPUState *cpu, Error **errp)
>>> @@ -2044,8 +2055,10 @@ static void qemu_whpx_start_vcpu(CPUState *cpu, Error **errp)
>>> qemu_cond_init(cpu->halt_cond);
>>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
>>> cpu->cpu_index);
>>> - qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
>>> - cpu, QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
>>> + cpu, QEMU_THREAD_JOINABLE, errp)) {
>>> + return;
>>> + }
>>> #ifdef _WIN32
>>> cpu->hThread = qemu_thread_get_handle(cpu->thread);
>>> #endif
>>> @@ -2060,8 +2073,10 @@ static void qemu_dummy_start_vcpu(CPUState *cpu, Error **errp)
>>> qemu_cond_init(cpu->halt_cond);
>>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
>>> cpu->cpu_index);
>>> - qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
>>> - QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn,
>>> + cpu, QEMU_THREAD_JOINABLE, errp)) {
>>> + /* keep 'if' here in case there is further error handling logic */
>>> + }
>>> }
>>> bool qemu_init_vcpu(CPUState *cpu, Error **errp)
>>> diff --git a/dump.c b/dump.c
>>> index 4ec94c5e25..1f003aff9a 100644
>>> --- a/dump.c
>>> +++ b/dump.c
>>> @@ -2020,8 +2020,10 @@ void qmp_dump_guest_memory(bool paging, const char *file,
>>> if (detach_p) {
>>> /* detached dump */
>>> s->detached = true;
>>> - qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
>>> - s, QEMU_THREAD_DETACHED);
>>> + if (!qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
>>> + s, QEMU_THREAD_DETACHED, errp)) {
>>> + /* keep 'if' here in case there is further error handling logic */
>>> + }
>>> } else {
>>> /* sync dump */
>>> dump_process(s, errp);
>>> diff --git a/hw/misc/edu.c b/hw/misc/edu.c
>>> index cdcf550dd7..6684c60a96 100644
>>> --- a/hw/misc/edu.c
>>> +++ b/hw/misc/edu.c
>>> @@ -355,8 +355,10 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp)
>>> qemu_mutex_init(&edu->thr_mutex);
>>> qemu_cond_init(&edu->thr_cond);
>>> - qemu_thread_create(&edu->thread, "edu", edu_fact_thread,
>>> - edu, QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(&edu->thread, "edu", edu_fact_thread,
>>> + edu, QEMU_THREAD_JOINABLE, errp)) {
>>> + return;
>>> + }
>>> memory_region_init_io(&edu->mmio, OBJECT(edu),
>>> &edu_mmio_ops, edu,
>>> "edu-mmio", 1 * MiB);
>>> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
>>> index ae913d070f..7c16ade04a 100644
>>> --- a/hw/ppc/spapr_hcall.c
>>> +++ b/hw/ppc/spapr_hcall.c
>>> @@ -478,6 +478,7 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
>>> sPAPRPendingHPT *pending = spapr->pending_hpt;
>>> uint64_t current_ram_size;
>>> int rc;
>>> + Error *local_err = NULL;
>>> if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
>>> return H_AUTHORITY;
>>> @@ -538,8 +539,13 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
>>> pending->shift = shift;
>>> pending->ret = H_HARDWARE;
>>> - qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
>>> - hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
>>> + if (!qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
>>> + hpt_prepare_thread, pending,
>>> + QEMU_THREAD_DETACHED, &local_err)) {
>>> + error_reportf_err(local_err, "failed to create hpt_prepare_thread: ");
>>> + g_free(pending);
>>> + return H_RESOURCE;
>>> + }
>>> spapr->pending_hpt = pending;
>>>
>> This is a caller that returns an error code on failure. You change it
>> to report the error, then return failure. The return failure part looks
>> fine. Whether reporting the error is appropriate I can't say for sure.
>> No other failure mode reports anything. David, what do you think?
> Just as David explains. :)
>> Fei Li, you could pass &error_abort to side-step this question for now.
>>
>>> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
>>> index d7a4bbd91f..53a2bd0d85 100644
>>> --- a/hw/rdma/rdma_backend.c
>>> +++ b/hw/rdma/rdma_backend.c
>>> @@ -164,8 +164,10 @@ static void start_comp_thread(RdmaBackendDev *backend_dev)
>>> snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
>>> ibv_get_device_name(backend_dev->ib_dev));
>>> backend_dev->comp_thread.run = true;
>>> + /* FIXME: let the further caller handle the error instead of abort() here */
>>> qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
>>> - comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
>>> + comp_handler_thread, backend_dev,
>>> + QEMU_THREAD_DETACHED, &error_abort);
>>> }
>>>
>> This is a caller that can't return failure. You pass &error_abort. No
>> behavioral change.
> Actually, yes..The reason why I did not do some change is that I am
> not quite
> sure about how to fix for the rdma device, esp. setting certain value
> for the
> dev->regs_data[idx] when it fails.
I recommend to split this patch. First part adds the Error ** parameter
to qemu_thread_create(), passing &error_abort everywhere. No functional
change. Subsequent patches then improve on &error_abort. This way,
each improvement patch can be cc'ed to just that part's maintainer(s).
Parts you don't want to touch you simply leave at &error_abort. Makes
sense?
>> I think I'd mark the spot TODO, not FIXME. Matter of taste, I guess.
> Sounds good, thanks!
>>> void rdma_backend_register_comp_handler(void (*handler)(int status,
>>> diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c
>>> index 25976ed84f..c6783f124a 100644
>>> --- a/hw/usb/ccid-card-emulated.c
>>> +++ b/hw/usb/ccid-card-emulated.c
>>> @@ -33,6 +33,7 @@
>>> #include "qemu/main-loop.h"
>>> #include "ccid.h"
>>> #include "qapi/error.h"
>>> +#include "qemu/error-report.h"
>>> #define DPRINTF(card, lvl, fmt, ...) \
>>> do {\
>>> @@ -544,10 +545,17 @@ static void emulated_realize(CCIDCardState *base, Error **errp)
>>> error_setg(errp, "%s: failed to initialize vcard", TYPE_EMULATED_CCID);
>>> goto out2;
>>> }
>>> - qemu_thread_create(&card->event_thread_id, "ccid/event", event_thread,
>>> - card, QEMU_THREAD_JOINABLE);
>>> - qemu_thread_create(&card->apdu_thread_id, "ccid/apdu", handle_apdu_thread,
>>> - card, QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(&card->event_thread_id, "ccid/event", event_thread,
>>> + card, QEMU_THREAD_JOINABLE, errp)) {
>>> + error_report("failed to create event_thread");
>>> + goto out2;
>>> + }
>>> + if (!qemu_thread_create(&card->apdu_thread_id, "ccid/apdu",
>>> + handle_apdu_thread, card,
>>> + QEMU_THREAD_JOINABLE, errp)) {
>>> + error_report("failed to create handle_apdu_thread");
>>> + goto out2;
>>> + }
>>> out2:
>>> clean_event_notifier(card);
>> error_report() in a realize() method is almost certainly wrong.
> Ok, I will remove these two.
>>> diff --git a/include/qemu/thread.h b/include/qemu/thread.h
>>> index 55d83a907c..12291f4ccd 100644
>>> --- a/include/qemu/thread.h
>>> +++ b/include/qemu/thread.h
>>> @@ -152,9 +152,9 @@ void qemu_event_reset(QemuEvent *ev);
>>> void qemu_event_wait(QemuEvent *ev);
>>> void qemu_event_destroy(QemuEvent *ev);
>>> -void qemu_thread_create(QemuThread *thread, const char *name,
>>> +bool qemu_thread_create(QemuThread *thread, const char *name,
>>> void *(*start_routine)(void *),
>>> - void *arg, int mode);
>>> + void *arg, int mode, Error **errp);
>>> void *qemu_thread_join(QemuThread *thread);
>>> void qemu_thread_get_self(QemuThread *thread);
>>> bool qemu_thread_is_self(QemuThread *thread);
>>> diff --git a/io/task.c b/io/task.c
>>> index 2886a2c1bc..6d3a18ab80 100644
>>> --- a/io/task.c
>>> +++ b/io/task.c
>>> @@ -149,7 +149,8 @@ void qio_task_run_in_thread(QIOTask *task,
>>> "io-task-worker",
>>> qio_task_thread_worker,
>>> data,
>>> - QEMU_THREAD_DETACHED);
>>> + QEMU_THREAD_DETACHED,
>>> + &error_abort);
>>> }
>>>
>> This is a caller that can't return failure. You pass &error_abort. No
>> behavioral change. Unlike above, you don't mark this spot FIXME. Any
>> particular reason for marking one, but not the other?
> Emm, it is a little difficult to add a Error parameter for its callers and
> the callers seem does not need the Error. Thus I think passing
> &error_abort in this function instead of its further callers is more
> direct. :)
> The same reasons for the several below.
>
> But just as you mentioned, maybe we should add a "TODO: xxxx" for the direct
> &error_abort case in case the callers need the Error parameter in future.
Your use of &error_abort in this patch is fine simply because it's no
worse than before. I'm merely probing your use of FIXME / TODO.
Adding a FIXME is appropriate when you're convinced the code is actually
broken.
Adding a TODO is appropriate when you believe the code should be
improved.
Both are almost always worth mentioning in the commit message.
If you don't really know, and you're not really changing how the code
behaves, then it's better not to add either kind of comment.
>> I'll stop commenting on this pattern now.
>>
>>> diff --git a/iothread.c b/iothread.c
>>> index 2fb1cdf55d..7335dacf0b 100644
>>> --- a/iothread.c
>>> +++ b/iothread.c
>>> @@ -164,9 +164,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
>>> &local_error);
>>> if (local_error) {
>>> error_propagate(errp, local_error);
>>> - aio_context_unref(iothread->ctx);
>>> - iothread->ctx = NULL;
>>> - return;
>>> + goto fail;
>>> }
>>> qemu_mutex_init(&iothread->init_done_lock);
>>> @@ -178,8 +176,12 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
>>> */
>>> name = object_get_canonical_path_component(OBJECT(obj));
>>> thread_name = g_strdup_printf("IO %s", name);
>>> - qemu_thread_create(&iothread->thread, thread_name, iothread_run,
>>> - iothread, QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(&iothread->thread, thread_name, iothread_run,
>>> + iothread, QEMU_THREAD_JOINABLE, errp)) {
>>> + g_free(thread_name);
>>> + g_free(name);
>>> + goto fail;
>>> + }
>>> g_free(thread_name);
>>> g_free(name);
>>> @@ -190,6 +192,10 @@ static void iothread_complete(UserCreatable
>>> *obj, Error **errp)
>>> &iothread->init_done_lock);
>>> }
>>> qemu_mutex_unlock(&iothread->init_done_lock);
>>> + return;
>>> +fail:
>>> + aio_context_unref(iothread->ctx);
>>> + iothread->ctx = NULL;
>>> }
>>> typedef struct {
>>> diff --git a/migration/migration.c b/migration/migration.c
>>> index 0537fc0c26..af6c72ac5d 100644
>>> --- a/migration/migration.c
>>> +++ b/migration/migration.c
>>> @@ -438,19 +438,22 @@ static void process_incoming_migration_co(void *opaque)
>>> /* Make sure all file formats flush their mutable metadata */
>>> bdrv_invalidate_cache_all(&local_err);
>>> if (local_err) {
>>> - migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>>> - MIGRATION_STATUS_FAILED);
>>> error_report_err(local_err);
>>> - exit(EXIT_FAILURE);
>>> + goto fail;
>>> }
>>> if (colo_init_ram_cache() < 0) {
>>> error_report("Init ram cache failed");
>>> - exit(EXIT_FAILURE);
>>> + goto fail;
>>> }
>>> - qemu_thread_create(&mis->colo_incoming_thread, "COLO
>>> incoming",
>>> - colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
>>> + colo_process_incoming_thread, mis,
>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>> + error_reportf_err(local_err, "failed to create "
>>> + "colo_process_incoming_thread: ");
>>> + goto fail;
>>> + }
>>> mis->have_colo_incoming_thread = true;
>>> qemu_coroutine_yield();
>>> @@ -461,20 +464,22 @@ static void
>>> process_incoming_migration_co(void *opaque)
>>> }
>>> if (ret < 0) {
>>> - Error *local_err = NULL;
>>> -
>>> - migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>>> - MIGRATION_STATUS_FAILED);
>>> error_report("load of migration failed: %s", strerror(-ret));
>>> - qemu_fclose(mis->from_src_file);
>>> - if (multifd_load_cleanup(&local_err) != 0) {
>>> - error_report_err(local_err);
>>> - }
>>> - exit(EXIT_FAILURE);
>>> + goto fail;
>>> }
>>> mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
>>> qemu_bh_schedule(mis->bh);
>>> mis->migration_incoming_co = NULL;
>>> + return;
>>> +fail:
>>> + local_err = NULL;
>>> + migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>>> + MIGRATION_STATUS_FAILED);
>>> + qemu_fclose(mis->from_src_file);
>>> + if (multifd_load_cleanup(&local_err) != 0) {
>>> + error_report_err(local_err);
>>> + }
>>> + exit(EXIT_FAILURE);
>>> }
>> You change handling of errors other than qemu_thread_create(). Separate
>> patch, please. I'd put it before this one.
> Ok, thanks for the reminder. Will update in the next version.
>>> static void migration_incoming_setup(QEMUFile *f)
>>> @@ -2345,6 +2350,7 @@ out:
>>> static int open_return_path_on_source(MigrationState *ms,
>>> bool create_thread)
>>> {
>>> + Error *local_err = NULL;
>>> ms->rp_state.from_dst_file =
>>> qemu_file_get_return_path(ms->to_dst_file);
>>> if (!ms->rp_state.from_dst_file) {
>>> @@ -2358,8 +2364,13 @@ static int open_return_path_on_source(MigrationState *ms,
>>> return 0;
>>> }
>>> - qemu_thread_create(&ms->rp_state.rp_thread, "return path",
>>> - source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(&ms->rp_state.rp_thread, "return path",
>>> + source_return_path_thread, ms,
>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>> + error_reportf_err(local_err,
>>> + "failed to create source_return_path_thread: ");
>>> + return -1;
>>> + }
>>> trace_open_return_path_on_source_continue();
>>>
>> This is a caller that returns an error code on failure. You change it
>> to report the error, then return failure. This is okay, because its
>> sole caller also reports errors that way.
> Thanks.
>>> @@ -3189,8 +3200,13 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
>>> migrate_fd_cleanup(s);
>>> return;
>>> }
>>> - qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
>>> - QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(&s->thread, "live_migration", migration_thread,
>>> + s, QEMU_THREAD_JOINABLE, &error_in)) {
>>> + error_reportf_err(error_in, "failed to create migration_thread: ");
>>> + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
>>> + migrate_fd_cleanup(s);
>>> + return;
>>> + }
>>> s->migration_thread_running = true;
>>> }
>> This is a caller that reports errors. You make it handle
>> qemu_thread_create() the same way. Good.
> Thanks!
>>> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
>>> index fa09dba534..80bfa9c4a2 100644
>>> --- a/migration/postcopy-ram.c
>>> +++ b/migration/postcopy-ram.c
>>> @@ -1083,6 +1083,8 @@ retry:
>>> int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>>> {
>>> + Error *local_err = NULL;
>>> +
>>> /* Open the fd for the kernel to give us userfaults */
>>> mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
>>> if (mis->userfault_fd == -1) {
>>> @@ -1109,8 +1111,16 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>>> }
>>> qemu_sem_init(&mis->fault_thread_sem, 0);
>>> - qemu_thread_create(&mis->fault_thread, "postcopy/fault",
>>> - postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(&mis->fault_thread, "postcopy/fault",
>>> + postcopy_ram_fault_thread, mis,
>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>> + error_reportf_err(local_err,
>>> + "failed to create postcopy_ram_fault_thread: ");
>>> + close(mis->userfault_event_fd);
>>> + close(mis->userfault_fd);
>>> + qemu_sem_destroy(&mis->fault_thread_sem);
>>> + return -1;
>>> + }
>>> qemu_sem_wait(&mis->fault_thread_sem);
>>> qemu_sem_destroy(&mis->fault_thread_sem);
>>> mis->have_fault_thread = true;
>> This is a caller that reports errors, then returns failure. You make it
>> handle qemu_thread_create() the same way. Good.
>>
>> Not related to this patch, just spotted while reviewing it:
>>
>> /* Mark so that we get notified of accesses to unwritten areas */
>> if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
>> error_report("ram_block_enable_notify failed");
>> return -1;
>> }
>>
>> Do we leak mis->userfault_fd, mis->userfault_event_fd,
>> mis->fault_thread_sem here?
> Actually the patch 5/7 fixes this: we leave the cleanup() handling to
> postcopy_ram_incoming_cleanup() when failing to notify here.
> Looking back to the history, I falsely did close(these_fds) just here but
> David corrected me, and the following is quoted from his earlier comment:
> "
> I don't think these close() calls are safe. This code is just after
> starting the fault thread, and the fault thread has a poll() call on
> these fd's, so we can't close them until we've instructed that thread
> to exit.
>
> We should fall out through postcopy_ram_incoming_cleanup, and because
> the thread exists it should do a notify to the thread, a join and then
> only later do the close calls.
> "
>>> diff --git a/migration/ram.c b/migration/ram.c
>>> index 658dfa88a3..6e0cccf066 100644
>>> --- a/migration/ram.c
>>> +++ b/migration/ram.c
>>> @@ -473,6 +473,7 @@ static void compress_threads_save_cleanup(void)
>>> static int compress_threads_save_setup(void)
>>> {
>>> int i, thread_count;
>>> + Error *local_err = NULL;
>>> if (!migrate_use_compression()) {
>>> return 0;
>>> @@ -502,9 +503,12 @@ static int compress_threads_save_setup(void)
>>> comp_param[i].quit = false;
>>> qemu_mutex_init(&comp_param[i].mutex);
>>> qemu_cond_init(&comp_param[i].cond);
>>> - qemu_thread_create(compress_threads + i, "compress",
>>> - do_data_compress, comp_param + i,
>>> - QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(compress_threads + i, "compress",
>>> + do_data_compress, comp_param + i,
>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>> + error_reportf_err(local_err, "failed to create do_data_compress: ");
>>> + goto exit;
>>> + }
>>> }
>>> return 0;
>>>
>> Reviewing the migration changes is getting tiresome...
> Yes, indeed, the migration involves a lot! Thanks so much for helping
> to review!
>> Is reporting the
>> error appropriate here, and why?
> I think the qemu monitor should display the obvious and exact failing
> reason for administrators, esp considering that qemu_thread_create()
> itself does not print any message thus we have no idea which direct
> function fails if gdb is not enabled.
> IOW, I think David's answer to that ppc's error_reportf_err() also
> apply here:
>
> "The error returns are for the guest, the reported errors are for the
> guest administrator or management layers."
There could well be an issue with the "management layers" part. Should
this error be sent to the management layer via QMP somehow? Migration
maintainers should be able to assist with this question.
>>> @@ -1075,8 +1079,14 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
>>> p->c = QIO_CHANNEL(sioc);
>>> qio_channel_set_delay(p->c, false);
>>> p->running = true;
>>> - qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
>>> - QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>> + migrate_set_error(migrate_get_current(), local_err);
>>> + error_reportf_err(local_err,
>>> + "failed to create multifd_send_thread: ");
>>> + multifd_save_cleanup();
>>> + return;
>>> + }
>>> atomic_inc(&multifd_send_state->count);
>>> }
>> Same question.
>>
>>> @@ -1350,8 +1360,13 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
>>> p->num_packets = 1;
>>> p->running = true;
>>> - qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
>>> - QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(&p->thread, p->name, multifd_recv_thread,
>>> + p, QEMU_THREAD_JOINABLE, &local_err)) {
>>> + error_propagate_prepend(errp, local_err,
>>> + "failed to create multifd_recv_thread: ");
>>> + multifd_recv_terminate_threads(local_err);
>>> + return false;
>>> + }
>>> atomic_inc(&multifd_recv_state->count);
>>> return atomic_read(&multifd_recv_state->count) ==
>>> migrate_multifd_channels();
>>> @@ -3617,6 +3632,7 @@ static void compress_threads_load_cleanup(void)
>>> static int compress_threads_load_setup(QEMUFile *f)
>>> {
>>> int i, thread_count;
>>> + Error *local_err = NULL;
>>> if (!migrate_use_compression()) {
>>> return 0;
>>> @@ -3638,9 +3654,13 @@ static int compress_threads_load_setup(QEMUFile *f)
>>> qemu_cond_init(&decomp_param[i].cond);
>>> decomp_param[i].done = true;
>>> decomp_param[i].quit = false;
>>> - qemu_thread_create(decompress_threads + i, "decompress",
>>> - do_data_decompress, decomp_param + i,
>>> - QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(decompress_threads + i, "decompress",
>>> + do_data_decompress, decomp_param + i,
>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>> + error_reportf_err(local_err,
>>> + "failed to create do_data_decompress: ");
>>> + goto exit;
>>> + }
>>> }
>>> return 0;
>>> exit:
>> Same question.
>>
>>> diff --git a/migration/savevm.c b/migration/savevm.c
>>> index d784e8aa40..b8bdcde5d8 100644
>>> --- a/migration/savevm.c
>>> +++ b/migration/savevm.c
>>> @@ -1747,9 +1747,14 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
>>> mis->have_listen_thread = true;
>>> /* Start up the listening thread and wait for it to signal ready */
>>> qemu_sem_init(&mis->listen_thread_sem, 0);
>>> - qemu_thread_create(&mis->listen_thread, "postcopy/listen",
>>> - postcopy_ram_listen_thread, NULL,
>>> - QEMU_THREAD_DETACHED);
>>> + if (!qemu_thread_create(&mis->listen_thread, "postcopy/listen",
>>> + postcopy_ram_listen_thread, NULL,
>>> + QEMU_THREAD_DETACHED, &local_err)) {
>>> + error_reportf_err(local_err,
>>> + "failed to create postcopy_ram_listen_thread: ");
>>> + qemu_sem_destroy(&mis->listen_thread_sem);
>>> + return -1;
>>> + }
>>> qemu_sem_wait(&mis->listen_thread_sem);
>>> qemu_sem_destroy(&mis->listen_thread_sem);
>>>
>> This is a caller that reports errors, then returns failure. You make it
>> handle qemu_thread_create() the same way. Good.
>>
>> I'll stop commenting on this pattern now.
> Thanks.
>>> diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c
>>> index 2f6c72f63a..338b9563e3 100644
>>> --- a/tests/atomic_add-bench.c
>>> +++ b/tests/atomic_add-bench.c
>>> @@ -2,6 +2,7 @@
>>> #include "qemu/thread.h"
>>> #include "qemu/host-utils.h"
>>> #include "qemu/processor.h"
>>> +#include "qapi/error.h"
>>> struct thread_info {
>>> uint64_t r;
>>> @@ -110,7 +111,7 @@ static void create_threads(void)
>>> info->r = (i + 1) ^ time(NULL);
>>> qemu_thread_create(&threads[i], NULL, thread_func, info,
>>> - QEMU_THREAD_JOINABLE);
>>> + QEMU_THREAD_JOINABLE, &error_abort);
>>> }
>>> }
> ... snip for all tests/xxx.c as all the passed parameter is &error_abort ...
>>> diff --git a/ui/vnc-jobs.c b/ui/vnc-jobs.c
>>> index 929391f85d..35a652d1fd 100644
>>> --- a/ui/vnc-jobs.c
>>> +++ b/ui/vnc-jobs.c
>>> @@ -31,6 +31,7 @@
>>> #include "vnc-jobs.h"
>>> #include "qemu/sockets.h"
>>> #include "qemu/main-loop.h"
>>> +#include "qapi/error.h"
>>> #include "block/aio.h"
>>> /*
>>> @@ -331,15 +332,21 @@ static bool vnc_worker_thread_running(void)
>>> return queue; /* Check global queue */
>>> }
>>> -void vnc_start_worker_thread(void)
>>> +bool vnc_start_worker_thread(Error **errp)
>>> {
>>> VncJobQueue *q;
>>> - if (vnc_worker_thread_running())
>>> - return ;
>>> + if (vnc_worker_thread_running()) {
>>> + goto out;
>>> + }
>>> q = vnc_queue_init();
>>> - qemu_thread_create(&q->thread, "vnc_worker", vnc_worker_thread, q,
>>> - QEMU_THREAD_DETACHED);
>>> + if (!qemu_thread_create(&q->thread, "vnc_worker", vnc_worker_thread,
>>> + q, QEMU_THREAD_DETACHED, errp)) {
>>> + vnc_queue_clear(q);
>>> + return false;
>>> + }
>>> queue = q; /* Set global queue */
>>> +out:
>>> + return true;
>>> }
>> I recommend to pass &error_abort to qemu_thread_create() in this patch,
>> then convert vnc_start_worker_thread() to Error in a subsequent patch.
> Ok, thanks! This makes this patch shorter. :)
> BTW, would it be better by adding a "TODO: xxx" comment before the
> &error_abort in this patch, and remove it in the subsequent patch?
> If it is ok, I will do the same adding for the latter touch_all_pages().
See my remark on use of FIXME and TODO above.
Adding a TODO only to remove it later in the same series is fine. More
so when it helps avoid review questions like "I think you need to do X
here", followed by "Oh, I see you're doing X here" when the reviewer
gets to the later patch.
>>> diff --git a/ui/vnc-jobs.h b/ui/vnc-jobs.h
>>> index 59f66bcc35..14640593db 100644
>>> --- a/ui/vnc-jobs.h
>>> +++ b/ui/vnc-jobs.h
>>> @@ -37,7 +37,7 @@ void vnc_job_push(VncJob *job);
>>> void vnc_jobs_join(VncState *vs);
>>> void vnc_jobs_consume_buffer(VncState *vs);
>>> -void vnc_start_worker_thread(void);
>>> +bool vnc_start_worker_thread(Error **errp);
>>> /* Locks */
>>> static inline int vnc_trylock_display(VncDisplay *vd)
>>> diff --git a/ui/vnc.c b/ui/vnc.c
>>> index 0c1b477425..0ffe9e6a5d 100644
>>> --- a/ui/vnc.c
>>> +++ b/ui/vnc.c
>>> @@ -3236,7 +3236,9 @@ void vnc_display_init(const char *id, Error **errp)
>>> vd->connections_limit = 32;
>>> qemu_mutex_init(&vd->mutex);
>>> - vnc_start_worker_thread();
>>> + if (!vnc_start_worker_thread(errp)) {
>>> + return;
>>> + }
>>> vd->dcl.ops = &dcl_ops;
>>> register_displaychangelistener(&vd->dcl);
>> These two hunks then also go into the subsequent patch.
> Ok.
>>> diff --git a/util/compatfd.c b/util/compatfd.c
>>> index 980bd33e52..886aa249f9 100644
>>> --- a/util/compatfd.c
>>> +++ b/util/compatfd.c
>>> @@ -16,6 +16,7 @@
>>> #include "qemu/osdep.h"
>>> #include "qemu-common.h"
>>> #include "qemu/thread.h"
>>> +#include "qapi/error.h"
>>> #include <sys/syscall.h>
>>> @@ -70,6 +71,7 @@ static int qemu_signalfd_compat(const sigset_t
>>> *mask)
>>> struct sigfd_compat_info *info;
>>> QemuThread thread;
>>> int fds[2];
>>> + Error *local_err = NULL;
>>> info = malloc(sizeof(*info));
>>> if (info == NULL) {
>>> @@ -88,8 +90,14 @@ static int qemu_signalfd_compat(const sigset_t *mask)
>>> memcpy(&info->mask, mask, sizeof(*mask));
>>> info->fd = fds[1];
>>> - qemu_thread_create(&thread, "signalfd_compat",
>>> sigwait_compat, info,
>>> - QEMU_THREAD_DETACHED);
>>> + if (!qemu_thread_create(&thread, "signalfd_compat", sigwait_compat,
>>> + info, QEMU_THREAD_DETACHED, &local_err)) {
>>> + error_reportf_err(local_err, "failed to create sigwait_compat: ");
>>> + close(fds[0]);
>>> + close(fds[1]);
>>> + free(info);
>>> + return -1;
>>> + }
>>> return fds[0];
>>> }
>> This function is implements signalfd() when the kernel doesn't provide
>> it.
>>
>> signalfd() sets errno on failure. The replacement's existing failure
>> modes set errno. You add a failure mode that doesn't set errno. That's
>> a bug. To fix it, you can either make qemu_thread_create() set errno,
>> or you can make it return a value you can use to set errno. The common
>> way to do the latter is returning a *negated* errno value.
> Oops, I forgot setting the errno for Linux implementation! My fault..
> I will set errno inside qemu_thread_create() as follows:
> err = pthread_attr_init(&attr);
> if (err) {
> - error_setg_errno(errp, -err, "pthread_attr_init failed: %s",
> - strerror(err));
> + errno = err;
> + error_setg_errno(errp, errno, "pthread_attr_init failed");
> return false;
> }
Make sure to set errno on all failures, not just this one.
Also add a function comment. I suspect returning negated errno would
lead to a shorter function comment. Yet another reason to write
function comments! Making myself document the mess I made has made me
clean it up before I submit it many times :)
>
>> signalfd() doesn't print anything on failure. The replacement's
>> existing failure modes don't print anything. You add a failure mode
>> that does print. I think it shouldn't.
> Ok, I will remove it. Thanks!
>>> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
>>> index c1bee2a581..2c779fd634 100644
>>> --- a/util/oslib-posix.c
>>> +++ b/util/oslib-posix.c
>>> @@ -437,9 +437,12 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
>>> size_t size_per_thread;
>>> char *addr = area;
>>> int i = 0;
>>> + int started_thread = 0;
>>> + Error *local_err = NULL;
>>> memset_thread_failed = false;
>>> memset_num_threads = get_memset_num_threads(smp_cpus);
>>> + started_thread = memset_num_threads;
>>> memset_thread = g_new0(MemsetThread, memset_num_threads);
>>> numpages_per_thread = (numpages / memset_num_threads);
>>> size_per_thread = (hpagesize * numpages_per_thread);
>>> @@ -448,13 +451,19 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
>>> memset_thread[i].numpages = (i == (memset_num_threads - 1)) ?
>>> numpages : numpages_per_thread;
>>> memset_thread[i].hpagesize = hpagesize;
>>> - qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
>>> - do_touch_pages, &memset_thread[i],
>>> - QEMU_THREAD_JOINABLE);
>>> + if (!qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
>>> + do_touch_pages, &memset_thread[i],
>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>> + error_reportf_err(local_err, "failed to create do_touch_pages: ");
>>> + memset_thread_failed = true;
>>> + started_thread = i;
>>> + goto out;
>>> + }
>>> addr += size_per_thread;
>>> numpages -= numpages_per_thread;
>>> }
>>> - for (i = 0; i < memset_num_threads; i++) {
>>> +out:
>>> + for (i = 0; i < started_thread; i++) {
>>> qemu_thread_join(&memset_thread[i].pgthread);
>>> }
>>> g_free(memset_thread);
>> You need to convert this function to Error instead, because its caller
>> os_mem_prealloc() sets an error on failure. I recommend to pass
>> &error_abort in this patch, and convert to Error in a subsequent patch.
> Ok, thanks for the advice.
>>> diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
>>> index 865e476df5..81b40a1ece 100644
>>> --- a/util/qemu-thread-posix.c
>>> +++ b/util/qemu-thread-posix.c
>>> @@ -15,6 +15,7 @@
>>> #include "qemu/atomic.h"
>>> #include "qemu/notify.h"
>>> #include "qemu-thread-common.h"
>>> +#include "qapi/error.h"
>>> static bool name_threads;
>>> @@ -500,9 +501,9 @@ static void *qemu_thread_start(void *args)
>>> return r;
>>> }
>>> -void qemu_thread_create(QemuThread *thread, const char *name,
>>> - void *(*start_routine)(void*),
>>> - void *arg, int mode)
>>> +bool qemu_thread_create(QemuThread *thread, const char *name,
>>> + void *(*start_routine)(void *),
>>> + void *arg, int mode, Error **errp)
>>> {
>>> sigset_t set, oldset;
>>> int err;
>>> @@ -511,7 +512,9 @@ void qemu_thread_create(QemuThread *thread, const char *name,
>>> err = pthread_attr_init(&attr);
>>> if (err) {
>>> - error_exit(err, __func__);
>>> + error_setg_errno(errp, -err, "pthread_attr_init failed: %s",
>>> + strerror(err));
-err is actually wrong: pthread_attr_init() returns a *positive* errno
code on failure.
>>> + return false;
>>> }
>>> if (mode == QEMU_THREAD_DETACHED) {
>>> @@ -526,16 +529,21 @@ void qemu_thread_create(QemuThread *thread, const char *name,
>>> qemu_thread_args->name = g_strdup(name);
>>> qemu_thread_args->start_routine = start_routine;
>>> qemu_thread_args->arg = arg;
>>> -
>> Let's keep the blank line.
> ok.
>
> Thanks so much for the review! Have a nice day. :)
> Fei
You're welcome :)
>>> err = pthread_create(&thread->thread, &attr,
>>> qemu_thread_start, qemu_thread_args);
>>> -
>>> - if (err)
>>> - error_exit(err, __func__);
>>> + if (err) {
>>> + error_setg_errno(errp, -err, "pthread_create failed: %s",
>>> + strerror(err));
>>> + pthread_attr_destroy(&attr);
>>> + g_free(qemu_thread_args->name);
>>> + g_free(qemu_thread_args);
>>> + return false;
>>> + }
>>> pthread_sigmask(SIG_SETMASK, &oldset, NULL);
>>> pthread_attr_destroy(&attr);
>>> + return true;
>>> }
>>> void qemu_thread_get_self(QemuThread *thread)
>>> diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
>>> index 4a363ca675..57b1143e97 100644
>>> --- a/util/qemu-thread-win32.c
>>> +++ b/util/qemu-thread-win32.c
>>> @@ -20,6 +20,7 @@
>>> #include "qemu/thread.h"
>>> #include "qemu/notify.h"
>>> #include "qemu-thread-common.h"
>>> +#include "qapi/error.h"
>>> #include <process.h>
>>> static bool name_threads;
>>> @@ -388,9 +389,9 @@ void *qemu_thread_join(QemuThread *thread)
>>> return ret;
>>> }
>>> -void qemu_thread_create(QemuThread *thread, const char *name,
>>> - void *(*start_routine)(void *),
>>> - void *arg, int mode)
>>> +bool qemu_thread_create(QemuThread *thread, const char *name,
>>> + void *(*start_routine)(void *),
>>> + void *arg, int mode, Error **errp)
>>> {
>>> HANDLE hThread;
>>> struct QemuThreadData *data;
>>> @@ -409,10 +410,17 @@ void qemu_thread_create(QemuThread *thread, const char *name,
>>> hThread = (HANDLE) _beginthreadex(NULL, 0, win32_start_routine,
>>> data, 0, &thread->tid);
>>> if (!hThread) {
>>> - error_exit(GetLastError(), __func__);
>>> + if (data->mode != QEMU_THREAD_DETACHED) {
>>> + DeleteCriticalSection(&data->cs);
>>> + }
>>> + error_setg_errno(errp, errno,
>>> + "failed to create win32_start_routine");
>>> + g_free(data);
>>> + return false;
>>> }
>>> CloseHandle(hThread);
>>> thread->data = data;
>>> + return true;
>>> }
>>> void qemu_thread_get_self(QemuThread *thread)
>>> diff --git a/util/rcu.c b/util/rcu.c
>>> index 5676c22bd1..145dcdb0c6 100644
>>> --- a/util/rcu.c
>>> +++ b/util/rcu.c
>>> @@ -32,6 +32,7 @@
>>> #include "qemu/atomic.h"
>>> #include "qemu/thread.h"
>>> #include "qemu/main-loop.h"
>>> +#include "qapi/error.h"
>>> #if defined(CONFIG_MALLOC_TRIM)
>>> #include <malloc.h>
>>> #endif
>>> @@ -325,7 +326,7 @@ static void rcu_init_complete(void)
>>> * must have been quiescent even after forking, just recreate it.
>>> */
>>> qemu_thread_create(&thread, "call_rcu", call_rcu_thread,
>>> - NULL, QEMU_THREAD_DETACHED);
>>> + NULL, QEMU_THREAD_DETACHED, &error_abort);
>>> rcu_register_thread();
>>> }
>>> diff --git a/util/thread-pool.c b/util/thread-pool.c
>>> index 610646d131..ad0f980783 100644
>>> --- a/util/thread-pool.c
>>> +++ b/util/thread-pool.c
>>> @@ -22,6 +22,7 @@
>>> #include "trace.h"
>>> #include "block/thread-pool.h"
>>> #include "qemu/main-loop.h"
>>> +#include "qapi/error.h"
>>> static void do_spawn_thread(ThreadPool *pool);
>>> @@ -132,7 +133,8 @@ static void do_spawn_thread(ThreadPool *pool)
>>> pool->new_threads--;
>>> pool->pending_threads++;
>>> - qemu_thread_create(&t, "worker", worker_thread, pool,
>>> QEMU_THREAD_DETACHED);
>>> + qemu_thread_create(&t, "worker", worker_thread, pool,
>>> + QEMU_THREAD_DETACHED, &error_abort);
>>> }
>>> static void spawn_thread_bh_fn(void *opaque)
>>
On 12/19/2018 06:10 PM, Markus Armbruster wrote:
> Fei Li <fli@suse.com> writes:
>
>> On 12/13/2018 03:26 PM, Markus Armbruster wrote:
>>> There's a question for David Gibson inline. Please search for /ppc/.
>>>
>>> Fei Li <fli@suse.com> writes:
>>>
>>>> Make qemu_thread_create() return a Boolean to indicate if it succeeds
>>>> rather than failing with an error. And add an Error parameter to hold
>>>> the error message and let the callers handle it.
>>> The "rather than failing with an error" is misleading. Before the
>>> patch, we report to stderr and abort(). What about:
>>>
>>> qemu-thread: Make qemu_thread_create() handle errors properly
>>>
>>> qemu_thread_create() abort()s on error. Not nice. Give it a
>>> return value and an Error ** argument, so it can return success /
>>> failure.
>> A nice commit-amend! Thanks!
>>> Still missing from the commit message then: how you update the callers.
>> Yes, agree. I think the-how should also be noted here, like
>> - propagating the err to callers whose call trace already have the
>> Error paramater;
>> - just add an &error_abort for qemu_thread_create() and make it a
>> "TODO: xxx";
>>> Let's see below.
>>>
>>>> Cc: Markus Armbruster <armbru@redhat.com>
>>>> Cc: Daniel P. Berrangé <berrange@redhat.com>
>>>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>> Signed-off-by: Fei Li <fli@suse.com>
>>>> ---
>>>> cpus.c | 45 ++++++++++++++++++++++++-------------
>>>> dump.c | 6 +++--
>>>> hw/misc/edu.c | 6 +++--
>>>> hw/ppc/spapr_hcall.c | 10 +++++++--
>>>> hw/rdma/rdma_backend.c | 4 +++-
>>>> hw/usb/ccid-card-emulated.c | 16 ++++++++++----
>>>> include/qemu/thread.h | 4 ++--
>>>> io/task.c | 3 ++-
>>>> iothread.c | 16 +++++++++-----
>>>> migration/migration.c | 54 +++++++++++++++++++++++++++++----------------
>>>> migration/postcopy-ram.c | 14 ++++++++++--
>>>> migration/ram.c | 40 ++++++++++++++++++++++++---------
>>>> migration/savevm.c | 11 ++++++---
>>>> tests/atomic_add-bench.c | 3 ++-
>>>> tests/iothread.c | 2 +-
>>>> tests/qht-bench.c | 3 ++-
>>>> tests/rcutorture.c | 3 ++-
>>>> tests/test-aio.c | 2 +-
>>>> tests/test-rcu-list.c | 3 ++-
>>>> ui/vnc-jobs.c | 17 +++++++++-----
>>>> ui/vnc-jobs.h | 2 +-
>>>> ui/vnc.c | 4 +++-
>>>> util/compatfd.c | 12 ++++++++--
>>>> util/oslib-posix.c | 17 ++++++++++----
>>>> util/qemu-thread-posix.c | 24 +++++++++++++-------
>>>> util/qemu-thread-win32.c | 16 ++++++++++----
>>>> util/rcu.c | 3 ++-
>>>> util/thread-pool.c | 4 +++-
>>>> 28 files changed, 243 insertions(+), 101 deletions(-)
>>>>
>>>> diff --git a/cpus.c b/cpus.c
>>>> index 7b091bda53..e8450e518a 100644
>>>> --- a/cpus.c
>>>> +++ b/cpus.c
>>>> @@ -1961,15 +1961,20 @@ static void qemu_tcg_init_vcpu(CPUState *cpu, Error **errp)
>>>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
>>>> cpu->cpu_index);
>>>> - qemu_thread_create(cpu->thread, thread_name,
>>>> qemu_tcg_cpu_thread_fn,
>>>> - cpu, QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(cpu->thread, thread_name,
>>>> + qemu_tcg_cpu_thread_fn, cpu,
>>>> + QEMU_THREAD_JOINABLE, errp)) {
>>>> + return;
>>>> + }
>>>> } else {
>>>> /* share a single thread for all cpus with TCG */
>>>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
>>>> - qemu_thread_create(cpu->thread, thread_name,
>>>> - qemu_tcg_rr_cpu_thread_fn,
>>>> - cpu, QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(cpu->thread, thread_name,
>>>> + qemu_tcg_rr_cpu_thread_fn, cpu,
>>>> + QEMU_THREAD_JOINABLE, errp)) {
>>>> + return;
>>>> + }
>>>> single_tcg_halt_cond = cpu->halt_cond;
>>>> single_tcg_cpu_thread = cpu->thread;
>>> This is a caller that sets an error on failure. You make it set an
>>> error on qemu_thread_create() failure. Makes sense.
>> Thanks for the comment!
>>>> @@ -1997,8 +2002,10 @@ static void qemu_hax_start_vcpu(CPUState *cpu, Error **errp)
>>>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
>>>> cpu->cpu_index);
>>>> - qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
>>>> - cpu, QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
>>>> + cpu, QEMU_THREAD_JOINABLE, errp)) {
>>>> + return;
>>>> + }
>>>> #ifdef _WIN32
>>>> cpu->hThread = qemu_thread_get_handle(cpu->thread);
>>>> #endif
>>> Likewise. I'll stop commenting on this pattern now.
>>>
>>>> @@ -2013,8 +2020,10 @@ static void qemu_kvm_start_vcpu(CPUState *cpu, Error **errp)
>>>> qemu_cond_init(cpu->halt_cond);
>>>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
>>>> cpu->cpu_index);
>>>> - qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
>>>> - cpu, QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
>>>> + cpu, QEMU_THREAD_JOINABLE, errp)) {
>>>> + /* keep 'if' here in case there is further error handling logic */
>>>> + }
>>>> }
>>>> static void qemu_hvf_start_vcpu(CPUState *cpu, Error **errp)
>>>> @@ -2031,8 +2040,10 @@ static void qemu_hvf_start_vcpu(CPUState *cpu, Error **errp)
>>>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
>>>> cpu->cpu_index);
>>>> - qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
>>>> - cpu, QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
>>>> + cpu, QEMU_THREAD_JOINABLE, errp)) {
>>>> + /* keep 'if' here in case there is further error handling logic */
>>>> + }
>>>> }
>>>> static void qemu_whpx_start_vcpu(CPUState *cpu, Error **errp)
>>>> @@ -2044,8 +2055,10 @@ static void qemu_whpx_start_vcpu(CPUState *cpu, Error **errp)
>>>> qemu_cond_init(cpu->halt_cond);
>>>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
>>>> cpu->cpu_index);
>>>> - qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
>>>> - cpu, QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
>>>> + cpu, QEMU_THREAD_JOINABLE, errp)) {
>>>> + return;
>>>> + }
>>>> #ifdef _WIN32
>>>> cpu->hThread = qemu_thread_get_handle(cpu->thread);
>>>> #endif
>>>> @@ -2060,8 +2073,10 @@ static void qemu_dummy_start_vcpu(CPUState *cpu, Error **errp)
>>>> qemu_cond_init(cpu->halt_cond);
>>>> snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
>>>> cpu->cpu_index);
>>>> - qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
>>>> - QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn,
>>>> + cpu, QEMU_THREAD_JOINABLE, errp)) {
>>>> + /* keep 'if' here in case there is further error handling logic */
>>>> + }
>>>> }
>>>> bool qemu_init_vcpu(CPUState *cpu, Error **errp)
>>>> diff --git a/dump.c b/dump.c
>>>> index 4ec94c5e25..1f003aff9a 100644
>>>> --- a/dump.c
>>>> +++ b/dump.c
>>>> @@ -2020,8 +2020,10 @@ void qmp_dump_guest_memory(bool paging, const char *file,
>>>> if (detach_p) {
>>>> /* detached dump */
>>>> s->detached = true;
>>>> - qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
>>>> - s, QEMU_THREAD_DETACHED);
>>>> + if (!qemu_thread_create(&s->dump_thread, "dump_thread", dump_thread,
>>>> + s, QEMU_THREAD_DETACHED, errp)) {
>>>> + /* keep 'if' here in case there is further error handling logic */
>>>> + }
>>>> } else {
>>>> /* sync dump */
>>>> dump_process(s, errp);
>>>> diff --git a/hw/misc/edu.c b/hw/misc/edu.c
>>>> index cdcf550dd7..6684c60a96 100644
>>>> --- a/hw/misc/edu.c
>>>> +++ b/hw/misc/edu.c
>>>> @@ -355,8 +355,10 @@ static void pci_edu_realize(PCIDevice *pdev, Error **errp)
>>>> qemu_mutex_init(&edu->thr_mutex);
>>>> qemu_cond_init(&edu->thr_cond);
>>>> - qemu_thread_create(&edu->thread, "edu", edu_fact_thread,
>>>> - edu, QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(&edu->thread, "edu", edu_fact_thread,
>>>> + edu, QEMU_THREAD_JOINABLE, errp)) {
>>>> + return;
>>>> + }
>>>> memory_region_init_io(&edu->mmio, OBJECT(edu),
>>>> &edu_mmio_ops, edu,
>>>> "edu-mmio", 1 * MiB);
>>>> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
>>>> index ae913d070f..7c16ade04a 100644
>>>> --- a/hw/ppc/spapr_hcall.c
>>>> +++ b/hw/ppc/spapr_hcall.c
>>>> @@ -478,6 +478,7 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
>>>> sPAPRPendingHPT *pending = spapr->pending_hpt;
>>>> uint64_t current_ram_size;
>>>> int rc;
>>>> + Error *local_err = NULL;
>>>> if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
>>>> return H_AUTHORITY;
>>>> @@ -538,8 +539,13 @@ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
>>>> pending->shift = shift;
>>>> pending->ret = H_HARDWARE;
>>>> - qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
>>>> - hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
>>>> + if (!qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
>>>> + hpt_prepare_thread, pending,
>>>> + QEMU_THREAD_DETACHED, &local_err)) {
>>>> + error_reportf_err(local_err, "failed to create hpt_prepare_thread: ");
>>>> + g_free(pending);
>>>> + return H_RESOURCE;
>>>> + }
>>>> spapr->pending_hpt = pending;
>>>>
>>> This is a caller that returns an error code on failure. You change it
>>> to report the error, then return failure. The return failure part looks
>>> fine. Whether reporting the error is appropriate I can't say for sure.
>>> No other failure mode reports anything. David, what do you think?
>> Just as David explains. :)
>>> Fei Li, you could pass &error_abort to side-step this question for now.
>>>
>>>> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
>>>> index d7a4bbd91f..53a2bd0d85 100644
>>>> --- a/hw/rdma/rdma_backend.c
>>>> +++ b/hw/rdma/rdma_backend.c
>>>> @@ -164,8 +164,10 @@ static void start_comp_thread(RdmaBackendDev *backend_dev)
>>>> snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
>>>> ibv_get_device_name(backend_dev->ib_dev));
>>>> backend_dev->comp_thread.run = true;
>>>> + /* FIXME: let the further caller handle the error instead of abort() here */
>>>> qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
>>>> - comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
>>>> + comp_handler_thread, backend_dev,
>>>> + QEMU_THREAD_DETACHED, &error_abort);
>>>> }
>>>>
>>> This is a caller that can't return failure. You pass &error_abort. No
>>> behavioral change.
>> Actually, yes..The reason why I did not do some change is that I am
>> not quite
>> sure about how to fix for the rdma device, esp. setting certain value
>> for the
>> dev->regs_data[idx] when it fails.
> I recommend to split this patch. First part adds the Error ** parameter
> to qemu_thread_create(), passing &error_abort everywhere. No functional
> change. Subsequent patches then improve on &error_abort. This way,
> each improvement patch can be cc'ed to just that part's maintainer(s).
> Parts you don't want to touch you simply leave at &error_abort. Makes
> sense?
Yes, I think this makes sense, much clearer. :) But I am a little
worried about
whether too many subsequent improvement patches (some of them are quite
small changes) are acceptable.
BTW, referring to the split, I think the previous "[2/7] qemu_init_vcpu:
add a
new Error parameter to propagate" should be merged into the later
improvement for qemu_xxx_init_vcpu. What do you think?
>>> I think I'd mark the spot TODO, not FIXME. Matter of taste, I guess.
>> Sounds good, thanks!
>>>> void rdma_backend_register_comp_handler(void (*handler)(int status,
>>>> diff --git a/hw/usb/ccid-card-emulated.c b/hw/usb/ccid-card-emulated.c
>>>> index 25976ed84f..c6783f124a 100644
>>>> --- a/hw/usb/ccid-card-emulated.c
>>>> +++ b/hw/usb/ccid-card-emulated.c
>>>> @@ -33,6 +33,7 @@
>>>> #include "qemu/main-loop.h"
>>>> #include "ccid.h"
>>>> #include "qapi/error.h"
>>>> +#include "qemu/error-report.h"
>>>> #define DPRINTF(card, lvl, fmt, ...) \
>>>> do {\
>>>> @@ -544,10 +545,17 @@ static void emulated_realize(CCIDCardState *base, Error **errp)
>>>> error_setg(errp, "%s: failed to initialize vcard", TYPE_EMULATED_CCID);
>>>> goto out2;
>>>> }
>>>> - qemu_thread_create(&card->event_thread_id, "ccid/event", event_thread,
>>>> - card, QEMU_THREAD_JOINABLE);
>>>> - qemu_thread_create(&card->apdu_thread_id, "ccid/apdu", handle_apdu_thread,
>>>> - card, QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(&card->event_thread_id, "ccid/event", event_thread,
>>>> + card, QEMU_THREAD_JOINABLE, errp)) {
>>>> + error_report("failed to create event_thread");
>>>> + goto out2;
>>>> + }
>>>> + if (!qemu_thread_create(&card->apdu_thread_id, "ccid/apdu",
>>>> + handle_apdu_thread, card,
>>>> + QEMU_THREAD_JOINABLE, errp)) {
>>>> + error_report("failed to create handle_apdu_thread");
>>>> + goto out2;
>>>> + }
>>>> out2:
>>>> clean_event_notifier(card);
>>> error_report() in a realize() method is almost certainly wrong.
>> Ok, I will remove these two.
>>>> diff --git a/include/qemu/thread.h b/include/qemu/thread.h
>>>> index 55d83a907c..12291f4ccd 100644
>>>> --- a/include/qemu/thread.h
>>>> +++ b/include/qemu/thread.h
>>>> @@ -152,9 +152,9 @@ void qemu_event_reset(QemuEvent *ev);
>>>> void qemu_event_wait(QemuEvent *ev);
>>>> void qemu_event_destroy(QemuEvent *ev);
>>>> -void qemu_thread_create(QemuThread *thread, const char *name,
>>>> +bool qemu_thread_create(QemuThread *thread, const char *name,
>>>> void *(*start_routine)(void *),
>>>> - void *arg, int mode);
>>>> + void *arg, int mode, Error **errp);
>>>> void *qemu_thread_join(QemuThread *thread);
>>>> void qemu_thread_get_self(QemuThread *thread);
>>>> bool qemu_thread_is_self(QemuThread *thread);
>>>> diff --git a/io/task.c b/io/task.c
>>>> index 2886a2c1bc..6d3a18ab80 100644
>>>> --- a/io/task.c
>>>> +++ b/io/task.c
>>>> @@ -149,7 +149,8 @@ void qio_task_run_in_thread(QIOTask *task,
>>>> "io-task-worker",
>>>> qio_task_thread_worker,
>>>> data,
>>>> - QEMU_THREAD_DETACHED);
>>>> + QEMU_THREAD_DETACHED,
>>>> + &error_abort);
>>>> }
>>>>
>>> This is a caller that can't return failure. You pass &error_abort. No
>>> behavioral change. Unlike above, you don't mark this spot FIXME. Any
>>> particular reason for marking one, but not the other?
>> Emm, it is a little difficult to add a Error parameter for its callers and
>> the callers seem does not need the Error. Thus I think passing
>> &error_abort in this function instead of its further callers is more
>> direct. :)
>> The same reasons for the several below.
>>
>> But just as you mentioned, maybe we should add a "TODO: xxxx" for the direct
>> &error_abort case in case the callers need the Error parameter in future.
> Your use of &error_abort in this patch is fine simply because it's no
> worse than before. I'm merely probing your use of FIXME / TODO.
>
> Adding a FIXME is appropriate when you're convinced the code is actually
> broken.
>
> Adding a TODO is appropriate when you believe the code should be
> improved.
>
> Both are almost always worth mentioning in the commit message.
>
> If you don't really know, and you're not really changing how the code
> behaves, then it's better not to add either kind of comment.
Ok, for such cases, I will not add either comment.
Thanks for the detail explanation!
>
>>> I'll stop commenting on this pattern now.
>>>
>>>> diff --git a/iothread.c b/iothread.c
>>>> index 2fb1cdf55d..7335dacf0b 100644
>>>> --- a/iothread.c
>>>> +++ b/iothread.c
>>>> @@ -164,9 +164,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
>>>> &local_error);
>>>> if (local_error) {
>>>> error_propagate(errp, local_error);
>>>> - aio_context_unref(iothread->ctx);
>>>> - iothread->ctx = NULL;
>>>> - return;
>>>> + goto fail;
>>>> }
>>>> qemu_mutex_init(&iothread->init_done_lock);
>>>> @@ -178,8 +176,12 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
>>>> */
>>>> name = object_get_canonical_path_component(OBJECT(obj));
>>>> thread_name = g_strdup_printf("IO %s", name);
>>>> - qemu_thread_create(&iothread->thread, thread_name, iothread_run,
>>>> - iothread, QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(&iothread->thread, thread_name, iothread_run,
>>>> + iothread, QEMU_THREAD_JOINABLE, errp)) {
>>>> + g_free(thread_name);
>>>> + g_free(name);
>>>> + goto fail;
>>>> + }
>>>> g_free(thread_name);
>>>> g_free(name);
>>>> @@ -190,6 +192,10 @@ static void iothread_complete(UserCreatable
>>>> *obj, Error **errp)
>>>> &iothread->init_done_lock);
>>>> }
>>>> qemu_mutex_unlock(&iothread->init_done_lock);
>>>> + return;
>>>> +fail:
>>>> + aio_context_unref(iothread->ctx);
>>>> + iothread->ctx = NULL;
>>>> }
>>>> typedef struct {
>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>> index 0537fc0c26..af6c72ac5d 100644
>>>> --- a/migration/migration.c
>>>> +++ b/migration/migration.c
>>>> @@ -438,19 +438,22 @@ static void process_incoming_migration_co(void *opaque)
>>>> /* Make sure all file formats flush their mutable metadata */
>>>> bdrv_invalidate_cache_all(&local_err);
>>>> if (local_err) {
>>>> - migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>>>> - MIGRATION_STATUS_FAILED);
>>>> error_report_err(local_err);
>>>> - exit(EXIT_FAILURE);
>>>> + goto fail;
>>>> }
>>>> if (colo_init_ram_cache() < 0) {
>>>> error_report("Init ram cache failed");
>>>> - exit(EXIT_FAILURE);
>>>> + goto fail;
>>>> }
>>>> - qemu_thread_create(&mis->colo_incoming_thread, "COLO
>>>> incoming",
>>>> - colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
>>>> + colo_process_incoming_thread, mis,
>>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>>> + error_reportf_err(local_err, "failed to create "
>>>> + "colo_process_incoming_thread: ");
>>>> + goto fail;
>>>> + }
>>>> mis->have_colo_incoming_thread = true;
>>>> qemu_coroutine_yield();
>>>> @@ -461,20 +464,22 @@ static void
>>>> process_incoming_migration_co(void *opaque)
>>>> }
>>>> if (ret < 0) {
>>>> - Error *local_err = NULL;
>>>> -
>>>> - migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>>>> - MIGRATION_STATUS_FAILED);
>>>> error_report("load of migration failed: %s", strerror(-ret));
>>>> - qemu_fclose(mis->from_src_file);
>>>> - if (multifd_load_cleanup(&local_err) != 0) {
>>>> - error_report_err(local_err);
>>>> - }
>>>> - exit(EXIT_FAILURE);
>>>> + goto fail;
>>>> }
>>>> mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
>>>> qemu_bh_schedule(mis->bh);
>>>> mis->migration_incoming_co = NULL;
>>>> + return;
>>>> +fail:
>>>> + local_err = NULL;
>>>> + migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>>>> + MIGRATION_STATUS_FAILED);
>>>> + qemu_fclose(mis->from_src_file);
>>>> + if (multifd_load_cleanup(&local_err) != 0) {
>>>> + error_report_err(local_err);
>>>> + }
>>>> + exit(EXIT_FAILURE);
>>>> }
>>> You change handling of errors other than qemu_thread_create(). Separate
>>> patch, please. I'd put it before this one.
>> Ok, thanks for the reminder. Will update in the next version.
>>>> static void migration_incoming_setup(QEMUFile *f)
>>>> @@ -2345,6 +2350,7 @@ out:
>>>> static int open_return_path_on_source(MigrationState *ms,
>>>> bool create_thread)
>>>> {
>>>> + Error *local_err = NULL;
>>>> ms->rp_state.from_dst_file =
>>>> qemu_file_get_return_path(ms->to_dst_file);
>>>> if (!ms->rp_state.from_dst_file) {
>>>> @@ -2358,8 +2364,13 @@ static int open_return_path_on_source(MigrationState *ms,
>>>> return 0;
>>>> }
>>>> - qemu_thread_create(&ms->rp_state.rp_thread, "return path",
>>>> - source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(&ms->rp_state.rp_thread, "return path",
>>>> + source_return_path_thread, ms,
>>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>>> + error_reportf_err(local_err,
>>>> + "failed to create source_return_path_thread: ");
>>>> + return -1;
>>>> + }
>>>> trace_open_return_path_on_source_continue();
>>>>
>>> This is a caller that returns an error code on failure. You change it
>>> to report the error, then return failure. This is okay, because its
>>> sole caller also reports errors that way.
>> Thanks.
>>>> @@ -3189,8 +3200,13 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
>>>> migrate_fd_cleanup(s);
>>>> return;
>>>> }
>>>> - qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
>>>> - QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(&s->thread, "live_migration", migration_thread,
>>>> + s, QEMU_THREAD_JOINABLE, &error_in)) {
>>>> + error_reportf_err(error_in, "failed to create migration_thread: ");
>>>> + migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
>>>> + migrate_fd_cleanup(s);
>>>> + return;
>>>> + }
>>>> s->migration_thread_running = true;
>>>> }
>>> This is a caller that reports errors. You make it handle
>>> qemu_thread_create() the same way. Good.
>> Thanks!
>>>> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
>>>> index fa09dba534..80bfa9c4a2 100644
>>>> --- a/migration/postcopy-ram.c
>>>> +++ b/migration/postcopy-ram.c
>>>> @@ -1083,6 +1083,8 @@ retry:
>>>> int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>>>> {
>>>> + Error *local_err = NULL;
>>>> +
>>>> /* Open the fd for the kernel to give us userfaults */
>>>> mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
>>>> if (mis->userfault_fd == -1) {
>>>> @@ -1109,8 +1111,16 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>>>> }
>>>> qemu_sem_init(&mis->fault_thread_sem, 0);
>>>> - qemu_thread_create(&mis->fault_thread, "postcopy/fault",
>>>> - postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(&mis->fault_thread, "postcopy/fault",
>>>> + postcopy_ram_fault_thread, mis,
>>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>>> + error_reportf_err(local_err,
>>>> + "failed to create postcopy_ram_fault_thread: ");
>>>> + close(mis->userfault_event_fd);
>>>> + close(mis->userfault_fd);
>>>> + qemu_sem_destroy(&mis->fault_thread_sem);
>>>> + return -1;
>>>> + }
>>>> qemu_sem_wait(&mis->fault_thread_sem);
>>>> qemu_sem_destroy(&mis->fault_thread_sem);
>>>> mis->have_fault_thread = true;
>>> This is a caller that reports errors, then returns failure. You make it
>>> handle qemu_thread_create() the same way. Good.
>>>
>>> Not related to this patch, just spotted while reviewing it:
>>>
>>> /* Mark so that we get notified of accesses to unwritten areas */
>>> if (qemu_ram_foreach_migratable_block(ram_block_enable_notify, mis)) {
>>> error_report("ram_block_enable_notify failed");
>>> return -1;
>>> }
>>>
>>> Do we leak mis->userfault_fd, mis->userfault_event_fd,
>>> mis->fault_thread_sem here?
>> Actually the patch 5/7 fixes this: we leave the cleanup() handling to
>> postcopy_ram_incoming_cleanup() when failing to notify here.
>> Looking back to the history, I falsely did close(these_fds) just here but
>> David corrected me, and the following is quoted from his earlier comment:
>> "
>> I don't think these close() calls are safe. This code is just after
>> starting the fault thread, and the fault thread has a poll() call on
>> these fd's, so we can't close them until we've instructed that thread
>> to exit.
>>
>> We should fall out through postcopy_ram_incoming_cleanup, and because
>> the thread exists it should do a notify to the thread, a join and then
>> only later do the close calls.
>> "
>>>> diff --git a/migration/ram.c b/migration/ram.c
>>>> index 658dfa88a3..6e0cccf066 100644
>>>> --- a/migration/ram.c
>>>> +++ b/migration/ram.c
>>>> @@ -473,6 +473,7 @@ static void compress_threads_save_cleanup(void)
>>>> static int compress_threads_save_setup(void)
>>>> {
>>>> int i, thread_count;
>>>> + Error *local_err = NULL;
>>>> if (!migrate_use_compression()) {
>>>> return 0;
>>>> @@ -502,9 +503,12 @@ static int compress_threads_save_setup(void)
>>>> comp_param[i].quit = false;
>>>> qemu_mutex_init(&comp_param[i].mutex);
>>>> qemu_cond_init(&comp_param[i].cond);
>>>> - qemu_thread_create(compress_threads + i, "compress",
>>>> - do_data_compress, comp_param + i,
>>>> - QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(compress_threads + i, "compress",
>>>> + do_data_compress, comp_param + i,
>>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>>> + error_reportf_err(local_err, "failed to create do_data_compress: ");
>>>> + goto exit;
>>>> + }
>>>> }
>>>> return 0;
>>>>
>>> Reviewing the migration changes is getting tiresome...
>> Yes, indeed, the migration involves a lot! Thanks so much for helping
>> to review!
>>> Is reporting the
>>> error appropriate here, and why?
>> I think the qemu monitor should display the obvious and exact failing
>> reason for administrators, esp considering that qemu_thread_create()
>> itself does not print any message thus we have no idea which direct
>> function fails if gdb is not enabled.
>> IOW, I think David's answer to that ppc's error_reportf_err() also
>> apply here:
>>
>> "The error returns are for the guest, the reported errors are for the
>> guest administrator or management layers."
> There could well be an issue with the "management layers" part. Should
> this error be sent to the management layer via QMP somehow? Migration
> maintainers should be able to assist with this question.
>
>>>> @@ -1075,8 +1079,14 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
>>>> p->c = QIO_CHANNEL(sioc);
>>>> qio_channel_set_delay(p->c, false);
>>>> p->running = true;
>>>> - qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
>>>> - QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
>>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>>> + migrate_set_error(migrate_get_current(), local_err);
>>>> + error_reportf_err(local_err,
>>>> + "failed to create multifd_send_thread: ");
>>>> + multifd_save_cleanup();
>>>> + return;
>>>> + }
>>>> atomic_inc(&multifd_send_state->count);
>>>> }
>>> Same question.
>>>
>>>> @@ -1350,8 +1360,13 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
>>>> p->num_packets = 1;
>>>> p->running = true;
>>>> - qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
>>>> - QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(&p->thread, p->name, multifd_recv_thread,
>>>> + p, QEMU_THREAD_JOINABLE, &local_err)) {
>>>> + error_propagate_prepend(errp, local_err,
>>>> + "failed to create multifd_recv_thread: ");
>>>> + multifd_recv_terminate_threads(local_err);
>>>> + return false;
>>>> + }
>>>> atomic_inc(&multifd_recv_state->count);
>>>> return atomic_read(&multifd_recv_state->count) ==
>>>> migrate_multifd_channels();
>>>> @@ -3617,6 +3632,7 @@ static void compress_threads_load_cleanup(void)
>>>> static int compress_threads_load_setup(QEMUFile *f)
>>>> {
>>>> int i, thread_count;
>>>> + Error *local_err = NULL;
>>>> if (!migrate_use_compression()) {
>>>> return 0;
>>>> @@ -3638,9 +3654,13 @@ static int compress_threads_load_setup(QEMUFile *f)
>>>> qemu_cond_init(&decomp_param[i].cond);
>>>> decomp_param[i].done = true;
>>>> decomp_param[i].quit = false;
>>>> - qemu_thread_create(decompress_threads + i, "decompress",
>>>> - do_data_decompress, decomp_param + i,
>>>> - QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(decompress_threads + i, "decompress",
>>>> + do_data_decompress, decomp_param + i,
>>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>>> + error_reportf_err(local_err,
>>>> + "failed to create do_data_decompress: ");
>>>> + goto exit;
>>>> + }
>>>> }
>>>> return 0;
>>>> exit:
>>> Same question.
>>>
>>>> diff --git a/migration/savevm.c b/migration/savevm.c
>>>> index d784e8aa40..b8bdcde5d8 100644
>>>> --- a/migration/savevm.c
>>>> +++ b/migration/savevm.c
>>>> @@ -1747,9 +1747,14 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
>>>> mis->have_listen_thread = true;
>>>> /* Start up the listening thread and wait for it to signal ready */
>>>> qemu_sem_init(&mis->listen_thread_sem, 0);
>>>> - qemu_thread_create(&mis->listen_thread, "postcopy/listen",
>>>> - postcopy_ram_listen_thread, NULL,
>>>> - QEMU_THREAD_DETACHED);
>>>> + if (!qemu_thread_create(&mis->listen_thread, "postcopy/listen",
>>>> + postcopy_ram_listen_thread, NULL,
>>>> + QEMU_THREAD_DETACHED, &local_err)) {
>>>> + error_reportf_err(local_err,
>>>> + "failed to create postcopy_ram_listen_thread: ");
>>>> + qemu_sem_destroy(&mis->listen_thread_sem);
>>>> + return -1;
>>>> + }
>>>> qemu_sem_wait(&mis->listen_thread_sem);
>>>> qemu_sem_destroy(&mis->listen_thread_sem);
>>>>
>>> This is a caller that reports errors, then returns failure. You make it
>>> handle qemu_thread_create() the same way. Good.
>>>
>>> I'll stop commenting on this pattern now.
>> Thanks.
>>>> diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c
>>>> index 2f6c72f63a..338b9563e3 100644
>>>> --- a/tests/atomic_add-bench.c
>>>> +++ b/tests/atomic_add-bench.c
>>>> @@ -2,6 +2,7 @@
>>>> #include "qemu/thread.h"
>>>> #include "qemu/host-utils.h"
>>>> #include "qemu/processor.h"
>>>> +#include "qapi/error.h"
>>>> struct thread_info {
>>>> uint64_t r;
>>>> @@ -110,7 +111,7 @@ static void create_threads(void)
>>>> info->r = (i + 1) ^ time(NULL);
>>>> qemu_thread_create(&threads[i], NULL, thread_func, info,
>>>> - QEMU_THREAD_JOINABLE);
>>>> + QEMU_THREAD_JOINABLE, &error_abort);
>>>> }
>>>> }
>> ... snip for all tests/xxx.c as all the passed parameter is &error_abort ...
>>>> diff --git a/ui/vnc-jobs.c b/ui/vnc-jobs.c
>>>> index 929391f85d..35a652d1fd 100644
>>>> --- a/ui/vnc-jobs.c
>>>> +++ b/ui/vnc-jobs.c
>>>> @@ -31,6 +31,7 @@
>>>> #include "vnc-jobs.h"
>>>> #include "qemu/sockets.h"
>>>> #include "qemu/main-loop.h"
>>>> +#include "qapi/error.h"
>>>> #include "block/aio.h"
>>>> /*
>>>> @@ -331,15 +332,21 @@ static bool vnc_worker_thread_running(void)
>>>> return queue; /* Check global queue */
>>>> }
>>>> -void vnc_start_worker_thread(void)
>>>> +bool vnc_start_worker_thread(Error **errp)
>>>> {
>>>> VncJobQueue *q;
>>>> - if (vnc_worker_thread_running())
>>>> - return ;
>>>> + if (vnc_worker_thread_running()) {
>>>> + goto out;
>>>> + }
>>>> q = vnc_queue_init();
>>>> - qemu_thread_create(&q->thread, "vnc_worker", vnc_worker_thread, q,
>>>> - QEMU_THREAD_DETACHED);
>>>> + if (!qemu_thread_create(&q->thread, "vnc_worker", vnc_worker_thread,
>>>> + q, QEMU_THREAD_DETACHED, errp)) {
>>>> + vnc_queue_clear(q);
>>>> + return false;
>>>> + }
>>>> queue = q; /* Set global queue */
>>>> +out:
>>>> + return true;
>>>> }
>>> I recommend to pass &error_abort to qemu_thread_create() in this patch,
>>> then convert vnc_start_worker_thread() to Error in a subsequent patch.
>> Ok, thanks! This makes this patch shorter. :)
>> BTW, would it be better by adding a "TODO: xxx" comment before the
>> &error_abort in this patch, and remove it in the subsequent patch?
>> If it is ok, I will do the same adding for the latter touch_all_pages().
> See my remark on use of FIXME and TODO above.
>
> Adding a TODO only to remove it later in the same series is fine. More
> so when it helps avoid review questions like "I think you need to do X
> here", followed by "Oh, I see you're doing X here" when the reviewer
> gets to the later patch.
Ok, will do so then, thanks for the advice.
>
>>>> diff --git a/ui/vnc-jobs.h b/ui/vnc-jobs.h
>>>> index 59f66bcc35..14640593db 100644
>>>> --- a/ui/vnc-jobs.h
>>>> +++ b/ui/vnc-jobs.h
>>>> @@ -37,7 +37,7 @@ void vnc_job_push(VncJob *job);
>>>> void vnc_jobs_join(VncState *vs);
>>>> void vnc_jobs_consume_buffer(VncState *vs);
>>>> -void vnc_start_worker_thread(void);
>>>> +bool vnc_start_worker_thread(Error **errp);
>>>> /* Locks */
>>>> static inline int vnc_trylock_display(VncDisplay *vd)
>>>> diff --git a/ui/vnc.c b/ui/vnc.c
>>>> index 0c1b477425..0ffe9e6a5d 100644
>>>> --- a/ui/vnc.c
>>>> +++ b/ui/vnc.c
>>>> @@ -3236,7 +3236,9 @@ void vnc_display_init(const char *id, Error **errp)
>>>> vd->connections_limit = 32;
>>>> qemu_mutex_init(&vd->mutex);
>>>> - vnc_start_worker_thread();
>>>> + if (!vnc_start_worker_thread(errp)) {
>>>> + return;
>>>> + }
>>>> vd->dcl.ops = &dcl_ops;
>>>> register_displaychangelistener(&vd->dcl);
>>> These two hunks then also go into the subsequent patch.
>> Ok.
>>>> diff --git a/util/compatfd.c b/util/compatfd.c
>>>> index 980bd33e52..886aa249f9 100644
>>>> --- a/util/compatfd.c
>>>> +++ b/util/compatfd.c
>>>> @@ -16,6 +16,7 @@
>>>> #include "qemu/osdep.h"
>>>> #include "qemu-common.h"
>>>> #include "qemu/thread.h"
>>>> +#include "qapi/error.h"
>>>> #include <sys/syscall.h>
>>>> @@ -70,6 +71,7 @@ static int qemu_signalfd_compat(const sigset_t
>>>> *mask)
>>>> struct sigfd_compat_info *info;
>>>> QemuThread thread;
>>>> int fds[2];
>>>> + Error *local_err = NULL;
>>>> info = malloc(sizeof(*info));
>>>> if (info == NULL) {
>>>> @@ -88,8 +90,14 @@ static int qemu_signalfd_compat(const sigset_t *mask)
>>>> memcpy(&info->mask, mask, sizeof(*mask));
>>>> info->fd = fds[1];
>>>> - qemu_thread_create(&thread, "signalfd_compat",
>>>> sigwait_compat, info,
>>>> - QEMU_THREAD_DETACHED);
>>>> + if (!qemu_thread_create(&thread, "signalfd_compat", sigwait_compat,
>>>> + info, QEMU_THREAD_DETACHED, &local_err)) {
>>>> + error_reportf_err(local_err, "failed to create sigwait_compat: ");
>>>> + close(fds[0]);
>>>> + close(fds[1]);
>>>> + free(info);
>>>> + return -1;
>>>> + }
>>>> return fds[0];
>>>> }
>>> This function is implements signalfd() when the kernel doesn't provide
>>> it.
>>>
>>> signalfd() sets errno on failure. The replacement's existing failure
>>> modes set errno. You add a failure mode that doesn't set errno. That's
>>> a bug. To fix it, you can either make qemu_thread_create() set errno,
>>> or you can make it return a value you can use to set errno. The common
>>> way to do the latter is returning a *negated* errno value.
>> Oops, I forgot setting the errno for Linux implementation! My fault..
>> I will set errno inside qemu_thread_create() as follows:
>> err = pthread_attr_init(&attr);
>> if (err) {
>> - error_setg_errno(errp, -err, "pthread_attr_init failed: %s",
>> - strerror(err));
>> + errno = err;
>> + error_setg_errno(errp, errno, "pthread_attr_init failed");
>> return false;
>> }
> Make sure to set errno on all failures, not just this one.
Actually, this code update is changed for qemu_thread_create() itself,
I think if the errno is set in this function, no callers' errno need to
be set.
Please correct me if I understand wrong. :)
> Also add a function comment. I suspect returning negated errno would
> lead to a shorter function comment.
Actually only one caller needs the errno, that is the above
qemu_signalfd_compat().
For the returning value, I remember there's once a email thread talking
about it:
returning a bool (and let the passed errp hold the error message) is to
keep the
consistency with glib. IMO, returning a bool or returning the -errno is
equal to
me if we do not use the return value again in the callers, it just
involves the
judgement. But if we want to reuse the return value, like:
ret = qemu_thread_create(xx, xx, &local_err);
I do not think it is much needed. What do you think?
> Yet another reason to write
> function comments! Making myself document the mess I made has made me
> clean it up before I submit it many times :)
Ok, thanks for the experience. Will add the comment. :)
>
>>> signalfd() doesn't print anything on failure. The replacement's
>>> existing failure modes don't print anything. You add a failure mode
>>> that does print. I think it shouldn't.
>> Ok, I will remove it. Thanks!
>>>> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
>>>> index c1bee2a581..2c779fd634 100644
>>>> --- a/util/oslib-posix.c
>>>> +++ b/util/oslib-posix.c
>>>> @@ -437,9 +437,12 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
>>>> size_t size_per_thread;
>>>> char *addr = area;
>>>> int i = 0;
>>>> + int started_thread = 0;
>>>> + Error *local_err = NULL;
>>>> memset_thread_failed = false;
>>>> memset_num_threads = get_memset_num_threads(smp_cpus);
>>>> + started_thread = memset_num_threads;
>>>> memset_thread = g_new0(MemsetThread, memset_num_threads);
>>>> numpages_per_thread = (numpages / memset_num_threads);
>>>> size_per_thread = (hpagesize * numpages_per_thread);
>>>> @@ -448,13 +451,19 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
>>>> memset_thread[i].numpages = (i == (memset_num_threads - 1)) ?
>>>> numpages : numpages_per_thread;
>>>> memset_thread[i].hpagesize = hpagesize;
>>>> - qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
>>>> - do_touch_pages, &memset_thread[i],
>>>> - QEMU_THREAD_JOINABLE);
>>>> + if (!qemu_thread_create(&memset_thread[i].pgthread, "touch_pages",
>>>> + do_touch_pages, &memset_thread[i],
>>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>>> + error_reportf_err(local_err, "failed to create do_touch_pages: ");
>>>> + memset_thread_failed = true;
>>>> + started_thread = i;
>>>> + goto out;
>>>> + }
>>>> addr += size_per_thread;
>>>> numpages -= numpages_per_thread;
>>>> }
>>>> - for (i = 0; i < memset_num_threads; i++) {
>>>> +out:
>>>> + for (i = 0; i < started_thread; i++) {
>>>> qemu_thread_join(&memset_thread[i].pgthread);
>>>> }
>>>> g_free(memset_thread);
>>> You need to convert this function to Error instead, because its caller
>>> os_mem_prealloc() sets an error on failure. I recommend to pass
>>> &error_abort in this patch, and convert to Error in a subsequent patch.
>> Ok, thanks for the advice.
>>>> diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
>>>> index 865e476df5..81b40a1ece 100644
>>>> --- a/util/qemu-thread-posix.c
>>>> +++ b/util/qemu-thread-posix.c
>>>> @@ -15,6 +15,7 @@
>>>> #include "qemu/atomic.h"
>>>> #include "qemu/notify.h"
>>>> #include "qemu-thread-common.h"
>>>> +#include "qapi/error.h"
>>>> static bool name_threads;
>>>> @@ -500,9 +501,9 @@ static void *qemu_thread_start(void *args)
>>>> return r;
>>>> }
>>>> -void qemu_thread_create(QemuThread *thread, const char *name,
>>>> - void *(*start_routine)(void*),
>>>> - void *arg, int mode)
>>>> +bool qemu_thread_create(QemuThread *thread, const char *name,
>>>> + void *(*start_routine)(void *),
>>>> + void *arg, int mode, Error **errp)
>>>> {
>>>> sigset_t set, oldset;
>>>> int err;
>>>> @@ -511,7 +512,9 @@ void qemu_thread_create(QemuThread *thread, const char *name,
>>>> err = pthread_attr_init(&attr);
>>>> if (err) {
>>>> - error_exit(err, __func__);
>>>> + error_setg_errno(errp, -err, "pthread_attr_init failed: %s",
>>>> + strerror(err));
> -err is actually wrong: pthread_attr_init() returns a *positive* errno
> code on failure.
Yes, a definite wrong code.. :( Actually, pthread_attr_init() returns a
nonzero error
number, thus I do the below update by assigning the return err to errno.
err = pthread_attr_init(&attr);
if (err) {
- error_exit(err, __func__);
+ errno = err;
+ error_setg_errno(errp, errno, "pthread_attr_init failed");
+ return false;
}
Have a nice day, thanks so much for the review! ;)
Fei
>
>>>> + return false;
>>>> }
>>>> if (mode == QEMU_THREAD_DETACHED) {
>>>> @@ -526,16 +529,21 @@ void qemu_thread_create(QemuThread *thread, const char *name,
>>>> qemu_thread_args->name = g_strdup(name);
>>>> qemu_thread_args->start_routine = start_routine;
>>>> qemu_thread_args->arg = arg;
>>>> -
>>> Let's keep the blank line.
>> ok.
>>
>> Thanks so much for the review! Have a nice day. :)
>> Fei
> You're welcome :)
>
>>>> err = pthread_create(&thread->thread, &attr,
>>>> qemu_thread_start, qemu_thread_args);
>>>> -
>>>> - if (err)
>>>> - error_exit(err, __func__);
>>>> + if (err) {
>>>> + error_setg_errno(errp, -err, "pthread_create failed: %s",
>>>> + strerror(err));
>>>> + pthread_attr_destroy(&attr);
>>>> + g_free(qemu_thread_args->name);
>>>> + g_free(qemu_thread_args);
>>>> + return false;
>>>> + }
>>>> pthread_sigmask(SIG_SETMASK, &oldset, NULL);
>>>> pthread_attr_destroy(&attr);
>>>> + return true;
>>>> }
>>>> void qemu_thread_get_self(QemuThread *thread)
>>>> diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
>>>> index 4a363ca675..57b1143e97 100644
>>>> --- a/util/qemu-thread-win32.c
>>>> +++ b/util/qemu-thread-win32.c
>>>> @@ -20,6 +20,7 @@
>>>> #include "qemu/thread.h"
>>>> #include "qemu/notify.h"
>>>> #include "qemu-thread-common.h"
>>>> +#include "qapi/error.h"
>>>> #include <process.h>
>>>> static bool name_threads;
>>>> @@ -388,9 +389,9 @@ void *qemu_thread_join(QemuThread *thread)
>>>> return ret;
>>>> }
>>>> -void qemu_thread_create(QemuThread *thread, const char *name,
>>>> - void *(*start_routine)(void *),
>>>> - void *arg, int mode)
>>>> +bool qemu_thread_create(QemuThread *thread, const char *name,
>>>> + void *(*start_routine)(void *),
>>>> + void *arg, int mode, Error **errp)
>>>> {
>>>> HANDLE hThread;
>>>> struct QemuThreadData *data;
>>>> @@ -409,10 +410,17 @@ void qemu_thread_create(QemuThread *thread, const char *name,
>>>> hThread = (HANDLE) _beginthreadex(NULL, 0, win32_start_routine,
>>>> data, 0, &thread->tid);
>>>> if (!hThread) {
>>>> - error_exit(GetLastError(), __func__);
>>>> + if (data->mode != QEMU_THREAD_DETACHED) {
>>>> + DeleteCriticalSection(&data->cs);
>>>> + }
>>>> + error_setg_errno(errp, errno,
>>>> + "failed to create win32_start_routine");
>>>> + g_free(data);
>>>> + return false;
>>>> }
>>>> CloseHandle(hThread);
>>>> thread->data = data;
>>>> + return true;
>>>> }
>>>> void qemu_thread_get_self(QemuThread *thread)
>>>> diff --git a/util/rcu.c b/util/rcu.c
>>>> index 5676c22bd1..145dcdb0c6 100644
>>>> --- a/util/rcu.c
>>>> +++ b/util/rcu.c
>>>> @@ -32,6 +32,7 @@
>>>> #include "qemu/atomic.h"
>>>> #include "qemu/thread.h"
>>>> #include "qemu/main-loop.h"
>>>> +#include "qapi/error.h"
>>>> #if defined(CONFIG_MALLOC_TRIM)
>>>> #include <malloc.h>
>>>> #endif
>>>> @@ -325,7 +326,7 @@ static void rcu_init_complete(void)
>>>> * must have been quiescent even after forking, just recreate it.
>>>> */
>>>> qemu_thread_create(&thread, "call_rcu", call_rcu_thread,
>>>> - NULL, QEMU_THREAD_DETACHED);
>>>> + NULL, QEMU_THREAD_DETACHED, &error_abort);
>>>> rcu_register_thread();
>>>> }
>>>> diff --git a/util/thread-pool.c b/util/thread-pool.c
>>>> index 610646d131..ad0f980783 100644
>>>> --- a/util/thread-pool.c
>>>> +++ b/util/thread-pool.c
>>>> @@ -22,6 +22,7 @@
>>>> #include "trace.h"
>>>> #include "block/thread-pool.h"
>>>> #include "qemu/main-loop.h"
>>>> +#include "qapi/error.h"
>>>> static void do_spawn_thread(ThreadPool *pool);
>>>> @@ -132,7 +133,8 @@ static void do_spawn_thread(ThreadPool *pool)
>>>> pool->new_threads--;
>>>> pool->pending_threads++;
>>>> - qemu_thread_create(&t, "worker", worker_thread, pool,
>>>> QEMU_THREAD_DETACHED);
>>>> + qemu_thread_create(&t, "worker", worker_thread, pool,
>>>> + QEMU_THREAD_DETACHED, &error_abort);
>>>> }
>>>> static void spawn_thread_bh_fn(void *opaque)
>
On 12/19/18 6:14 AM, Fei Li wrote: >>>>> 28 files changed, 243 insertions(+), 101 deletions(-) >> I recommend to split this patch. First part adds the Error ** parameter >> to qemu_thread_create(), passing &error_abort everywhere. No functional >> change. Subsequent patches then improve on &error_abort. This way, >> each improvement patch can be cc'ed to just that part's maintainer(s). >> Parts you don't want to touch you simply leave at &error_abort. Makes >> sense? > Yes, I think this makes sense, much clearer. :) But I am a little > worried about > whether too many subsequent improvement patches (some of them are quite > small changes) are acceptable. A long series of small patches, where each patch is cc'd to an appropriate maintainer, will likely get cumulative reviews faster than a single monolithic patch where no one person is the expert on every line touched. -- Eric Blake, Principal Software Engineer Red Hat, Inc. +1-919-301-3266 Virtualization: qemu.org | libvirt.org
On 12/20/2018 01:29 AM, Eric Blake wrote: > On 12/19/18 6:14 AM, Fei Li wrote: > >>>>>> 28 files changed, 243 insertions(+), 101 deletions(-) > >>> I recommend to split this patch. First part adds the Error ** >>> parameter >>> to qemu_thread_create(), passing &error_abort everywhere. No functional >>> change. Subsequent patches then improve on &error_abort. This way, >>> each improvement patch can be cc'ed to just that part's maintainer(s). >>> Parts you don't want to touch you simply leave at &error_abort. Makes >>> sense? >> Yes, I think this makes sense, much clearer. :) But I am a little >> worried about >> whether too many subsequent improvement patches (some of them are quite >> small changes) are acceptable. > > A long series of small patches, where each patch is cc'd to an > appropriate maintainer, will likely get cumulative reviews faster than > a single monolithic patch where no one person is the expert on every > line touched. > Ok, thanks for the advice! Have a nice day Fei
On 12/19/2018 08:14 PM, Fei Li wrote:
>
> On 12/19/2018 06:10 PM, Markus Armbruster wrote:
>> Fei Li <fli@suse.com> writes:
>>
>>> On 12/13/2018 03:26 PM, Markus Armbruster wrote:
>>>> There's a question for David Gibson inline. Please search for /ppc/.
>>>>
>>>> Fei Li <fli@suse.com> writes:
>>>>
>>>>> Make qemu_thread_create() return a Boolean to indicate if it succeeds
>>>>> rather than failing with an error. And add an Error parameter to hold
>>>>> the error message and let the callers handle it.
>>>> The "rather than failing with an error" is misleading. Before the
>>>> patch, we report to stderr and abort(). What about:
>>>>
>>>> qemu-thread: Make qemu_thread_create() handle errors properly
>>>>
>>>> qemu_thread_create() abort()s on error. Not nice. Give it a
>>>> return value and an Error ** argument, so it can return
>>>> success /
>>>> failure.
>>> A nice commit-amend! Thanks!
>>>> Still missing from the commit message then: how you update the
>>>> callers.
>>> Yes, agree. I think the-how should also be noted here, like
>>> - propagating the err to callers whose call trace already have the
>>> Error paramater;
>>> - just add an &error_abort for qemu_thread_create() and make it a
>>> "TODO: xxx";
>>>> Let's see below.
>>>>
>>>>> Cc: Markus Armbruster <armbru@redhat.com>
>>>>> Cc: Daniel P. Berrangé <berrange@redhat.com>
>>>>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>>> Signed-off-by: Fei Li <fli@suse.com>
>>>>> ---
>>>>> cpus.c | 45
>>>>> ++++++++++++++++++++++++-------------
>>>>> dump.c | 6 +++--
>>>>> hw/misc/edu.c | 6 +++--
>>>>> hw/ppc/spapr_hcall.c | 10 +++++++--
>>>>> hw/rdma/rdma_backend.c | 4 +++-
>>>>> hw/usb/ccid-card-emulated.c | 16 ++++++++++----
>>>>> include/qemu/thread.h | 4 ++--
>>>>> io/task.c | 3 ++-
>>>>> iothread.c | 16 +++++++++-----
>>>>> migration/migration.c | 54
>>>>> +++++++++++++++++++++++++++++----------------
>>>>> migration/postcopy-ram.c | 14 ++++++++++--
>>>>> migration/ram.c | 40 ++++++++++++++++++++++++---------
>>>>> migration/savevm.c | 11 ++++++---
>>>>> tests/atomic_add-bench.c | 3 ++-
>>>>> tests/iothread.c | 2 +-
>>>>> tests/qht-bench.c | 3 ++-
>>>>> tests/rcutorture.c | 3 ++-
>>>>> tests/test-aio.c | 2 +-
>>>>> tests/test-rcu-list.c | 3 ++-
>>>>> ui/vnc-jobs.c | 17 +++++++++-----
>>>>> ui/vnc-jobs.h | 2 +-
>>>>> ui/vnc.c | 4 +++-
>>>>> util/compatfd.c | 12 ++++++++--
>>>>> util/oslib-posix.c | 17 ++++++++++----
>>>>> util/qemu-thread-posix.c | 24 +++++++++++++-------
>>>>> util/qemu-thread-win32.c | 16 ++++++++++----
>>>>> util/rcu.c | 3 ++-
>>>>> util/thread-pool.c | 4 +++-
>>>>> 28 files changed, 243 insertions(+), 101 deletions(-)
>>>>>
...snip, and only leave the three uncertain small topics...
>>>
>>>>> diff --git a/migration/ram.c b/migration/ram.c
>>>>> index 658dfa88a3..6e0cccf066 100644
>>>>> --- a/migration/ram.c
>>>>> +++ b/migration/ram.c
>>>>> @@ -473,6 +473,7 @@ static void compress_threads_save_cleanup(void)
>>>>> static int compress_threads_save_setup(void)
>>>>> {
>>>>> int i, thread_count;
>>>>> + Error *local_err = NULL;
>>>>> if (!migrate_use_compression()) {
>>>>> return 0;
>>>>> @@ -502,9 +503,12 @@ static int compress_threads_save_setup(void)
>>>>> comp_param[i].quit = false;
>>>>> qemu_mutex_init(&comp_param[i].mutex);
>>>>> qemu_cond_init(&comp_param[i].cond);
>>>>> - qemu_thread_create(compress_threads + i, "compress",
>>>>> - do_data_compress, comp_param + i,
>>>>> - QEMU_THREAD_JOINABLE);
>>>>> + if (!qemu_thread_create(compress_threads + i, "compress",
>>>>> + do_data_compress, comp_param + i,
>>>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>>>> + error_reportf_err(local_err, "failed to create
>>>>> do_data_compress: ");
>>>>> + goto exit;
>>>>> + }
>>>>> }
>>>>> return 0;
>>>> Reviewing the migration changes is getting tiresome...
>>> Yes, indeed, the migration involves a lot! Thanks so much for helping
>>> to review!
>>>> Is reporting the
>>>> error appropriate here, and why?
>>> I think the qemu monitor should display the obvious and exact failing
>>> reason for administrators, esp considering that qemu_thread_create()
>>> itself does not print any message thus we have no idea which direct
>>> function fails if gdb is not enabled.
>>> IOW, I think David's answer to that ppc's error_reportf_err() also
>>> apply here:
>>>
>>> "The error returns are for the guest, the reported errors are for the
>>> guest administrator or management layers."
>> There could well be an issue with the "management layers" part. Should
>> this error be sent to the management layer via QMP somehow? Migration
>> maintainers should be able to assist with this question.
Kindly ping migration maintainers. :)
>
>>>>> diff --git a/util/compatfd.c b/util/compatfd.c
>>>>> index 980bd33e52..886aa249f9 100644
>>>>> --- a/util/compatfd.c
>>>>> +++ b/util/compatfd.c
>>>>> @@ -16,6 +16,7 @@
>>>>> #include "qemu/osdep.h"
>>>>> #include "qemu-common.h"
>>>>> #include "qemu/thread.h"
>>>>> +#include "qapi/error.h"
>>>>> #include <sys/syscall.h>
>>>>> @@ -70,6 +71,7 @@ static int qemu_signalfd_compat(const sigset_t
>>>>> *mask)
>>>>> struct sigfd_compat_info *info;
>>>>> QemuThread thread;
>>>>> int fds[2];
>>>>> + Error *local_err = NULL;
>>>>> info = malloc(sizeof(*info));
>>>>> if (info == NULL) {
>>>>> @@ -88,8 +90,14 @@ static int qemu_signalfd_compat(const sigset_t
>>>>> *mask)
>>>>> memcpy(&info->mask, mask, sizeof(*mask));
>>>>> info->fd = fds[1];
>>>>> - qemu_thread_create(&thread, "signalfd_compat",
>>>>> sigwait_compat, info,
>>>>> - QEMU_THREAD_DETACHED);
>>>>> + if (!qemu_thread_create(&thread, "signalfd_compat",
>>>>> sigwait_compat,
>>>>> + info, QEMU_THREAD_DETACHED,
>>>>> &local_err)) {
>>>>> + error_reportf_err(local_err, "failed to create
>>>>> sigwait_compat: ");
>>>>> + close(fds[0]);
>>>>> + close(fds[1]);
>>>>> + free(info);
>>>>> + return -1;
>>>>> + }
>>>>> return fds[0];
>>>>> }
>>>> This function is implements signalfd() when the kernel doesn't provide
>>>> it.
>>>>
>>>> signalfd() sets errno on failure. The replacement's existing failure
>>>> modes set errno. You add a failure mode that doesn't set errno.
>>>> That's
>>>> a bug. To fix it, you can either make qemu_thread_create() set errno,
>>>> or you can make it return a value you can use to set errno. The common
>>>> way to do the latter is returning a *negated* errno value.
>>> Oops, I forgot setting the errno for Linux implementation! My fault..
>>> I will set errno inside qemu_thread_create() as follows:
>>> err = pthread_attr_init(&attr);
>>> if (err) {
>>> - error_setg_errno(errp, -err, "pthread_attr_init failed: %s",
>>> - strerror(err));
>>> + errno = err;
>>> + error_setg_errno(errp, errno, "pthread_attr_init failed");
>>> return false;
>>> }
>> Make sure to set errno on all failures, not just this one.
> Actually, this code update is changed for qemu_thread_create() itself,
> I think if the errno is set in this function, no callers' errno need
> to be set.
> Please correct me if I understand wrong. :)
>> Also add a function comment. I suspect returning negated errno would
>> lead to a shorter function comment.
> Actually only one caller needs the errno, that is the above
> qemu_signalfd_compat().
> For the returning value, I remember there's once a email thread
> talking about it:
> returning a bool (and let the passed errp hold the error message) is
> to keep the
> consistency with glib. IMO, returning a bool or returning the -errno
> is equal to
> me if we do not use the return value again in the callers, it just
> involves the
> judgement. But if we want to reuse the return value, like:
> ret = qemu_thread_create(xx, xx, &local_err);
> I do not think it is much needed. What do you think?
One place needs to be confirmed. :)
>> Yet another reason to write
>> function comments! Making myself document the mess I made has made me
>> clean it up before I submit it many times :)
> Ok, thanks for the experience. Will add the comment. :)
>>
>>>>> diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
>>>>> index 865e476df5..81b40a1ece 100644
>>>>> --- a/util/qemu-thread-posix.c
>>>>> +++ b/util/qemu-thread-posix.c
>>>>> @@ -15,6 +15,7 @@
>>>>> #include "qemu/atomic.h"
>>>>> #include "qemu/notify.h"
>>>>> #include "qemu-thread-common.h"
>>>>> +#include "qapi/error.h"
>>>>> static bool name_threads;
>>>>> @@ -500,9 +501,9 @@ static void *qemu_thread_start(void *args)
>>>>> return r;
>>>>> }
>>>>> -void qemu_thread_create(QemuThread *thread, const char *name,
>>>>> - void *(*start_routine)(void*),
>>>>> - void *arg, int mode)
>>>>> +bool qemu_thread_create(QemuThread *thread, const char *name,
>>>>> + void *(*start_routine)(void *),
>>>>> + void *arg, int mode, Error **errp)
>>>>> {
>>>>> sigset_t set, oldset;
>>>>> int err;
>>>>> @@ -511,7 +512,9 @@ void qemu_thread_create(QemuThread *thread,
>>>>> const char *name,
>>>>> err = pthread_attr_init(&attr);
>>>>> if (err) {
>>>>> - error_exit(err, __func__);
>>>>> + error_setg_errno(errp, -err, "pthread_attr_init failed: %s",
>>>>> + strerror(err));
>> -err is actually wrong: pthread_attr_init() returns a *positive* errno
>> code on failure.
> Yes, a definite wrong code.. :( Actually, pthread_attr_init() returns
> a nonzero error
> number, thus I do the below update by assigning the return err to errno.
>
> err = pthread_attr_init(&attr);
> if (err) {
> - error_exit(err, __func__);
> + errno = err;
> + error_setg_errno(errp, errno, "pthread_attr_init failed");
> + return false;
> }
>
Another place needs to be confirmed. :)
>
Have a nice day, thanks
Fei
On Fri, Dec 21, 2018 at 05:36:57PM +0800, Fei Li wrote:
>
> On 12/19/2018 08:14 PM, Fei Li wrote:
> >
> > On 12/19/2018 06:10 PM, Markus Armbruster wrote:
> > > Fei Li <fli@suse.com> writes:
> > >
> > > > On 12/13/2018 03:26 PM, Markus Armbruster wrote:
> > > > > There's a question for David Gibson inline. Please search for /ppc/.
> > > > >
> > > > > Fei Li <fli@suse.com> writes:
> > > > >
> > > > > > Make qemu_thread_create() return a Boolean to indicate if it succeeds
> > > > > > rather than failing with an error. And add an Error parameter to hold
> > > > > > the error message and let the callers handle it.
> > > > > The "rather than failing with an error" is misleading. Before the
> > > > > patch, we report to stderr and abort(). What about:
> > > > >
> > > > > qemu-thread: Make qemu_thread_create() handle errors properly
> > > > >
> > > > > qemu_thread_create() abort()s on error. Not nice. Give it a
> > > > > return value and an Error ** argument, so it can
> > > > > return success /
> > > > > failure.
> > > > A nice commit-amend! Thanks!
> > > > > Still missing from the commit message then: how you update
> > > > > the callers.
> > > > Yes, agree. I think the-how should also be noted here, like
> > > > - propagating the err to callers whose call trace already have the
> > > > Error paramater;
> > > > - just add an &error_abort for qemu_thread_create() and make it a
> > > > "TODO: xxx";
> > > > > Let's see below.
> > > > >
> > > > > > Cc: Markus Armbruster <armbru@redhat.com>
> > > > > > Cc: Daniel P. Berrangé <berrange@redhat.com>
> > > > > > Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > > > > > Signed-off-by: Fei Li <fli@suse.com>
> > > > > > ---
> > > > > > cpus.c | 45
> > > > > > ++++++++++++++++++++++++-------------
> > > > > > dump.c | 6 +++--
> > > > > > hw/misc/edu.c | 6 +++--
> > > > > > hw/ppc/spapr_hcall.c | 10 +++++++--
> > > > > > hw/rdma/rdma_backend.c | 4 +++-
> > > > > > hw/usb/ccid-card-emulated.c | 16 ++++++++++----
> > > > > > include/qemu/thread.h | 4 ++--
> > > > > > io/task.c | 3 ++-
> > > > > > iothread.c | 16 +++++++++-----
> > > > > > migration/migration.c | 54
> > > > > > +++++++++++++++++++++++++++++----------------
> > > > > > migration/postcopy-ram.c | 14 ++++++++++--
> > > > > > migration/ram.c | 40 ++++++++++++++++++++++++---------
> > > > > > migration/savevm.c | 11 ++++++---
> > > > > > tests/atomic_add-bench.c | 3 ++-
> > > > > > tests/iothread.c | 2 +-
> > > > > > tests/qht-bench.c | 3 ++-
> > > > > > tests/rcutorture.c | 3 ++-
> > > > > > tests/test-aio.c | 2 +-
> > > > > > tests/test-rcu-list.c | 3 ++-
> > > > > > ui/vnc-jobs.c | 17 +++++++++-----
> > > > > > ui/vnc-jobs.h | 2 +-
> > > > > > ui/vnc.c | 4 +++-
> > > > > > util/compatfd.c | 12 ++++++++--
> > > > > > util/oslib-posix.c | 17 ++++++++++----
> > > > > > util/qemu-thread-posix.c | 24 +++++++++++++-------
> > > > > > util/qemu-thread-win32.c | 16 ++++++++++----
> > > > > > util/rcu.c | 3 ++-
> > > > > > util/thread-pool.c | 4 +++-
> > > > > > 28 files changed, 243 insertions(+), 101 deletions(-)
> > > > > >
> ...snip, and only leave the three uncertain small topics...
> > > >
> > > > > > diff --git a/migration/ram.c b/migration/ram.c
> > > > > > index 658dfa88a3..6e0cccf066 100644
> > > > > > --- a/migration/ram.c
> > > > > > +++ b/migration/ram.c
> > > > > > @@ -473,6 +473,7 @@ static void compress_threads_save_cleanup(void)
> > > > > > static int compress_threads_save_setup(void)
> > > > > > {
> > > > > > int i, thread_count;
> > > > > > + Error *local_err = NULL;
> > > > > > if (!migrate_use_compression()) {
> > > > > > return 0;
> > > > > > @@ -502,9 +503,12 @@ static int compress_threads_save_setup(void)
> > > > > > comp_param[i].quit = false;
> > > > > > qemu_mutex_init(&comp_param[i].mutex);
> > > > > > qemu_cond_init(&comp_param[i].cond);
> > > > > > - qemu_thread_create(compress_threads + i, "compress",
> > > > > > - do_data_compress, comp_param + i,
> > > > > > - QEMU_THREAD_JOINABLE);
> > > > > > + if (!qemu_thread_create(compress_threads + i, "compress",
> > > > > > + do_data_compress, comp_param + i,
> > > > > > + QEMU_THREAD_JOINABLE, &local_err)) {
> > > > > > + error_reportf_err(local_err, "failed to
> > > > > > create do_data_compress: ");
> > > > > > + goto exit;
[1]
> > > > > > + }
> > > > > > }
> > > > > > return 0;
> > > > > Reviewing the migration changes is getting tiresome...
> > > > Yes, indeed, the migration involves a lot! Thanks so much for helping
> > > > to review!
> > > > > Is reporting the
> > > > > error appropriate here, and why?
> > > > I think the qemu monitor should display the obvious and exact failing
> > > > reason for administrators, esp considering that qemu_thread_create()
> > > > itself does not print any message thus we have no idea which direct
> > > > function fails if gdb is not enabled.
> > > > IOW, I think David's answer to that ppc's error_reportf_err() also
> > > > apply here:
> > > >
> > > > "The error returns are for the guest, the reported errors are for the
> > > > guest administrator or management layers."
> > > There could well be an issue with the "management layers" part. Should
> > > this error be sent to the management layer via QMP somehow? Migration
> > > maintainers should be able to assist with this question.
> Kindly ping migration maintainers. :)
I think both the maintainers are on holiday so possibly there won't be
any reply from them this week... :)
Regarding to error reports of migration via QMP layer, please have a
look at d59ce6f344 ("migration: add reporting of errors for outgoing
migration", 2016-05-26). Though I see that even
qemu_savevm_state_setup() is not capturing error for the management
layer so if you want to pass this thread creation error upward you'll
possibly need to work on that as well.
Though here note that when you "goto exit" at [1] you probably also
need to touch up the cleanup part since otherwise the join() could be
with an invalid thread ID, so you'll possibly need to check the thread
ID validity before do the join() of the compression thread.
Regards,
--
Peter Xu
On 12/24/2018 11:34 AM, Peter Xu wrote:
> On Fri, Dec 21, 2018 at 05:36:57PM +0800, Fei Li wrote:
>> On 12/19/2018 08:14 PM, Fei Li wrote:
>>> On 12/19/2018 06:10 PM, Markus Armbruster wrote:
>>>> Fei Li <fli@suse.com> writes:
>>>>
>>>>> On 12/13/2018 03:26 PM, Markus Armbruster wrote:
>>>>>> There's a question for David Gibson inline. Please search for /ppc/.
>>>>>>
>>>>>> Fei Li <fli@suse.com> writes:
>>>>>>
>>>>>>> Make qemu_thread_create() return a Boolean to indicate if it succeeds
>>>>>>> rather than failing with an error. And add an Error parameter to hold
>>>>>>> the error message and let the callers handle it.
>>>>>> The "rather than failing with an error" is misleading. Before the
>>>>>> patch, we report to stderr and abort(). What about:
>>>>>>
>>>>>> qemu-thread: Make qemu_thread_create() handle errors properly
>>>>>>
>>>>>> qemu_thread_create() abort()s on error. Not nice. Give it a
>>>>>> return value and an Error ** argument, so it can
>>>>>> return success /
>>>>>> failure.
>>>>> A nice commit-amend! Thanks!
>>>>>> Still missing from the commit message then: how you update
>>>>>> the callers.
>>>>> Yes, agree. I think the-how should also be noted here, like
>>>>> - propagating the err to callers whose call trace already have the
>>>>> Error paramater;
>>>>> - just add an &error_abort for qemu_thread_create() and make it a
>>>>> "TODO: xxx";
>>>>>> Let's see below.
>>>>>>
>>>>>>> Cc: Markus Armbruster <armbru@redhat.com>
>>>>>>> Cc: Daniel P. Berrangé <berrange@redhat.com>
>>>>>>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>>>>> Signed-off-by: Fei Li <fli@suse.com>
>>>>>>> ---
>>>>>>> cpus.c | 45
>>>>>>> ++++++++++++++++++++++++-------------
>>>>>>> dump.c | 6 +++--
>>>>>>> hw/misc/edu.c | 6 +++--
>>>>>>> hw/ppc/spapr_hcall.c | 10 +++++++--
>>>>>>> hw/rdma/rdma_backend.c | 4 +++-
>>>>>>> hw/usb/ccid-card-emulated.c | 16 ++++++++++----
>>>>>>> include/qemu/thread.h | 4 ++--
>>>>>>> io/task.c | 3 ++-
>>>>>>> iothread.c | 16 +++++++++-----
>>>>>>> migration/migration.c | 54
>>>>>>> +++++++++++++++++++++++++++++----------------
>>>>>>> migration/postcopy-ram.c | 14 ++++++++++--
>>>>>>> migration/ram.c | 40 ++++++++++++++++++++++++---------
>>>>>>> migration/savevm.c | 11 ++++++---
>>>>>>> tests/atomic_add-bench.c | 3 ++-
>>>>>>> tests/iothread.c | 2 +-
>>>>>>> tests/qht-bench.c | 3 ++-
>>>>>>> tests/rcutorture.c | 3 ++-
>>>>>>> tests/test-aio.c | 2 +-
>>>>>>> tests/test-rcu-list.c | 3 ++-
>>>>>>> ui/vnc-jobs.c | 17 +++++++++-----
>>>>>>> ui/vnc-jobs.h | 2 +-
>>>>>>> ui/vnc.c | 4 +++-
>>>>>>> util/compatfd.c | 12 ++++++++--
>>>>>>> util/oslib-posix.c | 17 ++++++++++----
>>>>>>> util/qemu-thread-posix.c | 24 +++++++++++++-------
>>>>>>> util/qemu-thread-win32.c | 16 ++++++++++----
>>>>>>> util/rcu.c | 3 ++-
>>>>>>> util/thread-pool.c | 4 +++-
>>>>>>> 28 files changed, 243 insertions(+), 101 deletions(-)
>>>>>>>
>> ...snip, and only leave the three uncertain small topics...
>>>>>>> diff --git a/migration/ram.c b/migration/ram.c
>>>>>>> index 658dfa88a3..6e0cccf066 100644
>>>>>>> --- a/migration/ram.c
>>>>>>> +++ b/migration/ram.c
>>>>>>> @@ -473,6 +473,7 @@ static void compress_threads_save_cleanup(void)
>>>>>>> static int compress_threads_save_setup(void)
>>>>>>> {
>>>>>>> int i, thread_count;
>>>>>>> + Error *local_err = NULL;
>>>>>>> if (!migrate_use_compression()) {
>>>>>>> return 0;
>>>>>>> @@ -502,9 +503,12 @@ static int compress_threads_save_setup(void)
>>>>>>> comp_param[i].quit = false;
>>>>>>> qemu_mutex_init(&comp_param[i].mutex);
>>>>>>> qemu_cond_init(&comp_param[i].cond);
>>>>>>> - qemu_thread_create(compress_threads + i, "compress",
>>>>>>> - do_data_compress, comp_param + i,
>>>>>>> - QEMU_THREAD_JOINABLE);
>>>>>>> + if (!qemu_thread_create(compress_threads + i, "compress",
>>>>>>> + do_data_compress, comp_param + i,
>>>>>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>>>>>> + error_reportf_err(local_err, "failed to
>>>>>>> create do_data_compress: ");
>>>>>>> + goto exit;
> [1]
>
>>>>>>> + }
>>>>>>> }
>>>>>>> return 0;
>>>>>> Reviewing the migration changes is getting tiresome...
>>>>> Yes, indeed, the migration involves a lot! Thanks so much for helping
>>>>> to review!
>>>>>> Is reporting the
>>>>>> error appropriate here, and why?
>>>>> I think the qemu monitor should display the obvious and exact failing
>>>>> reason for administrators, esp considering that qemu_thread_create()
>>>>> itself does not print any message thus we have no idea which direct
>>>>> function fails if gdb is not enabled.
>>>>> IOW, I think David's answer to that ppc's error_reportf_err() also
>>>>> apply here:
>>>>>
>>>>> "The error returns are for the guest, the reported errors are for the
>>>>> guest administrator or management layers."
>>>> There could well be an issue with the "management layers" part. Should
>>>> this error be sent to the management layer via QMP somehow? Migration
>>>> maintainers should be able to assist with this question.
>> Kindly ping migration maintainers. :)
> I think both the maintainers are on holiday so possibly there won't be
> any reply from them this week... :)
>
> Regarding to error reports of migration via QMP layer, please have a
> look at d59ce6f344 ("migration: add reporting of errors for outgoing
> migration", 2016-05-26). Though I see that even
> qemu_savevm_state_setup() is not capturing error for the management
> layer so if you want to pass this thread creation error upward you'll
> possibly need to work on that as well.
Thanks for the useful commit. :) I guess the "the client app" mentioned
is not qemu,
but other upper thing, maybe something inside openstack? As I have to
say that I
can see the error message (I mean the above error_reportf_err(...) ) be
printed to the
screen when I use qemu command line via hmp to do the migration.
For the qemu_savevm_state_setup(), I see it sets the f->last_error
(instead of s->error)
to indicate whether to stop the migration or not when back to
migration_thread()
in migration_detect_error(s). And no matter whether
qemu_savevm_state_setup()
succeeds, the current code continues to set the migration state to be
ACTIVE. Emm,
I am wondering whether this is on purpose..
> Though here note that when you "goto exit" at [1] you probably also
> need to touch up the cleanup part since otherwise the join() could be
> with an invalid thread ID, so you'll possibly need to check the thread
> ID validity before do the join() of the compression thread.
Thanks for pointing this out. I think my last patch is to fix this
problem, that is
to add a check in qemu_thread_join():
+ if (!thread->thread) {
+ return NULL;
+ }
Correct me if this is not the proper solution. :)
Have a nice day, thanks :)
Fei
>
> Regards,
>
Hi all,
As I am leaving my current company and most reviewers are on holiday,
I'd like to send a new version now:
v9: "qemu_thread: Make qemu_thread_create() handle errors properly",
although some details like whether it is appropriate to report the error
to be seen by the management layer. And I will use my new personal
email address (shirley17fei@gmail.com <mailto:shirley17fei@gmail.com>)
to follow the new version. :)
Merry Christmas, and have a nice day, thanks all!
Fei
On 12/24/2018 02:53 PM, Fei Li wrote:
>
>
> On 12/24/2018 11:34 AM, Peter Xu wrote:
>> On Fri, Dec 21, 2018 at 05:36:57PM +0800, Fei Li wrote:
>>> On 12/19/2018 08:14 PM, Fei Li wrote:
>>>> On 12/19/2018 06:10 PM, Markus Armbruster wrote:
>>>>> Fei Li <fli@suse.com> writes:
>>>>>
>>>>>> On 12/13/2018 03:26 PM, Markus Armbruster wrote:
>>>>>>> There's a question for David Gibson inline. Please search for
>>>>>>> /ppc/.
>>>>>>>
>>>>>>> Fei Li <fli@suse.com> writes:
>>>>>>>
>>>>>>>> Make qemu_thread_create() return a Boolean to indicate if it
>>>>>>>> succeeds
>>>>>>>> rather than failing with an error. And add an Error parameter
>>>>>>>> to hold
>>>>>>>> the error message and let the callers handle it.
>>>>>>> The "rather than failing with an error" is misleading. Before the
>>>>>>> patch, we report to stderr and abort(). What about:
>>>>>>>
>>>>>>> qemu-thread: Make qemu_thread_create() handle errors
>>>>>>> properly
>>>>>>>
>>>>>>> qemu_thread_create() abort()s on error. Not nice. Give it a
>>>>>>> return value and an Error ** argument, so it can
>>>>>>> return success /
>>>>>>> failure.
>>>>>> A nice commit-amend! Thanks!
>>>>>>> Still missing from the commit message then: how you update
>>>>>>> the callers.
>>>>>> Yes, agree. I think the-how should also be noted here, like
>>>>>> - propagating the err to callers whose call trace already have the
>>>>>> Error paramater;
>>>>>> - just add an &error_abort for qemu_thread_create() and make it a
>>>>>> "TODO: xxx";
>>>>>>> Let's see below.
>>>>>>>
>>>>>>>> Cc: Markus Armbruster <armbru@redhat.com>
>>>>>>>> Cc: Daniel P. Berrangé <berrange@redhat.com>
>>>>>>>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>>>>>> Signed-off-by: Fei Li <fli@suse.com>
>>>>>>>> ---
>>>>>>>> cpus.c | 45
>>>>>>>> ++++++++++++++++++++++++-------------
>>>>>>>> dump.c | 6 +++--
>>>>>>>> hw/misc/edu.c | 6 +++--
>>>>>>>> hw/ppc/spapr_hcall.c | 10 +++++++--
>>>>>>>> hw/rdma/rdma_backend.c | 4 +++-
>>>>>>>> hw/usb/ccid-card-emulated.c | 16 ++++++++++----
>>>>>>>> include/qemu/thread.h | 4 ++--
>>>>>>>> io/task.c | 3 ++-
>>>>>>>> iothread.c | 16 +++++++++-----
>>>>>>>> migration/migration.c | 54
>>>>>>>> +++++++++++++++++++++++++++++----------------
>>>>>>>> migration/postcopy-ram.c | 14 ++++++++++--
>>>>>>>> migration/ram.c | 40
>>>>>>>> ++++++++++++++++++++++++---------
>>>>>>>> migration/savevm.c | 11 ++++++---
>>>>>>>> tests/atomic_add-bench.c | 3 ++-
>>>>>>>> tests/iothread.c | 2 +-
>>>>>>>> tests/qht-bench.c | 3 ++-
>>>>>>>> tests/rcutorture.c | 3 ++-
>>>>>>>> tests/test-aio.c | 2 +-
>>>>>>>> tests/test-rcu-list.c | 3 ++-
>>>>>>>> ui/vnc-jobs.c | 17 +++++++++-----
>>>>>>>> ui/vnc-jobs.h | 2 +-
>>>>>>>> ui/vnc.c | 4 +++-
>>>>>>>> util/compatfd.c | 12 ++++++++--
>>>>>>>> util/oslib-posix.c | 17 ++++++++++----
>>>>>>>> util/qemu-thread-posix.c | 24 +++++++++++++-------
>>>>>>>> util/qemu-thread-win32.c | 16 ++++++++++----
>>>>>>>> util/rcu.c | 3 ++-
>>>>>>>> util/thread-pool.c | 4 +++-
>>>>>>>> 28 files changed, 243 insertions(+), 101 deletions(-)
>>>>>>>>
>>> ...snip, and only leave the three uncertain small topics...
>>>>>>>> diff --git a/migration/ram.c b/migration/ram.c
>>>>>>>> index 658dfa88a3..6e0cccf066 100644
>>>>>>>> --- a/migration/ram.c
>>>>>>>> +++ b/migration/ram.c
>>>>>>>> @@ -473,6 +473,7 @@ static void
>>>>>>>> compress_threads_save_cleanup(void)
>>>>>>>> static int compress_threads_save_setup(void)
>>>>>>>> {
>>>>>>>> int i, thread_count;
>>>>>>>> + Error *local_err = NULL;
>>>>>>>> if (!migrate_use_compression()) {
>>>>>>>> return 0;
>>>>>>>> @@ -502,9 +503,12 @@ static int compress_threads_save_setup(void)
>>>>>>>> comp_param[i].quit = false;
>>>>>>>> qemu_mutex_init(&comp_param[i].mutex);
>>>>>>>> qemu_cond_init(&comp_param[i].cond);
>>>>>>>> - qemu_thread_create(compress_threads + i, "compress",
>>>>>>>> - do_data_compress, comp_param + i,
>>>>>>>> - QEMU_THREAD_JOINABLE);
>>>>>>>> + if (!qemu_thread_create(compress_threads + i, "compress",
>>>>>>>> + do_data_compress, comp_param + i,
>>>>>>>> + QEMU_THREAD_JOINABLE, &local_err)) {
>>>>>>>> + error_reportf_err(local_err, "failed to
>>>>>>>> create do_data_compress: ");
>>>>>>>> + goto exit;
>> [1]
>>
>>>>>>>> + }
>>>>>>>> }
>>>>>>>> return 0;
>>>>>>> Reviewing the migration changes is getting tiresome...
>>>>>> Yes, indeed, the migration involves a lot! Thanks so much for
>>>>>> helping
>>>>>> to review!
>>>>>>> Is reporting the
>>>>>>> error appropriate here, and why?
>>>>>> I think the qemu monitor should display the obvious and exact
>>>>>> failing
>>>>>> reason for administrators, esp considering that qemu_thread_create()
>>>>>> itself does not print any message thus we have no idea which direct
>>>>>> function fails if gdb is not enabled.
>>>>>> IOW, I think David's answer to that ppc's error_reportf_err() also
>>>>>> apply here:
>>>>>>
>>>>>> "The error returns are for the guest, the reported errors are for
>>>>>> the
>>>>>> guest administrator or management layers."
>>>>> There could well be an issue with the "management layers" part.
>>>>> Should
>>>>> this error be sent to the management layer via QMP somehow? Migration
>>>>> maintainers should be able to assist with this question.
>>> Kindly ping migration maintainers. :)
>> I think both the maintainers are on holiday so possibly there won't be
>> any reply from them this week... :)
>>
>> Regarding to error reports of migration via QMP layer, please have a
>> look at d59ce6f344 ("migration: add reporting of errors for outgoing
>> migration", 2016-05-26). Though I see that even
>> qemu_savevm_state_setup() is not capturing error for the management
>> layer so if you want to pass this thread creation error upward you'll
>> possibly need to work on that as well.
> Thanks for the useful commit. :) I guess the "the client app"
> mentioned is not qemu,
> but other upper thing, maybe something inside openstack? As I have to
> say that I
> can see the error message (I mean the above error_reportf_err(...) )
> be printed to the
> screen when I use qemu command line via hmp to do the migration.
>
> For the qemu_savevm_state_setup(), I see it sets the f->last_error
> (instead of s->error)
> to indicate whether to stop the migration or not when back to
> migration_thread()
> in migration_detect_error(s). And no matter whether
> qemu_savevm_state_setup()
> succeeds, the current code continues to set the migration state to be
> ACTIVE. Emm,
> I am wondering whether this is on purpose..
>> Though here note that when you "goto exit" at [1] you probably also
>> need to touch up the cleanup part since otherwise the join() could be
>> with an invalid thread ID, so you'll possibly need to check the thread
>> ID validity before do the join() of the compression thread.
> Thanks for pointing this out. I think my last patch is to fix this
> problem, that is
> to add a check in qemu_thread_join():
> + if (!thread->thread) {
> + return NULL;
> + }
> Correct me if this is not the proper solution. :)
>
> Have a nice day, thanks :)
> Fei
>>
>> Regards,
>>
>
>
>
>
© 2016 - 2026 Red Hat, Inc.