Put the qemu-thread code that is shared between platforms into two new
files, util/qemu-thread-common.[ch]. The header is internal to the
qemu-thread implementation and is not meant for qemu-thread users.
Introduce hooks for the shared part. Note that in the POSIX
qemu_mutex_unlock_impl() the tracepoint call is moved before the unlock
operation, which makes more sense and matches what win32 already does;
as a result no qemu_mutex_post_unlock() hook is needed.
Currently the hooks only call the tracepoints.
Signed-off-by: Peter Xu <peterx@redhat.com>
---
util/qemu-thread-common.h | 23 +++++++++++++++++++++++
util/qemu-thread-common.c | 30 ++++++++++++++++++++++++++++++
util/qemu-thread-posix.c | 17 +++++++----------
util/qemu-thread-win32.c | 15 +++++++--------
util/Makefile.objs | 4 ++--
5 files changed, 69 insertions(+), 20 deletions(-)
create mode 100644 util/qemu-thread-common.h
create mode 100644 util/qemu-thread-common.c
diff --git a/util/qemu-thread-common.h b/util/qemu-thread-common.h
new file mode 100644
index 0000000000..f3f66613e9
--- /dev/null
+++ b/util/qemu-thread-common.h
@@ -0,0 +1,23 @@
+/*
+ * Common qemu-thread implementation header file.
+ *
+ * Copyright Red Hat, Inc. 2018
+ *
+ * Authors:
+ * Peter Xu <peterx@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_THREAD_COMMON_H
+#define QEMU_THREAD_COMMON_H
+
+#include "qemu/typedefs.h"
+#include "qemu/thread.h"
+/* Mutex hooks for the posix/win32 code; args are forwarded to tracepoints. */
+void qemu_mutex_pre_lock(QemuMutex *mutex, const char *file, int line);
+void qemu_mutex_post_lock(QemuMutex *mutex, const char *file, int line);
+void qemu_mutex_pre_unlock(QemuMutex *mutex, const char *file, int line);
+
+#endif /* QEMU_THREAD_COMMON_H */
diff --git a/util/qemu-thread-common.c b/util/qemu-thread-common.c
new file mode 100644
index 0000000000..fc1f1aa969
--- /dev/null
+++ b/util/qemu-thread-common.c
@@ -0,0 +1,30 @@
+/*
+ * Common qemu-thread implementation shared for all platforms.
+ *
+ * Copyright Red Hat, Inc. 2018
+ *
+ * Authors:
+ * Peter Xu <peterx@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-thread-common.h"
+#include "trace.h"
+
+void qemu_mutex_pre_lock(QemuMutex *mutex, const char *file, int line)
+{
+ trace_qemu_mutex_lock(mutex, file, line); /* about to take the lock */
+}
+
+void qemu_mutex_post_lock(QemuMutex *mutex, const char *file, int line)
+{
+ trace_qemu_mutex_locked(mutex, file, line); /* lock call has returned */
+}
+
+void qemu_mutex_pre_unlock(QemuMutex *mutex, const char *file, int line)
+{
+ trace_qemu_mutex_unlock(mutex, file, line); /* about to release the lock */
+}
diff --git a/util/qemu-thread-posix.c b/util/qemu-thread-posix.c
index b789cf32e9..b0e7008db3 100644
--- a/util/qemu-thread-posix.c
+++ b/util/qemu-thread-posix.c
@@ -14,7 +14,7 @@
#include "qemu/thread.h"
#include "qemu/atomic.h"
#include "qemu/notify.h"
-#include "trace.h"
+#include "qemu-thread-common.h"
static bool name_threads;
@@ -62,13 +62,11 @@ void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line)
int err;
assert(mutex->initialized);
- trace_qemu_mutex_lock(mutex, file, line);
-
+ qemu_mutex_pre_lock(mutex, file, line);
err = pthread_mutex_lock(&mutex->lock);
if (err)
error_exit(err, __func__);
-
- trace_qemu_mutex_locked(mutex, file, line);
+ qemu_mutex_post_lock(mutex, file, line);
}
int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line)
@@ -78,7 +76,7 @@ int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line)
assert(mutex->initialized);
err = pthread_mutex_trylock(&mutex->lock);
if (err == 0) {
- trace_qemu_mutex_locked(mutex, file, line);
+ qemu_mutex_post_lock(mutex, file, line);
return 0;
}
if (err != EBUSY) {
@@ -92,11 +90,10 @@ void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line)
int err;
assert(mutex->initialized);
+ qemu_mutex_pre_unlock(mutex, file, line);
err = pthread_mutex_unlock(&mutex->lock);
if (err)
error_exit(err, __func__);
-
- trace_qemu_mutex_unlock(mutex, file, line);
}
void qemu_rec_mutex_init(QemuRecMutex *mutex)
@@ -160,9 +157,9 @@ void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, const char *file, con
int err;
assert(cond->initialized);
- trace_qemu_mutex_unlock(mutex, file, line);
+ qemu_mutex_pre_unlock(mutex, file, line);
err = pthread_cond_wait(&cond->cond, &mutex->lock);
- trace_qemu_mutex_locked(mutex, file, line);
+ qemu_mutex_post_lock(mutex, file, line);
if (err)
error_exit(err, __func__);
}
diff --git a/util/qemu-thread-win32.c b/util/qemu-thread-win32.c
index ab60c0d557..f9fb0581f2 100644
--- a/util/qemu-thread-win32.c
+++ b/util/qemu-thread-win32.c
@@ -19,7 +19,7 @@
#include "qemu-common.h"
#include "qemu/thread.h"
#include "qemu/notify.h"
-#include "trace.h"
+#include "qemu-thread-common.h"
#include <process.h>
static bool name_threads;
@@ -59,10 +59,9 @@ void qemu_mutex_destroy(QemuMutex *mutex)
void qemu_mutex_lock_impl(QemuMutex *mutex, const char *file, const int line)
{
assert(mutex->initialized);
- trace_qemu_mutex_lock(mutex, file, line);
-
+ qemu_mutex_pre_lock(mutex, file, line);
AcquireSRWLockExclusive(&mutex->lock);
- trace_qemu_mutex_locked(mutex, file, line);
+ qemu_mutex_post_lock(mutex, file, line);
}
int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line)
@@ -72,7 +71,7 @@ int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line)
assert(mutex->initialized);
owned = TryAcquireSRWLockExclusive(&mutex->lock);
if (owned) {
- trace_qemu_mutex_locked(mutex, file, line);
+ qemu_mutex_post_lock(mutex, file, line);
return 0;
}
return -EBUSY;
@@ -81,7 +80,7 @@ int qemu_mutex_trylock_impl(QemuMutex *mutex, const char *file, const int line)
void qemu_mutex_unlock_impl(QemuMutex *mutex, const char *file, const int line)
{
assert(mutex->initialized);
- trace_qemu_mutex_unlock(mutex, file, line);
+ qemu_mutex_pre_unlock(mutex, file, line);
ReleaseSRWLockExclusive(&mutex->lock);
}
@@ -145,9 +144,9 @@ void qemu_cond_broadcast(QemuCond *cond)
void qemu_cond_wait_impl(QemuCond *cond, QemuMutex *mutex, const char *file, const int line)
{
assert(cond->initialized);
- trace_qemu_mutex_unlock(mutex, file, line);
+ qemu_mutex_pre_unlock(mutex, file, line);
SleepConditionVariableSRW(&cond->var, &mutex->lock, INFINITE, 0);
- trace_qemu_mutex_locked(mutex, file, line);
+ qemu_mutex_post_lock(mutex, file, line);
}
void qemu_sem_init(QemuSemaphore *sem, int init)
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 728c3541db..04d6713daf 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -9,12 +9,12 @@ util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
util-obj-$(CONFIG_POSIX) += mmap-alloc.o
util-obj-$(CONFIG_POSIX) += oslib-posix.o
util-obj-$(CONFIG_POSIX) += qemu-openpty.o
-util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o
+util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o qemu-thread-common.o
util-obj-$(CONFIG_POSIX) += memfd.o
util-obj-$(CONFIG_WIN32) += aio-win32.o
util-obj-$(CONFIG_WIN32) += event_notifier-win32.o
util-obj-$(CONFIG_WIN32) += oslib-win32.o
-util-obj-$(CONFIG_WIN32) += qemu-thread-win32.o
+util-obj-$(CONFIG_WIN32) += qemu-thread-win32.o qemu-thread-common.o
util-obj-y += envlist.o path.o module.o
util-obj-y += host-utils.o
util-obj-y += bitmap.o bitops.o hbitmap.o
--
2.14.3
On Fri, Apr 20, 2018 at 12:42:10 +0800, Peter Xu wrote: > Put all the shared qemu-thread implementations into these files. The > header should be internal to qemu-thread but not for qemu-thread users. > > Introduce some hooks correspondingly for the shared part. Note that in > qemu_mutex_unlock_impl() we moved the call before unlock operation which > should make more sense. And we don't need qemu_mutex_post_unlock() hook. > > Currently the hooks only calls the tracepoints. > > Signed-off-by: Peter Xu <peterx@redhat.com> (snip) > - trace_qemu_mutex_lock(mutex, file, line); > - > + qemu_mutex_pre_lock(mutex, file, line); > err = pthread_mutex_lock(&mutex->lock); > if (err) > error_exit(err, __func__); > - > - trace_qemu_mutex_locked(mutex, file, line); > + qemu_mutex_post_lock(mutex, file, line); > } I see the value in consolidating these calls. However, having a separate object means that this adds two function calls to mutex_lock. This significantly reduces performance, even without --enable-debug-mutex: - Before: $ taskset -c 0 tests/atomic_add-bench -n 1 -m Parameters: # of threads: 1 duration: 1 ops' range: 1024 Results: Duration: 1 s Throughput: 57.24 Mops/s Throughput/thread: 57.24 Mops/s/thread - After: $ taskset -c 0 tests/atomic_add-bench -n 1 -m Parameters: # of threads: 1 duration: 1 ops' range: 1024 Results: Duration: 1 s Throughput: 49.22 Mops/s Throughput/thread: 49.22 Mops/s/thread So either inlines/macros should be used instead -- I'd prefer inlines but I'm not sure they'll work with the tracing calls. I think you should cherry-pick this patch[1] and add it to the series -- it'll let you make sure the series does not affect performance. Cheers, Emilio [1] https://github.com/cota/qemu/commit/f04f34df
On Fri, Apr 20, 2018 at 01:07:34PM -0400, Emilio G. Cota wrote: > On Fri, Apr 20, 2018 at 12:42:10 +0800, Peter Xu wrote: > > Put all the shared qemu-thread implementations into these files. The > > header should be internal to qemu-thread but not for qemu-thread users. > > > > Introduce some hooks correspondingly for the shared part. Note that in > > qemu_mutex_unlock_impl() we moved the call before unlock operation which > > should make more sense. And we don't need qemu_mutex_post_unlock() hook. > > > > Currently the hooks only calls the tracepoints. > > > > Signed-off-by: Peter Xu <peterx@redhat.com> > (snip) > > - trace_qemu_mutex_lock(mutex, file, line); > > - > > + qemu_mutex_pre_lock(mutex, file, line); > > err = pthread_mutex_lock(&mutex->lock); > > if (err) > > error_exit(err, __func__); > > - > > - trace_qemu_mutex_locked(mutex, file, line); > > + qemu_mutex_post_lock(mutex, file, line); > > } > > I see the value in consolidating these calls. However, having a separate > object means that this adds two function calls to mutex_lock. This > significantly reduces performance, even without --enable-debug-mutex: > - Before: > $ taskset -c 0 tests/atomic_add-bench -n 1 -m > Parameters: > # of threads: 1 > duration: 1 > ops' range: 1024 > Results: > Duration: 1 s > Throughput: 57.24 Mops/s > Throughput/thread: 57.24 Mops/s/thread > > - After: > $ taskset -c 0 tests/atomic_add-bench -n 1 -m > Parameters: > # of threads: 1 > duration: 1 > ops' range: 1024 > Results: > Duration: 1 s > Throughput: 49.22 Mops/s > Throughput/thread: 49.22 Mops/s/thread > > So either inlines/macros should be used instead -- I'd prefer > inlines but I'm not sure they'll work with the tracing calls. Indeed, it's about 10% drop. 
I never thought an extra call would bring so much overhead, but after reading your patch I think I understand why: the test exercises only raw mutex locking, so the cost of the extra call is "amplified" compared to real usage, where there is normally much more work to do besides taking and releasing the lock. But sure, making it inline should be better, and your reasoning is valid. I don't see why inlines wouldn't work with the tracing calls, though; I would expect them to work naturally. I'll check. > > I think you should cherry-pick this patch[1] and add it to the > series -- it'll let you make sure the series does not affect > performance. Sure! I'll attach benchmark results in my next post with your tool. Thanks, -- Peter Xu
© 2016 - 2026 Red Hat, Inc.