From: William Roche <william.roche@oracle.com>
madvise(MADV_HWPOISON) can generate a SIGBUS when called, so the listener
thread (its caller) needs to deal with this signal.
The listener marks itself with a thread-specific key; when the SIGBUS handler
sees that key, it jumps straight back to just after the madvise call.
Signed-off-by: William Roche <william.roche@oracle.com>
---
system/cpus.c | 9 +++++++++
system/hugetlbfs_ras.c | 43 ++++++++++++++++++++++++++++++++++++++++--
system/hugetlbfs_ras.h | 1 +
3 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/system/cpus.c b/system/cpus.c
index 12e630f760..642055f729 100644
--- a/system/cpus.c
+++ b/system/cpus.c
@@ -47,6 +47,10 @@
#include "hw/hw.h"
#include "trace.h"
+#ifdef CONFIG_HUGETLBFS_RAS
+#include "system/hugetlbfs_ras.h"
+#endif
+
#ifdef CONFIG_LINUX
#include <sys/prctl.h>
@@ -374,6 +378,11 @@ static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
sigbus_reraise();
}
+#ifdef CONFIG_HUGETLBFS_RAS
+ /* skip error on the listener thread - does not return in this case */
+ hugetlbfs_ras_signal_from_listener();
+#endif
+
if (current_cpu) {
/* Called asynchronously in VCPU thread. */
if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code,
diff --git a/system/hugetlbfs_ras.c b/system/hugetlbfs_ras.c
index 2f7e550f56..90e399bbad 100644
--- a/system/hugetlbfs_ras.c
+++ b/system/hugetlbfs_ras.c
@@ -70,6 +70,8 @@ static QemuCond large_hwpoison_vm_running;
static QemuMutex large_hwpoison_mtx;
static QemuThread thread;
static void *hugetlbfs_ras_listener(void *arg);
+static pthread_key_t id_key;
+static sigjmp_buf listener_jmp_buf;
static int vm_running;
static bool hugetlbfs_ras_initialized;
static int _PAGE_SIZE = 4096;
@@ -105,6 +107,10 @@ hugetlbfs_ras_init(void)
qemu_cond_init(&large_hwpoison_vm_running);
qemu_mutex_init(&large_hwpoison_mtx);
+ if (pthread_key_create(&id_key, NULL) != 0) {
+ warn_report("No support for hugetlbfs largepage errors - no id_key");
+ return -EIO;
+ }
qemu_thread_create(&thread, "hugetlbfs_error", hugetlbfs_ras_listener,
NULL, QEMU_THREAD_DETACHED);
@@ -288,6 +294,19 @@ hugetlbfs_ras_correct(void **paddr, size_t *psz, int code)
return (*paddr == NULL ? false : true);
}
+/* Poison [addr, addr + size); false is returned only if madvise() fails. */
+static bool poison_location(void *addr, size_t size)
+{
+    if (sigsetjmp(listener_jmp_buf, 1) == 0) {
+        if (madvise(addr, size, MADV_HWPOISON)) {
+            DPRINTF("poison injection failed: %s (addr:%p sz:%zu)\n",
+                    strerror(errno), addr, size);
+            return false;
+        }
+    }
+    return true; /* also reached when sigsetjmp() returns via siglongjmp() */
+}
+
/*
* Sequentially read the valid data from the failed large page (shared) backend
* file and copy that into our set of standard sized pages.
@@ -321,7 +340,7 @@ static int take_valid_data_lpg(LargeHWPoisonPage *page, const char **err)
slot_num = page->page_size / ps;
if (!qemu_ram_is_shared(rb)) { /* we can't use the backend file */
- if (madvise(page->page_addr, page->page_size, MADV_HWPOISON) == 0) {
+ if (poison_location(page->page_addr, page->page_size)) {
page->first_poison = page->page_addr;
warn_report("Large memory error, unrecoverable section "
"(unshared hugetlbfs): start:%p length: %ld",
@@ -350,7 +369,7 @@ static int take_valid_data_lpg(LargeHWPoisonPage *page, const char **err)
retrieved += count;
}
if (retrieved < ps) { /* consider this page as poisoned */
- if (madvise(page->page_addr + i * ps, ps, MADV_HWPOISON)) {
+ if (!poison_location(page->page_addr + i * ps, ps)) {
if (err) {
*err = "poison injection failed";
}
@@ -402,6 +421,19 @@ void hugetlbfs_ras_empty(void)
qemu_mutex_unlock(&large_hwpoison_mtx);
}
+/*
+ * SIGBUS hook: the listener thread (id_key set) leaves through siglongjmp()
+ * to the point saved by sigsetjmp(); every other thread returns to the caller.
+ */
+void
+hugetlbfs_ras_signal_from_listener(void)
+{
+    if (pthread_getspecific(id_key) == NULL) {
+        return;
+    }
+    siglongjmp(listener_jmp_buf, 1); /* NOTE(review): is it always armed? */
+}
+
/*
* Deal with the given page, initializing its data.
*/
@@ -498,6 +530,13 @@ hugetlbfs_ras_listener(void *arg)
LargeHWPoisonPage *page;
int new;
const char *err;
+ sigset_t set;
+
+ pthread_setspecific(id_key, (void *)1);
+ /* unblock SIGBUS */
+ sigemptyset(&set);
+ sigaddset(&set, SIGBUS);
+ pthread_sigmask(SIG_UNBLOCK, &set, NULL);
/* monitor any newly submitted element in the list */
qemu_mutex_lock(&large_hwpoison_mtx);
diff --git a/system/hugetlbfs_ras.h b/system/hugetlbfs_ras.h
index 324228bda3..9c2a6e49a1 100644
--- a/system/hugetlbfs_ras.h
+++ b/system/hugetlbfs_ras.h
@@ -1,3 +1,4 @@
bool hugetlbfs_ras_use(void);
bool hugetlbfs_ras_correct(void **paddr, size_t *psz, int code);
void hugetlbfs_ras_empty(void);
+void hugetlbfs_ras_signal_from_listener(void);
--
2.43.5