[RFC 6/6] system/hugetlb_ras: Replay lost BUS_MCEERR_AO signals on VM resume

William Roche posted 6 patches 2 months, 2 weeks ago
There is a newer version of this series
[RFC 6/6] system/hugetlb_ras: Replay lost BUS_MCEERR_AO signals on VM resume
Posted by William Roche 2 months, 2 weeks ago
From: William Roche <william.roche@oracle.com>

When the SIGBUS handler is triggered by a BUS_MCEERR_AO signal and has
to exit early to let the VM pause during the memory mapping change,
that SIGBUS is not regenerated when the VM resumes.
To avoid losing the error, record the signal before exiting the handler
and replay it once the VM has resumed.

Signed-off-by: William Roche <william.roche@oracle.com>
---
 system/hugetlbfs_ras.c | 60 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/system/hugetlbfs_ras.c b/system/hugetlbfs_ras.c
index 90e399bbad..50f810f836 100644
--- a/system/hugetlbfs_ras.c
+++ b/system/hugetlbfs_ras.c
@@ -155,6 +155,56 @@ hugetlbfs_ras_backend_sz(void *addr)
     return rb->page_size;
 }
 
+
+/*
+ *  List of BUS_MCEERR_AO signals recorded while waiting to be replayed.
+ *  Additions are serialized under large_hwpoison_mtx; the replay runs
+ *  later, asynchronously, right after the VM has been restarted.
+ */
+typedef struct LargeHWPoisonAO {
+    void  *addr; /* address handed to kvm_on_sigbus() on replay */
+    QLIST_ENTRY(LargeHWPoisonAO) list;
+} LargeHWPoisonAO;
+
+static QLIST_HEAD(, LargeHWPoisonAO) large_hwpoison_ao =
+    QLIST_HEAD_INITIALIZER(large_hwpoison_ao);
+
+static void
+large_hwpoison_ao_record(void *addr)
+{
+    LargeHWPoisonAO *cel;
+
+    cel = g_new(LargeHWPoisonAO, 1); /* freed once replayed */
+    cel->addr = addr;
+    QLIST_INSERT_HEAD(&large_hwpoison_ao, cel, list); /* newest first */
+}
+
+/* Replay, then free, every BUS_MCEERR_AO signal recorded so far. */
+static void
+hugetlbfs_ras_ao_replay_bh(void)
+{
+    LargeHWPoisonAO *cel, *next;
+    QLIST_HEAD(, LargeHWPoisonAO) local_list =
+    QLIST_HEAD_INITIALIZER(local_list);
+
+    /*
+     * Move the entries to a local list under large_hwpoison_mtx so that
+     * kvm_on_sigbus() is called without holding the lock.
+     */
+    qemu_mutex_lock(&large_hwpoison_mtx);
+    QLIST_FOREACH_SAFE(cel, &large_hwpoison_ao, list, next) {
+        QLIST_REMOVE(cel, list);
+        QLIST_INSERT_HEAD(&local_list, cel, list); /* order is reversed */
+    }
+    qemu_mutex_unlock(&large_hwpoison_mtx);
+
+    QLIST_FOREACH_SAFE(cel, &local_list, list, next) {
+        DPRINTF("AO on %p\n", cel->addr);
+        kvm_on_sigbus(BUS_MCEERR_AO, cel->addr, _PAGE_SHIFT);
+        g_free(cel);
+    }
+}
+
 /*
  * Report if this std page address of the given faulted large page should be
  * retried or if the current signal handler should continue to deal with it.
@@ -276,6 +326,15 @@ hugetlbfs_ras_correct(void **paddr, size_t *psz, int code)
     if (large_hwpoison_vm_stop) {
         DPRINTF("Handler exit requested as on page %p\n", page->page_addr);
         *paddr = NULL;
+        /*
+         * BUS_MCEERR_AO specific case: this signal is not regenerated,
+         * so record it and replay it once the VM is ready to take it.
+         */
+        if (code == BUS_MCEERR_AO) {
+            large_hwpoison_ao_record(page->first_poison ? page->first_poison :
+                reported_addr);
+        }
+
     }
     qemu_mutex_unlock(&large_hwpoison_mtx);
 
@@ -522,6 +581,7 @@ static void coroutine_hugetlbfs_ras_vmstop_bh(void *opaque)
 static void coroutine_hugetlbfs_ras_vmstart_bh(void *opaque)
 {
     vm_start();
+    hugetlbfs_ras_ao_replay_bh(); /* replay recorded AO signals */
 }
 
 static void *
-- 
2.43.5