[PATCH] libvirt: add memory failure event

zhenwei pi posted 1 patch 4 years, 1 month ago
Test syntax-check failed
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20201010065643.784126-1-pizhenwei@bytedance.com
examples/c/misc/event-test.c        | 17 ++++++++
include/libvirt/libvirt-domain.h    | 84 +++++++++++++++++++++++++++++++++++++
src/conf/domain_event.c             | 82 ++++++++++++++++++++++++++++++++++++
src/conf/domain_event.h             | 12 ++++++
src/libvirt_private.syms            |  2 +
src/qemu/qemu_domain.c              |  1 +
src/qemu/qemu_domain.h              |  1 +
src/qemu/qemu_driver.c              | 57 +++++++++++++++++++++++++
src/qemu/qemu_monitor.c             | 21 +++++++++-
src/qemu/qemu_monitor.h             | 39 +++++++++++++++++
src/qemu/qemu_monitor_json.c        | 50 ++++++++++++++++++++++
src/qemu/qemu_process.c             | 28 +++++++++++++
src/remote/remote_daemon_dispatch.c | 33 +++++++++++++++
src/remote/remote_driver.c          | 35 ++++++++++++++++
src/remote/remote_protocol.x        | 21 +++++++++-
src/remote_protocol-structs         | 12 ++++++
tools/virsh-domain.c                | 37 ++++++++++++++++
17 files changed, 530 insertions(+), 2 deletions(-)
[PATCH] libvirt: add memory failure event
Posted by zhenwei pi 4 years, 1 month ago
Since QEMU 5.2 (commit 77b285f7f6), QEMU supports a 'memory failure'
event and posts it to the monitor when a hardware memory error is hit.

Several changes in this patch:
  Add a new event 'memory failure' for libvirt domain.
  Implement memory failure event handling for QEMU from QMP.
  Also implement virsh command callback functions.

Test case:
~# virsh event stretch --event memory-failure
event 'memory-failure' for domain stretch:
recipient: guest
action: inject
flags:
        action required: 0
        recursive: 0
events received: 1

Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
---
 examples/c/misc/event-test.c        | 17 ++++++++
 include/libvirt/libvirt-domain.h    | 84 +++++++++++++++++++++++++++++++++++++
 src/conf/domain_event.c             | 82 ++++++++++++++++++++++++++++++++++++
 src/conf/domain_event.h             | 12 ++++++
 src/libvirt_private.syms            |  2 +
 src/qemu/qemu_domain.c              |  1 +
 src/qemu/qemu_domain.h              |  1 +
 src/qemu/qemu_driver.c              | 57 +++++++++++++++++++++++++
 src/qemu/qemu_monitor.c             | 21 +++++++++-
 src/qemu/qemu_monitor.h             | 39 +++++++++++++++++
 src/qemu/qemu_monitor_json.c        | 50 ++++++++++++++++++++++
 src/qemu/qemu_process.c             | 28 +++++++++++++
 src/remote/remote_daemon_dispatch.c | 33 +++++++++++++++
 src/remote/remote_driver.c          | 35 ++++++++++++++++
 src/remote/remote_protocol.x        | 21 +++++++++-
 src/remote_protocol-structs         | 12 ++++++
 tools/virsh-domain.c                | 37 ++++++++++++++++
 17 files changed, 530 insertions(+), 2 deletions(-)

diff --git a/examples/c/misc/event-test.c b/examples/c/misc/event-test.c
index 52caa8ffa8..b10946d569 100644
--- a/examples/c/misc/event-test.c
+++ b/examples/c/misc/event-test.c
@@ -964,6 +964,22 @@ myDomainEventBlockThresholdCallback(virConnectPtr conn G_GNUC_UNUSED,
 
 
 static int
+myDomainEventMemoryFailureCallback(virConnectPtr conn G_GNUC_UNUSED,
+                                   virDomainPtr dom,
+                                   virDomainMemoryFailureRecipientType recipient,
+                                   virDomainMemoryFailureActionType action,
+                                   virDomainMemoryFailureFlagsPtr flags,
+                                   void *opaque G_GNUC_UNUSED)
+{   /* Example handler: prints one line per memory failure event. */
+    printf("%s EVENT: Domain %s(%d) memory failure: recipient '%d', "
+           "action '%d', action_required '%d', recursive '%d'\n",
+           __func__, virDomainGetName(dom), virDomainGetID(dom), recipient,
+           action, flags->action_required, flags->recursive);
+    return 0; /* always 0: the handler only logs the event */
+}
+
+
+static int
 myDomainEventMigrationIterationCallback(virConnectPtr conn G_GNUC_UNUSED,
                                         virDomainPtr dom,
                                         int iteration,
@@ -1093,6 +1109,7 @@ struct domainEventData domainEvents[] = {
     DOMAIN_EVENT(VIR_DOMAIN_EVENT_ID_DEVICE_REMOVAL_FAILED, myDomainEventDeviceRemovalFailedCallback),
     DOMAIN_EVENT(VIR_DOMAIN_EVENT_ID_METADATA_CHANGE, myDomainEventMetadataChangeCallback),
     DOMAIN_EVENT(VIR_DOMAIN_EVENT_ID_BLOCK_THRESHOLD, myDomainEventBlockThresholdCallback),
+    DOMAIN_EVENT(VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE, myDomainEventMemoryFailureCallback),
 };
 
 struct storagePoolEventData {
diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h
index 77f9116675..a9170d9a7e 100644
--- a/include/libvirt/libvirt-domain.h
+++ b/include/libvirt/libvirt-domain.h
@@ -3196,6 +3196,66 @@ typedef enum {
 } virDomainEventCrashedDetailType;
 
 /**
+ * virDomainMemoryFailureRecipientType:
+ *
+ * Recipient of a memory failure event.
+ */
+typedef enum {
+    /* memory failure in the hypervisor's memory address space */
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_HYPERVISOR = 0,
+
+    /* memory failure in the guest's memory address space */
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_GUEST = 1,
+
+# ifdef VIR_ENUM_SENTINELS
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_LAST
+# endif
+} virDomainMemoryFailureRecipientType;
+
+
+/**
+ * virDomainMemoryFailureActionType:
+ *
+ * Action of a memory failure event.
+ */
+typedef enum {
+    /* the memory failure could be ignored. This will only be the case for
+     * action-optional failures. */
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_IGNORE = 0,
+
+    /* the memory failure occurred in guest memory, the guest has enabled its
+     * MCE handling mechanism, and the hypervisor could inject the MCE into
+     * the guest successfully. */
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_INJECT = 1,
+
+    /* the failure is unrecoverable. This occurs for action-required failures
+     * if the recipient is the hypervisor; the hypervisor will exit. */
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_FATAL = 2,
+
+    /* the failure is unrecoverable but confined to the guest. This occurs if
+     * the recipient is a guest which is not ready to handle memory failures. */
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_RESET = 3,
+
+# ifdef VIR_ENUM_SENTINELS
+    VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_LAST
+# endif
+} virDomainMemoryFailureActionType;
+
+
+typedef struct _virDomainMemoryFailureFlags virDomainMemoryFailureFlags;
+typedef virDomainMemoryFailureFlags *virDomainMemoryFailureFlagsPtr;
+struct _virDomainMemoryFailureFlags {
+    /* non-zero if the memory failure is action-required, zero if it is
+     * action-optional (e.g. a failure during memory scrub). */
+    int action_required;
+
+    /* non-zero if the failure occurred while the previous failure was still
+     * in progress. */
+    int recursive;
+};
+
+
+/**
  * virConnectDomainEventCallback:
  * @conn: virConnect connection
  * @dom: The domain on which the event occurred
@@ -4565,6 +4625,29 @@ typedef void (*virConnectDomainEventBlockThresholdCallback)(virConnectPtr conn,
                                                             void *opaque);
 
 /**
+ * virConnectDomainEventMemoryFailureCallback:
+ * @conn: connection object
+ * @dom: domain on which the event occurred
+ * @recipient: the recipient of hardware memory failure
+ * @action: the action of hardware memory failure
+ * @flags: the flags of hardware memory failure
+ * @opaque: application specified data
+ *
+ * The callback occurs when the hypervisor handles a hardware memory
+ * corruption event.
+ *
+ * The callback signature to use when registering for an event of type
+ * VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE with virConnectDomainEventRegisterAny()
+ */
+typedef void (*virConnectDomainEventMemoryFailureCallback)(virConnectPtr conn,
+                                                           virDomainPtr dom,
+                                                           virDomainMemoryFailureRecipientType recipient,
+                                                           virDomainMemoryFailureActionType action,
+                                                           virDomainMemoryFailureFlagsPtr flags,
+                                                           void *opaque);
+
+
+/**
  * VIR_DOMAIN_EVENT_CALLBACK:
  *
  * Used to cast the event specific callback into the generic one
@@ -4606,6 +4689,7 @@ typedef enum {
     VIR_DOMAIN_EVENT_ID_DEVICE_REMOVAL_FAILED = 22, /* virConnectDomainEventDeviceRemovalFailedCallback */
     VIR_DOMAIN_EVENT_ID_METADATA_CHANGE = 23, /* virConnectDomainEventMetadataChangeCallback */
     VIR_DOMAIN_EVENT_ID_BLOCK_THRESHOLD = 24, /* virConnectDomainEventBlockThresholdCallback */
+    VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE = 25, /* virConnectDomainEventMemoryFailureCallback */
 
 # ifdef VIR_ENUM_SENTINELS
     VIR_DOMAIN_EVENT_ID_LAST
diff --git a/src/conf/domain_event.c b/src/conf/domain_event.c
index a8bd9f1595..20c5590835 100644
--- a/src/conf/domain_event.c
+++ b/src/conf/domain_event.c
@@ -57,6 +57,7 @@ static virClassPtr virDomainEventJobCompletedClass;
 static virClassPtr virDomainEventDeviceRemovalFailedClass;
 static virClassPtr virDomainEventMetadataChangeClass;
 static virClassPtr virDomainEventBlockThresholdClass;
+static virClassPtr virDomainEventMemoryFailureClass;
 
 static void virDomainEventDispose(void *obj);
 static void virDomainEventLifecycleDispose(void *obj);
@@ -79,6 +80,7 @@ static void virDomainEventJobCompletedDispose(void *obj);
 static void virDomainEventDeviceRemovalFailedDispose(void *obj);
 static void virDomainEventMetadataChangeDispose(void *obj);
 static void virDomainEventBlockThresholdDispose(void *obj);
+static void virDomainEventMemoryFailureDispose(void *obj);
 
 static void
 virDomainEventDispatchDefaultFunc(virConnectPtr conn,
@@ -287,6 +289,16 @@ struct _virDomainEventBlockThreshold {
 typedef struct _virDomainEventBlockThreshold virDomainEventBlockThreshold;
 typedef virDomainEventBlockThreshold *virDomainEventBlockThresholdPtr;
 
+struct _virDomainEventMemoryFailure {
+    virDomainEvent parent; /* base domain event object */
+
+    virDomainMemoryFailureRecipientType recipient; /* handed to the callback unchanged */
+    virDomainMemoryFailureActionType action; /* handed to the callback unchanged */
+    virDomainMemoryFailureFlags flags; /* stored by value; the callback receives its address */
+};
+typedef struct _virDomainEventMemoryFailure virDomainEventMemoryFailure;
+typedef virDomainEventMemoryFailure *virDomainEventMemoryFailurePtr;
+
 
 static int
 virDomainEventsOnceInit(void)
@@ -333,6 +345,8 @@ virDomainEventsOnceInit(void)
         return -1;
     if (!VIR_CLASS_NEW(virDomainEventBlockThreshold, virDomainEventClass))
         return -1;
+    if (!VIR_CLASS_NEW(virDomainEventMemoryFailure, virDomainEventClass))
+        return -1;
     return 0;
 }
 
@@ -542,6 +556,14 @@ virDomainEventBlockThresholdDispose(void *obj)
 }
 
 
+static void
+virDomainEventMemoryFailureDispose(void *obj)
+{
+    virDomainEventMemoryFailurePtr event = obj;
+    VIR_DEBUG("obj=%p", event); /* only logs; this function releases nothing itself */
+}
+
+
 static void *
 virDomainEventNew(virClassPtr klass,
                   int eventID,
@@ -1619,6 +1641,53 @@ virDomainEventBlockThresholdNewFromDom(virDomainPtr dom,
 }
 
 
+static virObjectEventPtr
+virDomainEventMemoryFailureNew(int id,
+                               const char *name,
+                               unsigned char *uuid,
+                               virDomainMemoryFailureRecipientType recipient,
+                               virDomainMemoryFailureActionType action,
+                               virDomainMemoryFailureFlagsPtr flags)
+{   /* Builds a memory failure event; flags is dereferenced unconditionally. */
+    virDomainEventMemoryFailurePtr ev;
+
+    if (virDomainEventsInitialize() < 0)
+        return NULL; /* event classes could not be initialized */
+
+    if (!(ev = virDomainEventNew(virDomainEventMemoryFailureClass,
+                                 VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE,
+                                 id, name, uuid)))
+        return NULL; /* event object could not be created */
+
+    ev->recipient = recipient;
+    ev->action = action;
+    ev->flags.action_required = flags->action_required; /* copied field by field, */
+    ev->flags.recursive = flags->recursive; /* so the event keeps no pointer to *flags */
+
+    return (virObjectEventPtr)ev;
+}
+
+virObjectEventPtr
+virDomainEventMemoryFailureNewFromObj(virDomainObjPtr obj,
+                                      virDomainMemoryFailureRecipientType recipient,
+                                      virDomainMemoryFailureActionType action,
+                                      virDomainMemoryFailureFlagsPtr flags)
+{   /* Wrapper: takes id, name and uuid from obj->def. */
+    return virDomainEventMemoryFailureNew(obj->def->id, obj->def->name,
+                                          obj->def->uuid, recipient, action,
+                                          flags);
+}
+
+virObjectEventPtr
+virDomainEventMemoryFailureNewFromDom(virDomainPtr dom,
+                                      virDomainMemoryFailureRecipientType recipient,
+                                      virDomainMemoryFailureActionType action,
+                                      virDomainMemoryFailureFlagsPtr flags)
+{   /* Wrapper: takes id, name and uuid from the public domain object. */
+    return virDomainEventMemoryFailureNew(dom->id, dom->name, dom->uuid,
+                                          recipient, action, flags);
+}
+
 static void
 virDomainEventDispatchDefaultFunc(virConnectPtr conn,
                                   virObjectEventPtr event,
@@ -1902,6 +1971,19 @@ virDomainEventDispatchDefaultFunc(virConnectPtr conn,
                                                               cbopaque);
             goto cleanup;
         }
+    case VIR_DOMAIN_EVENT_ID_MEMORY_FAILURE:
+        {
+            virDomainEventMemoryFailurePtr memoryFailureEvent;
+
+            memoryFailureEvent = (virDomainEventMemoryFailurePtr)event;
+            ((virConnectDomainEventMemoryFailureCallback)cb)(conn, dom,
+                                                             memoryFailureEvent->recipient,
+                                                             memoryFailureEvent->action,
+                                                             &memoryFailureEvent->flags,
+                                                             cbopaque);
+            goto cleanup;
+        }
+
     case VIR_DOMAIN_EVENT_ID_LAST:
         break;
     }
diff --git a/src/conf/domain_event.h b/src/conf/domain_event.h
index d1cfb81d62..5b317e8d30 100644
--- a/src/conf/domain_event.h
+++ b/src/conf/domain_event.h
@@ -255,6 +255,18 @@ virDomainEventBlockThresholdNewFromDom(virDomainPtr dom,
                                        unsigned long long threshold,
                                        unsigned long long excess);
 
+virObjectEventPtr
+virDomainEventMemoryFailureNewFromObj(virDomainObjPtr obj,
+                                      virDomainMemoryFailureRecipientType recipient,
+                                      virDomainMemoryFailureActionType action,
+                                      virDomainMemoryFailureFlagsPtr flags);
+
+virObjectEventPtr
+virDomainEventMemoryFailureNewFromDom(virDomainPtr dom,
+                                      virDomainMemoryFailureRecipientType recipient,
+                                      virDomainMemoryFailureActionType action,
+                                      virDomainMemoryFailureFlagsPtr flags);
+
 int
 virDomainEventStateRegister(virConnectPtr conn,
                             virObjectEventStatePtr state,
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 152083d220..927de5001a 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -704,6 +704,8 @@ virDomainEventLifecycleNew;
 virDomainEventLifecycleNewFromDef;
 virDomainEventLifecycleNewFromDom;
 virDomainEventLifecycleNewFromObj;
+virDomainEventMemoryFailureNewFromDom;
+virDomainEventMemoryFailureNewFromObj;
 virDomainEventMetadataChangeNewFromDom;
 virDomainEventMetadataChangeNewFromObj;
 virDomainEventMigrationIterationNewFromDom;
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 9623123d3c..5b5316fadd 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -10551,6 +10551,7 @@ qemuProcessEventFree(struct qemuProcessEvent *event)
     case QEMU_PROCESS_EVENT_BLOCK_JOB:
     case QEMU_PROCESS_EVENT_MONITOR_EOF:
     case QEMU_PROCESS_EVENT_GUEST_CRASHLOADED:
+    case QEMU_PROCESS_EVENT_MEMORY_FAILURE:
         VIR_FREE(event->data);
         break;
     case QEMU_PROCESS_EVENT_JOB_STATUS_CHANGE:
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 9bf32e16c9..51d5963f25 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -441,6 +441,7 @@ typedef enum {
     QEMU_PROCESS_EVENT_PR_DISCONNECT,
     QEMU_PROCESS_EVENT_RDMA_GID_STATUS_CHANGED,
     QEMU_PROCESS_EVENT_GUEST_CRASHLOADED,
+    QEMU_PROCESS_EVENT_MEMORY_FAILURE,
 
     QEMU_PROCESS_EVENT_LAST
 } qemuProcessEventType;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 8ef812cd94..aecd947836 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -4292,6 +4292,59 @@ processGuestCrashloadedEvent(virQEMUDriverPtr driver,
 }
 
 
+static void
+processMemoryFailureEvent(virQEMUDriverPtr driver,
+                          virDomainObjPtr vm,
+                          qemuMonitorEventMemoryFailurePtr mfp)
+{   /* Converts a monitor-level memory failure into a domain event and queues it. */
+    virObjectEventPtr event = NULL;
+    virDomainMemoryFailureRecipientType recipient;
+    virDomainMemoryFailureActionType action;
+    virDomainMemoryFailureFlags flags;
+
+    switch (mfp->recipient) { /* monitor enum -> public API enum */
+    case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_HYPERVISOR:
+        recipient = VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_HYPERVISOR;
+        break;
+    case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_GUEST:
+        recipient = VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_GUEST;
+        break;
+    case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST:
+    default:
+        virReportError(VIR_ERR_INVALID_ARG, "%s",
+                       _("requested unknown memory failure recipient"));
+        return; /* no event is queued for an unknown recipient */
+    }
+
+    switch (mfp->action) { /* monitor enum -> public API enum */
+    case QEMU_MONITOR_MEMORY_FAILURE_ACTION_IGNORE:
+        action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_IGNORE;
+        break;
+    case QEMU_MONITOR_MEMORY_FAILURE_ACTION_INJECT:
+        action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_INJECT;
+        break;
+    case QEMU_MONITOR_MEMORY_FAILURE_ACTION_FATAL:
+        action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_FATAL;
+        break;
+    case QEMU_MONITOR_MEMORY_FAILURE_ACTION_RESET:
+        action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_RESET;
+        break;
+    case QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST:
+    default:
+        virReportError(VIR_ERR_INVALID_ARG, "%s",
+                       _("requested unknown memory failure action"));
+        return; /* no event is queued for an unknown action */
+    }
+
+    flags.action_required = mfp->action_required; /* bool widened to int */
+    flags.recursive = mfp->recursive; /* bool widened to int */
+    event = virDomainEventMemoryFailureNewFromObj(vm, recipient, action,
+                                                  &flags);
+
+    virObjectEventStateQueue(driver->domainEventState, event); /* event may be NULL if creation failed */
+}
+
+
 static void qemuProcessEventHandler(void *data, void *opaque)
 {
     struct qemuProcessEvent *processEvent = data;
@@ -4341,6 +4394,10 @@ static void qemuProcessEventHandler(void *data, void *opaque)
     case QEMU_PROCESS_EVENT_GUEST_CRASHLOADED:
         processGuestCrashloadedEvent(driver, vm);
         break;
+    case QEMU_PROCESS_EVENT_MEMORY_FAILURE:
+        processMemoryFailureEvent(driver, vm, processEvent->data);
+        break;
+
     case QEMU_PROCESS_EVENT_LAST:
         break;
     }
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index 8c991fefbb..189b789bb8 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -159,7 +159,6 @@ static int qemuMonitorOnceInit(void)
 
 VIR_ONCE_GLOBAL_INIT(qemuMonitor);
 
-
 VIR_ENUM_IMPL(qemuMonitorMigrationStatus,
               QEMU_MONITOR_MIGRATION_STATUS_LAST,
               "inactive", "setup",
@@ -197,6 +196,14 @@ VIR_ENUM_IMPL(qemuMonitorDumpStatus,
               "none", "active", "completed", "failed",
 );
 
+VIR_ENUM_IMPL(qemuMonitorMemoryFailureRecipient,
+              QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST,
+              "hypervisor", "guest"); /* listed in the same order as the enum constants */
+
+VIR_ENUM_IMPL(qemuMonitorMemoryFailureAction,
+              QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST,
+              "ignore", "inject",
+              "fatal", "reset"); /* listed in the same order as the enum constants */
 
 #if DEBUG_RAW_IO
 static char *
@@ -1428,6 +1435,18 @@ qemuMonitorEmitSpiceMigrated(qemuMonitorPtr mon)
 
 
 int
+qemuMonitorEmitMemoryFailure(qemuMonitorPtr mon,
+                             qemuMonitorEventMemoryFailurePtr mfp)
+{
+    int ret = -1; /* returned as-is unless QEMU_MONITOR_CALLBACK assigns it */
+
+    QEMU_MONITOR_CALLBACK(mon, ret, domainMemoryFailure, mon->vm, mfp); /* forwards mfp to the domainMemoryFailure callback */
+
+    return ret;
+}
+
+
+int
 qemuMonitorEmitMigrationStatus(qemuMonitorPtr mon,
                                int status)
 {
diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h
index a744c8975b..17ba006a2f 100644
--- a/src/qemu/qemu_monitor.h
+++ b/src/qemu/qemu_monitor.h
@@ -340,6 +340,40 @@ typedef int (*qemuMonitorDomainGuestCrashloadedCallback)(qemuMonitorPtr mon,
                                                          virDomainObjPtr vm,
                                                          void *opaque);
 
+typedef enum {
+    QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_HYPERVISOR, /* "hypervisor" */
+    QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_GUEST, /* "guest" */
+
+    QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST
+} qemuMonitorMemoryFailureRecipient;
+
+VIR_ENUM_DECL(qemuMonitorMemoryFailureRecipient);
+
+typedef enum {
+    QEMU_MONITOR_MEMORY_FAILURE_ACTION_IGNORE, /* "ignore" */
+    QEMU_MONITOR_MEMORY_FAILURE_ACTION_INJECT, /* "inject" */
+    QEMU_MONITOR_MEMORY_FAILURE_ACTION_FATAL, /* "fatal" */
+    QEMU_MONITOR_MEMORY_FAILURE_ACTION_RESET, /* "reset" */
+
+    QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST
+} qemuMonitorMemoryFailureAction;
+
+VIR_ENUM_DECL(qemuMonitorMemoryFailureAction);
+
+typedef struct _qemuMonitorEventMemoryFailure qemuMonitorEventMemoryFailure;
+typedef qemuMonitorEventMemoryFailure *qemuMonitorEventMemoryFailurePtr;
+struct _qemuMonitorEventMemoryFailure {
+    qemuMonitorMemoryFailureRecipient recipient; /* parsed from the event's "recipient" string */
+    qemuMonitorMemoryFailureAction action; /* parsed from the event's "action" string */
+    bool action_required; /* "action-required" member of the event's "flags" object */
+    bool recursive; /* "recursive" member of the event's "flags" object */
+};
+
+typedef int (*qemuMonitorDomainMemoryFailureCallback)(qemuMonitorPtr mon,
+                                                      virDomainObjPtr vm,
+                                                      qemuMonitorEventMemoryFailurePtr mfp,
+                                                      void *opaque);
+
 typedef struct _qemuMonitorCallbacks qemuMonitorCallbacks;
 typedef qemuMonitorCallbacks *qemuMonitorCallbacksPtr;
 struct _qemuMonitorCallbacks {
@@ -376,6 +410,7 @@ struct _qemuMonitorCallbacks {
     qemuMonitorDomainPRManagerStatusChangedCallback domainPRManagerStatusChanged;
     qemuMonitorDomainRdmaGidStatusChangedCallback domainRdmaGidStatusChanged;
     qemuMonitorDomainGuestCrashloadedCallback domainGuestCrashloaded;
+    qemuMonitorDomainMemoryFailureCallback domainMemoryFailure;
 };
 
 qemuMonitorPtr qemuMonitorOpen(virDomainObjPtr vm,
@@ -475,6 +510,10 @@ int qemuMonitorEmitSerialChange(qemuMonitorPtr mon,
                                 const char *devAlias,
                                 bool connected);
 int qemuMonitorEmitSpiceMigrated(qemuMonitorPtr mon);
+
+int qemuMonitorEmitMemoryFailure(qemuMonitorPtr mon,
+                                 qemuMonitorEventMemoryFailurePtr mfp);
+
 int qemuMonitorEmitMigrationStatus(qemuMonitorPtr mon,
                                    int status);
 int qemuMonitorEmitMigrationPass(qemuMonitorPtr mon,
diff --git a/src/qemu/qemu_monitor_json.c b/src/qemu/qemu_monitor_json.c
index 26ac499fc5..8e2659dc21 100644
--- a/src/qemu/qemu_monitor_json.c
+++ b/src/qemu/qemu_monitor_json.c
@@ -112,6 +112,7 @@ static void qemuMonitorJSONHandleBlockThreshold(qemuMonitorPtr mon, virJSONValue
 static void qemuMonitorJSONHandleDumpCompleted(qemuMonitorPtr mon, virJSONValuePtr data);
 static void qemuMonitorJSONHandlePRManagerStatusChanged(qemuMonitorPtr mon, virJSONValuePtr data);
 static void qemuMonitorJSONHandleRdmaGidStatusChanged(qemuMonitorPtr mon, virJSONValuePtr data);
+static void qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon, virJSONValuePtr data);
 
 typedef struct {
     const char *type;
@@ -132,6 +133,7 @@ static qemuEventHandler eventHandlers[] = {
     { "GUEST_CRASHLOADED", qemuMonitorJSONHandleGuestCrashloaded, },
     { "GUEST_PANICKED", qemuMonitorJSONHandleGuestPanic, },
     { "JOB_STATUS_CHANGE", qemuMonitorJSONHandleJobStatusChange, },
+    { "MEMORY_FAILURE", qemuMonitorJSONHandleMemoryFailure, },
     { "MIGRATION", qemuMonitorJSONHandleMigrationStatus, },
     { "MIGRATION_PASS", qemuMonitorJSONHandleMigrationPass, },
     { "NIC_RX_FILTER_CHANGED", qemuMonitorJSONHandleNicRxFilterChanged, },
@@ -1336,6 +1338,54 @@ qemuMonitorJSONHandleSpiceMigrated(qemuMonitorPtr mon,
 
 
 static void
+qemuMonitorJSONHandleMemoryFailure(qemuMonitorPtr mon,
+                                   virJSONValuePtr data)
+{   /* Parses a MEMORY_FAILURE event payload and emits it through the monitor. */
+    virJSONValuePtr flagsjson = virJSONValueObjectGetObject(data, "flags");
+    const char *str;
+    int recipient;
+    int action;
+    bool ar = false; /* action-required; default used when "flags" is missing */
+    bool recursive = false; /* default used when "flags" is missing */
+    qemuMonitorEventMemoryFailurePtr mfp;
+
+    if (!(str = virJSONValueObjectGetString(data, "recipient"))) {
+        VIR_WARN("missing recipient in memory failure event");
+        return; /* event dropped */
+    }
+
+    recipient = qemuMonitorMemoryFailureRecipientTypeFromString(str);
+    if (recipient == -1) {
+        VIR_WARN("unknown recipient '%s' in memory_failure event", str);
+        return; /* event dropped */
+    }
+
+    if (!(str = virJSONValueObjectGetString(data, "action"))) {
+        VIR_WARN("missing action in memory failure event");
+        return; /* event dropped */
+    }
+
+    action = qemuMonitorMemoryFailureActionTypeFromString(str);
+    if (action == -1) {
+        VIR_WARN("unknown action '%s' in memory_failure event", str);
+        return; /* event dropped */
+    }
+
+    if (flagsjson) { /* a missing "flags" object is tolerated; lookup results are not checked */
+        virJSONValueObjectGetBoolean(flagsjson, "action-required", &ar);
+        virJSONValueObjectGetBoolean(flagsjson, "recursive", &recursive);
+    }
+
+    mfp = g_new0(qemuMonitorEventMemoryFailure, 1); /* heap-allocated; not freed in this function */
+    mfp->recipient = recipient;
+    mfp->action = action;
+    mfp->action_required = ar;
+    mfp->recursive = recursive;
+    qemuMonitorEmitMemoryFailure(mon, mfp); /* NOTE(review): result ignored; confirm the callback always takes ownership of mfp, otherwise it leaks */
+}
+
+
+static void
 qemuMonitorJSONHandleMigrationStatus(qemuMonitorPtr mon,
                                      virJSONValuePtr data)
 {
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 6b5de29fdb..abcbab0f06 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -1878,6 +1878,33 @@ qemuProcessHandleGuestCrashloaded(qemuMonitorPtr mon G_GNUC_UNUSED,
 }
 
 
+static int
+qemuProcessHandleMemoryFailure(qemuMonitorPtr mon G_GNUC_UNUSED,
+                               virDomainObjPtr vm,
+                               qemuMonitorEventMemoryFailurePtr mfp,
+                               void *opaque)
+{   /* Monitor callback: wraps mfp in a process event and sends it to the worker pool. */
+    virQEMUDriverPtr driver = opaque;
+    struct qemuProcessEvent *processEvent;
+
+    virObjectLock(vm);
+    processEvent = g_new0(struct qemuProcessEvent, 1);
+
+    processEvent->eventType = QEMU_PROCESS_EVENT_MEMORY_FAILURE;
+    processEvent->data = mfp; /* mfp is now carried by the process event */
+    processEvent->vm = virObjectRef(vm); /* extra reference held for the queued job */
+
+    if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) {
+        virObjectUnref(vm); /* undo the reference taken above */
+        qemuProcessEventFree(processEvent); /* frees the event together with its data */
+    }
+
+    virObjectUnlock(vm);
+
+    return 0; /* always 0, even when the job could not be queued */
+}
+
+
 static qemuMonitorCallbacks monitorCallbacks = {
     .eofNotify = qemuProcessHandleMonitorEOF,
     .errorNotify = qemuProcessHandleMonitorError,
@@ -1910,6 +1937,7 @@ static qemuMonitorCallbacks monitorCallbacks = {
     .domainPRManagerStatusChanged = qemuProcessHandlePRManagerStatusChanged,
     .domainRdmaGidStatusChanged = qemuProcessHandleRdmaGidStatusChanged,
     .domainGuestCrashloaded = qemuProcessHandleGuestCrashloaded,
+    .domainMemoryFailure = qemuProcessHandleMemoryFailure,
 };
 
 static void
diff --git a/src/remote/remote_daemon_dispatch.c b/src/remote/remote_daemon_dispatch.c
index 32ebcd8f36..45a8ab3c05 100644
--- a/src/remote/remote_daemon_dispatch.c
+++ b/src/remote/remote_daemon_dispatch.c
@@ -1302,6 +1302,38 @@ remoteRelayDomainEventBlockThreshold(virConnectPtr conn,
 }
 
 
+static int
+remoteRelayDomainEventMemoryFailure(virConnectPtr conn,
+                                    virDomainPtr dom,
+                                    virDomainMemoryFailureRecipientType recipient,
+                                    virDomainMemoryFailureActionType action,
+                                    virDomainMemoryFailureFlagsPtr flags,
+                                    void *opaque)
+{   /* Daemon-side callback: forwards a memory failure event to a remote client. */
+    daemonClientEventCallbackPtr callback = opaque;
+    remote_domain_event_memory_failure_msg data;
+
+    if (callback->callbackID < 0 ||
+        !remoteRelayDomainEventCheckACL(callback->client, conn, dom))
+        return -1; /* negative callbackID or failed ACL check: nothing is sent */
+
+    /* build return data */
+    memset(&data, 0, sizeof(data));
+    data.callbackID = callback->callbackID;
+    data.recipient = recipient;
+    data.action = action;
+    data.flags.action_required = flags->action_required;
+    data.flags.recursive = flags->recursive;
+    make_nonnull_domain(&data.dom, dom);
+
+    remoteDispatchObjectEventSend(callback->client, remoteProgram,
+                                  REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE,
+                                  (xdrproc_t)xdr_remote_domain_event_memory_failure_msg, &data);
+
+    return 0;
+}
+
+
 static virConnectDomainEventGenericCallback domainEventCallbacks[] = {
     VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventLifecycle),
     VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventReboot),
@@ -1328,6 +1360,7 @@ static virConnectDomainEventGenericCallback domainEventCallbacks[] = {
     VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventDeviceRemovalFailed),
     VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventMetadataChange),
     VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventBlockThreshold),
+    VIR_DOMAIN_EVENT_CALLBACK(remoteRelayDomainEventMemoryFailure),
 };
 
 G_STATIC_ASSERT(G_N_ELEMENTS(domainEventCallbacks) == VIR_DOMAIN_EVENT_ID_LAST);
diff --git a/src/remote/remote_driver.c b/src/remote/remote_driver.c
index d318224605..5b29161a22 100644
--- a/src/remote/remote_driver.c
+++ b/src/remote/remote_driver.c
@@ -405,6 +405,11 @@ remoteDomainBuildEventBlockThreshold(virNetClientProgramPtr prog,
                                      void *evdata, void *opaque);
 
 static void
+remoteDomainBuildEventMemoryFailure(virNetClientProgramPtr prog,
+                                    virNetClientPtr client,
+                                    void *evdata, void *opaque);
+
+static void
 remoteConnectNotifyEventConnectionClosed(virNetClientProgramPtr prog G_GNUC_UNUSED,
                                          virNetClientPtr client G_GNUC_UNUSED,
                                          void *evdata, void *opaque);
@@ -615,6 +620,10 @@ static virNetClientProgramEvent remoteEvents[] = {
       remoteDomainBuildEventBlockThreshold,
       sizeof(remote_domain_event_block_threshold_msg),
       (xdrproc_t)xdr_remote_domain_event_block_threshold_msg },
+    { REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE,
+      remoteDomainBuildEventMemoryFailure,
+      sizeof(remote_domain_event_memory_failure_msg),
+      (xdrproc_t)xdr_remote_domain_event_memory_failure_msg },
 };
 
 static void
@@ -5440,6 +5449,32 @@ remoteDomainBuildEventBlockThreshold(virNetClientProgramPtr prog G_GNUC_UNUSED,
 }
 
 
+static void
+remoteDomainBuildEventMemoryFailure(virNetClientProgramPtr prog G_GNUC_UNUSED,
+                                    virNetClientPtr client G_GNUC_UNUSED,
+                                    void *evdata, void *opaque)
+{
+    virConnectPtr conn = opaque;
+    remote_domain_event_memory_failure_msg *msg = evdata;
+    struct private_data *priv = conn->privateData;
+    virDomainPtr dom;
+    virDomainMemoryFailureFlags flags;
+    virObjectEventPtr event = NULL;
+
+    if (!(dom = get_nonnull_domain(conn, msg->dom)))
+        return;
+
+    flags.action_required = msg->flags.action_required;
+    flags.recursive = msg->flags.recursive;
+    event = virDomainEventMemoryFailureNewFromDom(dom, msg->recipient,
+                                                  msg->action, &flags);
+
+    virObjectUnref(dom);
+
+    virObjectEventStateQueueRemote(priv->eventState, event, msg->callbackID);
+}
+
+
 static int
 remoteStreamSend(virStreamPtr st,
                  const char *data,
diff --git a/src/remote/remote_protocol.x b/src/remote/remote_protocol.x
index f4d6147676..a3fda24807 100644
--- a/src/remote/remote_protocol.x
+++ b/src/remote/remote_protocol.x
@@ -3469,6 +3469,19 @@ struct remote_domain_event_callback_metadata_change_msg {
     remote_string nsuri;
 };
 
+struct remote_domain_event_memory_failure_flags {
+    int action_required;
+    int recursive;
+};
+
+struct remote_domain_event_memory_failure_msg {
+    int callbackID;
+    remote_nonnull_domain dom;
+    int recipient;
+    int action;
+    remote_domain_event_memory_failure_flags flags;
+};
+
 struct remote_connect_secret_event_register_any_args {
     int eventID;
     remote_secret secret;
@@ -6668,5 +6681,11 @@ enum remote_procedure {
      * @priority: high
      * @acl: domain:read
      */
-    REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422
+    REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422,
+
+    /**
+     * @generate: both
+     * @acl: none
+     */
+    REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE = 423
 };
diff --git a/src/remote_protocol-structs b/src/remote_protocol-structs
index bae0f0b545..1b74fb330d 100644
--- a/src/remote_protocol-structs
+++ b/src/remote_protocol-structs
@@ -2862,6 +2862,17 @@ struct remote_domain_event_callback_metadata_change_msg {
         int                        type;
         remote_string              nsuri;
 };
+struct remote_domain_event_memory_failure_flags {
+        int                        action_required;
+        int                        recursive;
+};
+struct remote_domain_event_memory_failure_msg {
+        int                        callbackID;
+        remote_nonnull_domain      dom;
+        int                        recipient;
+        int                        action;
+        remote_domain_event_memory_failure_flags flags;
+};
 struct remote_connect_secret_event_register_any_args {
         int                        eventID;
         remote_secret              secret;
@@ -3558,4 +3569,5 @@ enum remote_procedure {
         REMOTE_PROC_DOMAIN_AGENT_SET_RESPONSE_TIMEOUT = 420,
         REMOTE_PROC_DOMAIN_BACKUP_BEGIN = 421,
         REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422,
+        REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE = 423,
 };
diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c
index 8f11393197..7c6b19a54b 100644
--- a/tools/virsh-domain.c
+++ b/tools/virsh-domain.c
@@ -13590,6 +13590,41 @@ virshEventBlockThresholdPrint(virConnectPtr conn G_GNUC_UNUSED,
 }
 
 
+VIR_ENUM_DECL(virshEventMemoryFailureRecipientType);
+VIR_ENUM_IMPL(virshEventMemoryFailureRecipientType,
+              VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_LAST,
+              N_("hypervisor"),
+              N_("guest"));
+
+VIR_ENUM_DECL(virshEventMemoryFailureActionType);
+VIR_ENUM_IMPL(virshEventMemoryFailureActionType,
+              VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_LAST,
+              N_("ignore"),
+              N_("inject"),
+              N_("fatal"),
+              N_("reset"));
+
+static void
+virshEventMemoryFailurePrint(virConnectPtr conn G_GNUC_UNUSED,
+                             virDomainPtr dom,
+                             virDomainMemoryFailureRecipientType recipient,
+                             virDomainMemoryFailureActionType action,
+                             virDomainMemoryFailureFlagsPtr flags,
+                             void *opaque)
+{
+    g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
+
+    virBufferAsprintf(&buf, _("event 'memory-failure' for domain %s:\n"
+                              "recipient: %s\naction: %s\nflags:\n"
+                              "\taction required: %d\n\trecursive: %d\n"),
+                      virDomainGetName(dom),
+                      UNKNOWNSTR(virshEventMemoryFailureRecipientTypeTypeToString(recipient)),
+                      UNKNOWNSTR(virshEventMemoryFailureActionTypeTypeToString(action)),
+                      !!(flags->action_required), !!(flags->recursive));
+    virshEventPrint(opaque, &buf);
+}
+
+
 virshDomainEventCallback virshDomainEventCallbacks[] = {
     { "lifecycle",
       VIR_DOMAIN_EVENT_CALLBACK(virshEventLifecyclePrint), },
@@ -13639,6 +13674,8 @@ virshDomainEventCallback virshDomainEventCallbacks[] = {
       VIR_DOMAIN_EVENT_CALLBACK(virshEventMetadataChangePrint), },
     { "block-threshold",
       VIR_DOMAIN_EVENT_CALLBACK(virshEventBlockThresholdPrint), },
+    { "memory-failure",
+      VIR_DOMAIN_EVENT_CALLBACK(virshEventMemoryFailurePrint), },
 };
 G_STATIC_ASSERT(VIR_DOMAIN_EVENT_ID_LAST == G_N_ELEMENTS(virshDomainEventCallbacks));
 
-- 
2.11.0

Re: [PATCH] libvirt: add memory failure event
Posted by Peter Krempa 4 years, 1 month ago
On Sat, Oct 10, 2020 at 14:56:43 +0800, zhenwei pi wrote:
> Since QEMU 5.2 (commit-77b285f7f6), QEMU supports 'memory failure'
> event, posts event to monitor if hitting a hardware memory error.

I've noticed that you've introduced this to qemu. Is there a possibility
that the event could return more data? Current design of the libvirt
event you are proposing is not extensible and thus if you expect to add
anything in the future the design will need to change to e.g. use
virTypedParameter or something like that.

Additionally could you please elaborate how this event is supposed to be
used? I didn't really get it from the commit message of the qemu commit.

> Several changes in this patch:
>   Add a new event 'memory failure' for libvirt domain.
>   Implement memory failure event handling for QEMU from QMP.
>   Also implement virsh command callback functions.

See below. We don't like to see "all-in-one" patches.

> 
> Test case:
> ~# virsh event stretch --event memory-failure
> event 'memory-failure' for domain stretch:
> recipient: guest
> action: inject
> flags:
>         action required: 0
>         recursive: 0
> events received: 1

This doesn't say how you trigger the error for testing.


> 
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
> ---

We require that changes are split into sensible smaller patches:

>  examples/c/misc/event-test.c        | 17 ++++++++
>  tools/virsh-domain.c                | 37 ++++++++++++++++

Virsh and client changes must be separate

>  include/libvirt/libvirt-domain.h    | 84 +++++++++++++++++++++++++++++++++++++
>  src/conf/domain_event.c             | 82 ++++++++++++++++++++++++++++++++++++
>  src/conf/domain_event.h             | 12 ++++++
>  src/remote/remote_daemon_dispatch.c | 33 +++++++++++++++
>  src/remote/remote_driver.c          | 35 ++++++++++++++++
>  src/remote/remote_protocol.x        | 21 +++++++++-
>  src/remote_protocol-structs         | 12 ++++++

Public API must be separate

>  src/libvirt_private.syms            |  2 +
>  src/qemu/qemu_domain.c              |  1 +
>  src/qemu/qemu_domain.h              |  1 +
>  src/qemu/qemu_driver.c              | 57 +++++++++++++++++++++++++
>  src/qemu/qemu_process.c             | 28 +++++++++++++

qemu impl then goes in separately.

>  src/qemu/qemu_monitor.c             | 21 +++++++++-
>  src/qemu/qemu_monitor.h             | 39 +++++++++++++++++
>  src/qemu/qemu_monitor_json.c        | 50 ++++++++++++++++++++++

Monitor usually is also separated, although not required.

>  17 files changed, 530 insertions(+), 2 deletions(-)

Please note that the tree _must_ compile after every single patch so
make sure that they are in sane order and contain appropriate changes.


A brief review follows, the patch is rather massive so I might overlook
some things:

> diff --git a/include/libvirt/libvirt-domain.h b/include/libvirt/libvirt-domain.h
> index 77f9116675..a9170d9a7e 100644
> --- a/include/libvirt/libvirt-domain.h
> +++ b/include/libvirt/libvirt-domain.h
> @@ -3196,6 +3196,66 @@ typedef enum {

[...]

> +typedef struct _virDomainMemoryFailureFlags virDomainMemoryFailureFlags;
> +typedef virDomainMemoryFailureFlags *virDomainMemoryFailureFlagsPtr;

Usually types ending in Flags are enums in our code base. Additionally
this type is not used externally. I'd go with ...FailureProps, or drop
it completely and pass the "flags" in as arguments of the callback as
you won't be able to extend it this way.

> +struct _virDomainMemoryFailureFlags {
> +    /* whether a memory failure event is action-required or action-optional
> +     * (e.g. a failure during memory scrub). */
> +    int action_required;
> +
> +    /* whether the failure occurred while the previous failure was still in
> +     * progress. */
> +    int recursive;

Note that public structs are not considered extensible in our API, as
extending them would break the remote protocol and the ABI of the library,
so this can't be used as a means to extend the event in the future.


[...]

> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index 8ef812cd94..aecd947836 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -4292,6 +4292,59 @@ processGuestCrashloadedEvent(virQEMUDriverPtr driver,
>  }
>  
>  
> +static void
> +processMemoryFailureEvent(virQEMUDriverPtr driver,
> +                          virDomainObjPtr vm,
> +                          qemuMonitorEventMemoryFailurePtr mfp)
> +{
> +    virObjectEventPtr event = NULL;
> +    virDomainMemoryFailureRecipientType recipient;
> +    virDomainMemoryFailureActionType action;
> +    virDomainMemoryFailureFlags flags;
> +
> +    switch (mfp->recipient) {
> +    case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_HYPERVISOR:
> +        recipient = VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_HYPERVISOR;
> +        break;
> +    case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_GUEST:
> +        recipient = VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_GUEST;
> +        break;
> +    case QEMU_MONITOR_MEMORY_FAILURE_RECIPIENT_LAST:
> +    default:
> +        virReportError(VIR_ERR_INVALID_ARG, "%s",
> +                       _("requested unknown memory failure recipient"));
> +        return;
> +    }
> +
> +    switch (mfp->action) {
> +    case QEMU_MONITOR_MEMORY_FAILURE_ACTION_IGNORE:
> +        action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_IGNORE;
> +        break;
> +    case QEMU_MONITOR_MEMORY_FAILURE_ACTION_INJECT:
> +        action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_INJECT;
> +        break;
> +    case QEMU_MONITOR_MEMORY_FAILURE_ACTION_FATAL:
> +        action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_FATAL;
> +        break;
> +    case QEMU_MONITOR_MEMORY_FAILURE_ACTION_RESET:
> +        action = VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_RESET;
> +        break;
> +    case QEMU_MONITOR_MEMORY_FAILURE_ACTION_LAST:
> +    default:
> +        virReportError(VIR_ERR_INVALID_ARG, "%s",
> +                       _("requested unknown memory failure action"));
> +        return;
> +    }
> +
> +    flags.action_required = mfp->action_required;
> +    flags.recursive = mfp->recursive;
> +    event = virDomainEventMemoryFailureNewFromObj(vm, recipient, action,
> +                                                  &flags);
> +
> +    virObjectEventStateQueue(driver->domainEventState, event);

So all this function does is translation from the qemu monitor flags to
the public API flags. See below ...


> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index 8c991fefbb..189b789bb8 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c
> @@ -159,7 +159,6 @@ static int qemuMonitorOnceInit(void)
>  
>  VIR_ONCE_GLOBAL_INIT(qemuMonitor);
>  
> -

Irrelevant whitespace change.

>  VIR_ENUM_IMPL(qemuMonitorMigrationStatus,
>                QEMU_MONITOR_MIGRATION_STATUS_LAST,
>                "inactive", "setup",

[...]


> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
> index 6b5de29fdb..abcbab0f06 100644
> --- a/src/qemu/qemu_process.c
> +++ b/src/qemu/qemu_process.c
> @@ -1878,6 +1878,33 @@ qemuProcessHandleGuestCrashloaded(qemuMonitorPtr mon G_GNUC_UNUSED,
>  }
>  
>  
> +static int
> +qemuProcessHandleMemoryFailure(qemuMonitorPtr mon G_GNUC_UNUSED,
> +                               virDomainObjPtr vm,
> +                               qemuMonitorEventMemoryFailurePtr mfp,
> +                               void *opaque)
> +{
> +    virQEMUDriverPtr driver = opaque;
> +    struct qemuProcessEvent *processEvent;
> +
> +    virObjectLock(vm);
> +    processEvent = g_new0(struct qemuProcessEvent, 1);
> +
> +    processEvent->eventType = QEMU_PROCESS_EVENT_MEMORY_FAILURE;
> +    processEvent->data = mfp;
> +    processEvent->vm = virObjectRef(vm);
> +
> +    if (virThreadPoolSendJob(driver->workerPool, 0, processEvent) < 0) {
> +        virObjectUnref(vm);
> +        qemuProcessEventFree(processEvent);

Looking at the code for the function handling QEMU_PROCESS_EVENT_MEMORY_FAILURE
in another thread I didn't see anything that would require a domain job,
this means that handling this event via the processing thread actually
isn't needed and could be done directly here.


[...]

> diff --git a/src/remote/remote_protocol.x b/src/remote/remote_protocol.x
> index f4d6147676..a3fda24807 100644
> --- a/src/remote/remote_protocol.x
> +++ b/src/remote/remote_protocol.x
> @@ -3469,6 +3469,19 @@ struct remote_domain_event_callback_metadata_change_msg {
>      remote_string nsuri;
>  };
>  
> +struct remote_domain_event_memory_failure_flags {
> +    int action_required;
> +    int recursive;
> +};
> +
> +struct remote_domain_event_memory_failure_msg {
> +    int callbackID;
> +    remote_nonnull_domain dom;
> +    int recipient;
> +    int action;
> +    remote_domain_event_memory_failure_flags flags;

As noted above, none of this can be changed in the future.

> +};
> +
>  struct remote_connect_secret_event_register_any_args {
>      int eventID;
>      remote_secret secret;
> @@ -6668,5 +6681,11 @@ enum remote_procedure {
>       * @priority: high
>       * @acl: domain:read
>       */
> -    REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422
> +    REMOTE_PROC_DOMAIN_BACKUP_GET_XML_DESC = 422,
> +
> +    /**
> +     * @generate: both
> +     * @acl: none
> +     */
> +    REMOTE_PROC_DOMAIN_EVENT_MEMORY_FAILURE = 423
>  };

[...]

> diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c
> index 8f11393197..7c6b19a54b 100644
> --- a/tools/virsh-domain.c
> +++ b/tools/virsh-domain.c
> @@ -13590,6 +13590,41 @@ virshEventBlockThresholdPrint(virConnectPtr conn G_GNUC_UNUSED,
>  }
>  
>  
> +VIR_ENUM_DECL(virshEventMemoryFailureRecipientType);
> +VIR_ENUM_IMPL(virshEventMemoryFailureRecipientType,
> +              VIR_DOMAIN_EVENT_MEMORY_FAILURE_RECIPIENT_LAST,
> +              N_("hypervisor"),
> +              N_("guest"));
> +
> +VIR_ENUM_DECL(virshEventMemoryFailureActionType);
> +VIR_ENUM_IMPL(virshEventMemoryFailureActionType,
> +              VIR_DOMAIN_EVENT_MEMORY_FAILURE_ACTION_LAST,
> +              N_("ignore"),
> +              N_("inject"),
> +              N_("fatal"),
> +              N_("reset"));
> +
> +static void
> +virshEventMemoryFailurePrint(virConnectPtr conn G_GNUC_UNUSED,
> +                             virDomainPtr dom,
> +                             virDomainMemoryFailureRecipientType recipient,
> +                             virDomainMemoryFailureActionType action,
> +                             virDomainMemoryFailureFlagsPtr flags,
> +                             void *opaque)
> +{
> +    g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
> +
> +    virBufferAsprintf(&buf, _("event 'memory-failure' for domain %s:\n"
> +                              "recipient: %s\naction: %s\nflags:\n"
> +                              "\taction required: %d\n\trecursive: %d\n"),
> +                      virDomainGetName(dom),
> +                      UNKNOWNSTR(virshEventMemoryFailureRecipientTypeTypeToString(recipient)),
> +                      UNKNOWNSTR(virshEventMemoryFailureActionTypeTypeToString(action)),
> +                      !!(flags->action_required), !!(flags->recursive));

Ideally split this into multiple virBufferAsprintf calls.