[PULL 09/13] replay: introduce a central report point for sync errors

Alex Bennée posted 13 patches 10 months, 3 weeks ago
Maintainers: Richard Henderson <richard.henderson@linaro.org>, Paolo Bonzini <pbonzini@redhat.com>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>, John Snow <jsnow@redhat.com>, Cleber Rosa <crosa@redhat.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Wainer dos Santos Moschetta <wainersm@redhat.com>, Beraldo Leal <bleal@redhat.com>
[PULL 09/13] replay: introduce a central report point for sync errors
Posted by Alex Bennée 10 months, 3 weeks ago
Figuring out why replay has failed is tricky at the best of times.
Lets centralise the reporting of a replay sync error and add a little
bit of extra information to help with debugging.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20231211091346.14616-10-alex.bennee@linaro.org>

diff --git a/replay/replay-internal.h b/replay/replay-internal.h
index 1bc8fd50868..75249b76936 100644
--- a/replay/replay-internal.h
+++ b/replay/replay-internal.h
@@ -25,7 +25,12 @@ typedef enum ReplayAsyncEventKind {
     REPLAY_ASYNC_COUNT
 } ReplayAsyncEventKind;
 
-/* Any changes to order/number of events will need to bump REPLAY_VERSION */
+/*
+ * Any changes to order/number of events will need to bump
+ * REPLAY_VERSION to prevent confusion with old logs. Also don't
+ * forget to update replay_event_name() to make your debugging life
+ * easier.
+ */
 enum ReplayEvents {
     /* for instruction event */
     EVENT_INSTRUCTION,
@@ -74,6 +79,7 @@ enum ReplayEvents {
  * @cached_clock: Cached clocks values
  * @current_icount: number of processed instructions
  * @instruction_count: number of instructions until next event
+ * @current_event: current event index
  * @data_kind: current event
  * @has_unread_data: true if event not yet processed
  * @file_offset: offset into replay log at replay snapshot
@@ -84,6 +90,7 @@ typedef struct ReplayState {
     int64_t cached_clock[REPLAY_CLOCK_COUNT];
     uint64_t current_icount;
     int instruction_count;
+    unsigned int current_event;
     unsigned int data_kind;
     bool has_unread_data;
     uint64_t file_offset;
@@ -188,6 +195,16 @@ void replay_event_net_save(void *opaque);
 /*! Reads network from the file. */
 void *replay_event_net_load(void);
 
+/* Diagnostics */
+
+/**
+ * replay_sync_error(): report sync error and exit
+ *
+ * When we reach an error condition we want to report it centrally so
+ * we can also dump some useful information into the logs.
+ */
+G_NORETURN void replay_sync_error(const char *error);
+
 /* VMState-related functions */
 
 /* Registers replay VMState.
diff --git a/replay/replay-internal.c b/replay/replay-internal.c
index 634025096ef..654b99cfb55 100644
--- a/replay/replay-internal.c
+++ b/replay/replay-internal.c
@@ -175,6 +175,7 @@ void replay_fetch_data_kind(void)
     if (replay_file) {
         if (!replay_state.has_unread_data) {
             replay_state.data_kind = replay_get_byte();
+            replay_state.current_event++;
             if (replay_state.data_kind == EVENT_INSTRUCTION) {
                 replay_state.instruction_count = replay_get_dword();
             }
diff --git a/replay/replay-snapshot.c b/replay/replay-snapshot.c
index d4f6cb7cda8..ccb4d89dda7 100644
--- a/replay/replay-snapshot.c
+++ b/replay/replay-snapshot.c
@@ -55,6 +55,7 @@ static const VMStateDescription vmstate_replay = {
         VMSTATE_INT64_ARRAY(cached_clock, ReplayState, REPLAY_CLOCK_COUNT),
         VMSTATE_UINT64(current_icount, ReplayState),
         VMSTATE_INT32(instruction_count, ReplayState),
+        VMSTATE_UINT32(current_event, ReplayState),
         VMSTATE_UINT32(data_kind, ReplayState),
         VMSTATE_BOOL(has_unread_data, ReplayState),
         VMSTATE_UINT64(file_offset, ReplayState),
diff --git a/replay/replay.c b/replay/replay.c
index 0f7d766efe8..ff197f436bf 100644
--- a/replay/replay.c
+++ b/replay/replay.c
@@ -38,6 +38,107 @@ static GSList *replay_blockers;
 uint64_t replay_break_icount = -1ULL;
 QEMUTimer *replay_break_timer;
 
+/* Pretty print event names */
+
+static const char *replay_async_event_name(ReplayAsyncEventKind event)
+{
+    switch (event) {
+#define ASYNC_EVENT(_x) case REPLAY_ASYNC_EVENT_ ## _x: return "ASYNC_EVENT_"#_x
+        ASYNC_EVENT(BH);
+        ASYNC_EVENT(BH_ONESHOT);
+        ASYNC_EVENT(INPUT);
+        ASYNC_EVENT(INPUT_SYNC);
+        ASYNC_EVENT(CHAR_READ);
+        ASYNC_EVENT(BLOCK);
+        ASYNC_EVENT(NET);
+#undef ASYNC_EVENT
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static const char *replay_clock_event_name(ReplayClockKind clock)
+{
+    switch (clock) {
+#define CLOCK_EVENT(_x) case REPLAY_CLOCK_ ## _x: return "CLOCK_" #_x
+        CLOCK_EVENT(HOST);
+        CLOCK_EVENT(VIRTUAL_RT);
+#undef CLOCK_EVENT
+    default:
+        g_assert_not_reached();
+    }
+}
+
+/* Pretty print shutdown event names */
+static const char *replay_shutdown_event_name(ShutdownCause cause)
+{
+    switch (cause) {
+#define SHUTDOWN_EVENT(_x) case SHUTDOWN_CAUSE_ ## _x: return "SHUTDOWN_CAUSE_" #_x
+        SHUTDOWN_EVENT(NONE);
+        SHUTDOWN_EVENT(HOST_ERROR);
+        SHUTDOWN_EVENT(HOST_QMP_QUIT);
+        SHUTDOWN_EVENT(HOST_QMP_SYSTEM_RESET);
+        SHUTDOWN_EVENT(HOST_SIGNAL);
+        SHUTDOWN_EVENT(HOST_UI);
+        SHUTDOWN_EVENT(GUEST_SHUTDOWN);
+        SHUTDOWN_EVENT(GUEST_RESET);
+        SHUTDOWN_EVENT(GUEST_PANIC);
+        SHUTDOWN_EVENT(SUBSYSTEM_RESET);
+        SHUTDOWN_EVENT(SNAPSHOT_LOAD);
+#undef SHUTDOWN_EVENT
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static const char *replay_checkpoint_event_name(enum ReplayCheckpoint checkpoint)
+{
+    switch (checkpoint) {
+#define CHECKPOINT_EVENT(_x) case CHECKPOINT_ ## _x: return "CHECKPOINT_" #_x
+        CHECKPOINT_EVENT(CLOCK_WARP_START);
+        CHECKPOINT_EVENT(CLOCK_WARP_ACCOUNT);
+        CHECKPOINT_EVENT(RESET_REQUESTED);
+        CHECKPOINT_EVENT(SUSPEND_REQUESTED);
+        CHECKPOINT_EVENT(CLOCK_VIRTUAL);
+        CHECKPOINT_EVENT(CLOCK_HOST);
+        CHECKPOINT_EVENT(CLOCK_VIRTUAL_RT);
+        CHECKPOINT_EVENT(INIT);
+        CHECKPOINT_EVENT(RESET);
+#undef CHECKPOINT_EVENT
+    default:
+        g_assert_not_reached();
+    }
+}
+
+static const char *replay_event_name(enum ReplayEvents event)
+{
+    /* First deal with the simple ones */
+    switch (event) {
+#define EVENT(_x) case EVENT_ ## _x: return "EVENT_"#_x
+        EVENT(INSTRUCTION);
+        EVENT(INTERRUPT);
+        EVENT(EXCEPTION);
+        EVENT(CHAR_WRITE);
+        EVENT(CHAR_READ_ALL);
+        EVENT(AUDIO_OUT);
+        EVENT(AUDIO_IN);
+        EVENT(RANDOM);
+#undef EVENT
+    default:
+        if (event >= EVENT_ASYNC && event <= EVENT_ASYNC_LAST) {
+            return replay_async_event_name(event - EVENT_ASYNC);
+        } else if (event >= EVENT_SHUTDOWN && event <= EVENT_SHUTDOWN_LAST) {
+            return replay_shutdown_event_name(event - EVENT_SHUTDOWN);
+        } else if (event >= EVENT_CLOCK && event <= EVENT_CLOCK_LAST) {
+            return replay_clock_event_name(event - EVENT_CLOCK);
+        } else if (event >= EVENT_CHECKPOINT && event <= EVENT_CHECKPOINT_LAST) {
+            return replay_checkpoint_event_name(event - EVENT_CHECKPOINT);
+        }
+    }
+
+    g_assert_not_reached();
+}
+
 bool replay_next_event_is(int event)
 {
     bool res = false;
@@ -226,6 +327,15 @@ bool replay_has_event(void)
     return res;
 }
 
+G_NORETURN void replay_sync_error(const char *error)
+{
+    error_report("%s (insn total %"PRId64"/%d left, event %d is %s)", error,
+                 replay_state.current_icount, replay_state.instruction_count,
+                 replay_state.current_event,
+                 replay_event_name(replay_state.data_kind));
+    abort();
+}
+
 static void replay_enable(const char *fname, int mode)
 {
     const char *fmode = NULL;
@@ -258,6 +368,7 @@ static void replay_enable(const char *fname, int mode)
     replay_state.data_kind = -1;
     replay_state.instruction_count = 0;
     replay_state.current_icount = 0;
+    replay_state.current_event = 0;
     replay_state.has_unread_data = 0;
 
     /* skip file header for RECORD and check it for PLAY */
-- 
2.39.2