The kernel expects the platform to provide the CPU registers after pausing
execution of the CPUs.
Currently, only the registers that Linux uses to generate /proc/vmcore
are exported.
Signed-off-by: Aditya Gupta <adityag@linux.ibm.com>
---
hw/ppc/pnv_mpipl.c | 102 +++++++++++++++++++++++++++++++++++++
include/hw/ppc/pnv_mpipl.h | 62 ++++++++++++++++++++++
2 files changed, 164 insertions(+)
diff --git a/hw/ppc/pnv_mpipl.c b/hw/ppc/pnv_mpipl.c
index a4f7113a44fd..8b41938c2e87 100644
--- a/hw/ppc/pnv_mpipl.c
+++ b/hw/ppc/pnv_mpipl.c
@@ -8,6 +8,8 @@
#include "qemu/log.h"
#include "qemu/units.h"
#include "system/address-spaces.h"
+#include "system/cpus.h"
+#include "system/hw_accel.h"
#include "system/runstate.h"
#include "hw/ppc/pnv.h"
#include "hw/ppc/pnv_mpipl.h"
@@ -17,6 +19,8 @@
(pnv->mpipl_state.skiboot_base + MDST_TABLE_OFF)
#define MDDT_TABLE_RELOCATED \
(pnv->mpipl_state.skiboot_base + MDDT_TABLE_OFF)
+#define PROC_DUMP_RELOCATED \
+ (pnv->mpipl_state.skiboot_base + PROC_DUMP_AREA_OFF)
/*
* Preserve the memory regions as pointed by MDST table
@@ -164,9 +168,107 @@ static bool pnv_mpipl_preserve_mem(PnvMachineState *pnv)
return true;
}
+static void do_store_cpu_regs(CPUState *cpu, MpiplPreservedCPUState *state)
+{ /* Snapshot one CPU's registers into *state: a header plus NUM_REGS_PER_CPU big-endian entries */
+ CPUPPCState *env = cpu_env(cpu); /* source of every register value read below */
+ MpiplRegDataHdr *regs_hdr = &state->hdr; /* per-thread header filled in first */
+ MpiplRegEntry *reg_entries = state->reg_entries; /* output array of register entries */
+ MpiplRegEntry *curr_reg_entry;
+ uint32_t num_saved_regs = 0; /* incremented by each REG_ENTRY(), checked at the end */
+
+ cpu_synchronize_state(cpu);
+
+ regs_hdr->pir = cpu_to_be32(env->spr[SPR_PIR]);
+
+ /*
+ * QEMU CPUs are not in Power Saving Mode
+ * (0xff presumably encodes "not stopped" - TODO confirm against HDAT)
+ */
+ regs_hdr->core_state = 0xff;
+
+ regs_hdr->off_regentries = 0; /* NOTE(review): offset is recorded as 0 here - confirm how the consumer interprets it */
+ regs_hdr->num_regentries = cpu_to_be32(NUM_REGS_PER_CPU);
+
+ regs_hdr->alloc_size = cpu_to_be32(sizeof(MpiplRegEntry)); /* both sizes are per entry, not per CPU */
+ regs_hdr->act_size = cpu_to_be32(sizeof(MpiplRegEntry));
+
+#define REG_TYPE_GPR 0x1
+#define REG_TYPE_SPR 0x2
+#define REG_TYPE_TIMA 0x3
+
+/*
+ * ID numbers used by f/w while populating certain registers
+ *
+ * Copied these defines from the linux kernel
+ *
+ * REG_ID_CCR (and REG_TYPE_TIMA above) are defined but not used by this
+ * function. None of these macros is #undef'd within this function.
+ */
+#define REG_ID_NIP 0x7D0
+#define REG_ID_MSR 0x7D1
+#define REG_ID_CCR 0x7D2
+
+ curr_reg_entry = reg_entries; /* REG_ENTRY() below appends at this cursor */
+
+#define REG_ENTRY(type, num, val) \
+ do { \
+ curr_reg_entry->reg_type = cpu_to_be32(type); \
+ curr_reg_entry->reg_num = cpu_to_be32(num); \
+ curr_reg_entry->reg_val = cpu_to_be64(val); \
+ ++curr_reg_entry; \
+ ++num_saved_regs; \
+ } while (0)
+
+ /* Save the GPRs: entries 0..31, with reg_num equal to the GPR index */
+ for (int gpr_id = 0; gpr_id < 32; ++gpr_id) {
+ REG_ENTRY(REG_TYPE_GPR, gpr_id, env->gpr[gpr_id]);
+ }
+
+ REG_ENTRY(REG_TYPE_SPR, REG_ID_NIP, env->nip); /* NIP and MSR are the only SPR-type entries saved */
+ REG_ENTRY(REG_TYPE_SPR, REG_ID_MSR, env->msr);
+
+ /*
+ * Ensure the number of registers saved matches the number of
+ * registers per cpu
+ *
+ * This will help catch an error if in future a new register entry
+ * is added/removed while not modifying NUM_REGS_PER_CPU
+ *
+ * Note the check runs after the entries were written, so it reports a
+ * mismatch rather than preventing a write past reg_entries[].
+ */
+ assert(num_saved_regs == NUM_REGS_PER_CPU);
+}
+
+static void pnv_mpipl_preserve_cpu_state(PnvMachineState *pnv)
+{ /* Snapshot the Processor Dump Area and every CPU's registers into pnv->mpipl_state */
+ MachineState *machine = MACHINE(pnv);
+ uint32_t num_cpus = machine->smp.cpus; /* sizes the cpu_states array allocated below */
+ MpiplPreservedCPUState *state; /* write cursor into cpu_states */
+ CPUState *cpu;
+
+ if (pnv->mpipl_state.cpu_states) {
+ /*
+ * CPU States might have been allocated from some past crash, free the
+ * memory to prevent a memory leak
+ *
+ * NOTE(review): g_free() accepts NULL and num_cpu_states is overwritten
+ * right below, so this guard looks redundant.
+ */
+ g_free(pnv->mpipl_state.cpu_states);
+ pnv->mpipl_state.num_cpu_states = 0;
+ }
+
+ pnv->mpipl_state.cpu_states = g_malloc_n(num_cpus,
+ sizeof(MpiplPreservedCPUState)); /* NOTE(review): g_malloc_n() does not zero the
+ * memory and do_store_cpu_regs() never writes hdr.reserved, so those
+ * bytes stay uninitialized - consider g_malloc0_n() */
+ pnv->mpipl_state.num_cpu_states = num_cpus;
+
+ state = pnv->mpipl_state.cpu_states;
+
+ /*
+ * Preserve the Processor Dump Area: a raw copy of guest memory at
+ * PROC_DUMP_RELOCATED, with no byte-order conversion done here
+ */
+ cpu_physical_memory_read(PROC_DUMP_RELOCATED, &pnv->mpipl_state.proc_area,
+ sizeof(MpiplProcDumpArea));
+
+ CPU_FOREACH(cpu) { /* NOTE(review): loop is not bounded by num_cpus; assumes CPU_FOREACH visits at most that many CPUs - confirm */
+ do_store_cpu_regs(cpu, state);
+ ++state;
+ }
+}
+
void do_mpipl_preserve(PnvMachineState *pnv)
{
+ pause_all_vcpus();
+
pnv_mpipl_preserve_mem(pnv);
+ pnv_mpipl_preserve_cpu_state(pnv);
/* Mark next boot as Memory-preserving boot */
pnv->mpipl_state.is_next_boot_mpipl = true;
diff --git a/include/hw/ppc/pnv_mpipl.h b/include/hw/ppc/pnv_mpipl.h
index ec173ba8268e..d85970bba039 100644
--- a/include/hw/ppc/pnv_mpipl.h
+++ b/include/hw/ppc/pnv_mpipl.h
@@ -16,6 +16,12 @@ typedef struct MdstTableEntry MdstTableEntry;
typedef struct MdrtTableEntry MdrtTableEntry;
typedef struct MpiplPreservedState MpiplPreservedState;
+typedef struct MpiplRegDataHdr MpiplRegDataHdr;
+typedef struct MpiplRegEntry MpiplRegEntry;
+typedef struct MpiplProcDumpArea MpiplProcDumpArea;
+typedef struct MpiplPreservedState MpiplPreservedState; /* NOTE(review): duplicates the typedef already present just above this hunk; can be dropped */
+typedef struct MpiplPreservedCPUState MpiplPreservedCPUState;
+
/* Following offsets are copied from skiboot source code */
/* Use 768 bytes for SPIRAH */
#define SPIRAH_OFF 0x00010000
@@ -46,6 +52,8 @@ typedef struct MpiplPreservedState MpiplPreservedState;
#define __packed __attribute__((packed))
+#define NUM_REGS_PER_CPU 34 /*(32 GPRs, NIP, MSR)*/
+
/*
* Memory Dump Source Table (MDST)
*
@@ -92,6 +100,55 @@ static_assert(MDST_MAX_ENTRIES == MDDT_MAX_ENTRIES,
static_assert(MDRT_MAX_ENTRIES >= MDST_MAX_ENTRIES,
"MDRT should support atleast having number of entries as in MDST");
+/*
+ * Processor Dump Area
+ *
+ * This contains the information needed for having processor
+ * state captured during a platform dump.
+ *
+ * As mentioned in HDAT, following the P9 specific format
+ *
+ * QEMU fills this structure with a raw read of guest memory at
+ * PROC_DUMP_RELOCATED (see pnv_mpipl_preserve_cpu_state()), so the
+ * multi-byte fields hold whatever byte order the firmware wrote
+ * (presumably big-endian - TODO confirm).
+ */
+struct MpiplProcDumpArea {
+ uint32_t thread_size; /* Size of each thread register entry */
+#define PROC_DUMP_AREA_VERSION_P9 0x1 /* P9 format */
+ uint8_t version;
+ uint8_t reserved[11];
+ uint64_t alloc_addr; /* Destination memory to place register data */
+ uint32_t reserved2;
+ uint32_t alloc_size; /* Allocated size */
+ uint64_t dest_addr; /* Destination address */
+ uint32_t reserved3;
+ uint32_t act_size; /* Actual data size */
+} __packed;
+
+/*
+ * "Architected Register Data" in the HDAT spec
+ *
+ * Acts as a header to the register entries for a particular thread
+ *
+ * do_store_cpu_regs() stores the 32-bit fields big-endian (cpu_to_be32)
+ * and does not write 'reserved'.
+ */
+struct MpiplRegDataHdr {
+ uint32_t pir; /* PIR of thread */
+ uint8_t core_state; /* Stop state of the overall core */
+ uint8_t reserved[3];
+ uint32_t off_regentries; /* Offset to Register Entries Array */
+ uint32_t num_regentries; /* Number of Register Entries in Array */
+ uint32_t alloc_size; /* Allocated size for each Register Entry */
+ uint32_t act_size; /* Actual size for each Register Entry */
+} __packed;
+
+struct MpiplRegEntry { /* One saved register; all fields are stored big-endian by REG_ENTRY() */
+ uint32_t reg_type; /* REG_TYPE_* (GPR or SPR in the current code) */
+ uint32_t reg_num; /* GPR index, or a REG_ID_* value for NIP/MSR */
+ uint64_t reg_val; /* Register contents */
+} __packed;
+
+struct MpiplPreservedCPUState { /* Per-CPU snapshot filled by do_store_cpu_regs() */
+ MpiplRegDataHdr hdr; /* Describes the entries that follow */
+
+ /*
+ * Length of 'reg_entries' is hdr.num_regentries
+ * (stored big-endian; do_store_cpu_regs() always sets it to NUM_REGS_PER_CPU)
+ */
+ MpiplRegEntry reg_entries[NUM_REGS_PER_CPU];
+};
+
/* Preserved state to be saved in PnvMachineState */
struct MpiplPreservedState {
/* skiboot_base will be valid only after OPAL sends relocated base to SBE */
@@ -100,6 +157,11 @@ struct MpiplPreservedState {
MdrtTableEntry *mdrt_table;
uint32_t num_mdrt_entries;
+
+ MpiplProcDumpArea proc_area;
+
+ MpiplPreservedCPUState *cpu_states;
+ uint32_t num_cpu_states;
};
#endif
--
2.52.0
On 06/12/25 11:26 am, Aditya Gupta wrote:
> Kernel expects the platform to provide CPU registers after pausing
> execution of the CPUs.
>
> Currently only exporting the registers, used by Linux, for generating
> the /proc/vmcore
>
> Signed-off-by: Aditya Gupta <adityag@linux.ibm.com>
> ---
> hw/ppc/pnv_mpipl.c | 102 +++++++++++++++++++++++++++++++++++++
> include/hw/ppc/pnv_mpipl.h | 62 ++++++++++++++++++++++
> 2 files changed, 164 insertions(+)
>
> diff --git a/hw/ppc/pnv_mpipl.c b/hw/ppc/pnv_mpipl.c
> index a4f7113a44fd..8b41938c2e87 100644
> --- a/hw/ppc/pnv_mpipl.c
> +++ b/hw/ppc/pnv_mpipl.c
> @@ -8,6 +8,8 @@
> #include "qemu/log.h"
> #include "qemu/units.h"
> #include "system/address-spaces.h"
> +#include "system/cpus.h"
> +#include "system/hw_accel.h"
> #include "system/runstate.h"
> #include "hw/ppc/pnv.h"
> #include "hw/ppc/pnv_mpipl.h"
> @@ -17,6 +19,8 @@
> (pnv->mpipl_state.skiboot_base + MDST_TABLE_OFF)
> #define MDDT_TABLE_RELOCATED \
> (pnv->mpipl_state.skiboot_base + MDDT_TABLE_OFF)
> +#define PROC_DUMP_RELOCATED \
> + (pnv->mpipl_state.skiboot_base + PROC_DUMP_AREA_OFF)
>
> /*
> * Preserve the memory regions as pointed by MDST table
> @@ -164,9 +168,107 @@ static bool pnv_mpipl_preserve_mem(PnvMachineState *pnv)
> return true;
> }
>
> +static void do_store_cpu_regs(CPUState *cpu, MpiplPreservedCPUState *state)
> +{
> + CPUPPCState *env = cpu_env(cpu);
> + MpiplRegDataHdr *regs_hdr = &state->hdr;
> + MpiplRegEntry *reg_entries = state->reg_entries;
> + MpiplRegEntry *curr_reg_entry;
> + uint32_t num_saved_regs = 0;
> +
> + cpu_synchronize_state(cpu);
> +
> + regs_hdr->pir = cpu_to_be32(env->spr[SPR_PIR]);
> +
> + /* QEMU CPUs are not in Power Saving Mode */
> + regs_hdr->core_state = 0xff;
> +
> + regs_hdr->off_regentries = 0;
> + regs_hdr->num_regentries = cpu_to_be32(NUM_REGS_PER_CPU);
> +
> + regs_hdr->alloc_size = cpu_to_be32(sizeof(MpiplRegEntry));
> + regs_hdr->act_size = cpu_to_be32(sizeof(MpiplRegEntry));
> +
> +#define REG_TYPE_GPR 0x1
> +#define REG_TYPE_SPR 0x2
> +#define REG_TYPE_TIMA 0x3
> +
> +/*
> + * ID numbers used by f/w while populating certain registers
> + *
> + * Copied these defines from the linux kernel
> + */
> +#define REG_ID_NIP 0x7D0
> +#define REG_ID_MSR 0x7D1
> +#define REG_ID_CCR 0x7D2
> +
> + curr_reg_entry = reg_entries;
> +
> +#define REG_ENTRY(type, num, val) \
> + do { \
> + curr_reg_entry->reg_type = cpu_to_be32(type); \
> + curr_reg_entry->reg_num = cpu_to_be32(num); \
> + curr_reg_entry->reg_val = cpu_to_be64(val); \
> + ++curr_reg_entry; \
> + ++num_saved_regs; \
> + } while (0)
> +
> + /* Save the GPRs */
> + for (int gpr_id = 0; gpr_id < 32; ++gpr_id) {
> + REG_ENTRY(REG_TYPE_GPR, gpr_id, env->gpr[gpr_id]);
> + }
> +
> + REG_ENTRY(REG_TYPE_SPR, REG_ID_NIP, env->nip);
> + REG_ENTRY(REG_TYPE_SPR, REG_ID_MSR, env->msr);
> +
> + /*
> + * Ensure the number of registers saved match the number of
> + * registers per cpu
> + *
> + * This will help catch an error if in future a new register entry
> + * is added/removed while not modifying NUM_PER_CPU_REGS
> + */
> + assert(num_saved_regs == NUM_REGS_PER_CPU);
> +}
> +
> +static void pnv_mpipl_preserve_cpu_state(PnvMachineState *pnv)
> +{
> + MachineState *machine = MACHINE(pnv);
> + uint32_t num_cpus = machine->smp.cpus;
> + MpiplPreservedCPUState *state;
> + CPUState *cpu;
> +
> + if (pnv->mpipl_state.cpu_states) {
> + /*
> + * CPU States might have been allocated from some past crash, free the
> + * memory to preven memory leak
> + */
> + g_free(pnv->mpipl_state.cpu_states);
> + pnv->mpipl_state.num_cpu_states = 0;
> + }
> +
> + pnv->mpipl_state.cpu_states = g_malloc_n(num_cpus,
> + sizeof(MpiplPreservedCPUState));
> + pnv->mpipl_state.num_cpu_states = num_cpus;
> +
> + state = pnv->mpipl_state.cpu_states;
> +
> + /* Preserve the Processor Dump Area */
> + cpu_physical_memory_read(PROC_DUMP_RELOCATED, &pnv->mpipl_state.proc_area,
> + sizeof(MpiplProcDumpArea));
> +
> + CPU_FOREACH(cpu) {
> + do_store_cpu_regs(cpu, state);
> + ++state;
> + }
> +}
> +
> void do_mpipl_preserve(PnvMachineState *pnv)
> {
> + pause_all_vcpus();
> +
> pnv_mpipl_preserve_mem(pnv);
> + pnv_mpipl_preserve_cpu_state(pnv);
>
> /* Mark next boot as Memory-preserving boot */
> pnv->mpipl_state.is_next_boot_mpipl = true;
> diff --git a/include/hw/ppc/pnv_mpipl.h b/include/hw/ppc/pnv_mpipl.h
> index ec173ba8268e..d85970bba039 100644
> --- a/include/hw/ppc/pnv_mpipl.h
> +++ b/include/hw/ppc/pnv_mpipl.h
> @@ -16,6 +16,12 @@ typedef struct MdstTableEntry MdstTableEntry;
> typedef struct MdrtTableEntry MdrtTableEntry;
> typedef struct MpiplPreservedState MpiplPreservedState;
>
> +typedef struct MpiplRegDataHdr MpiplRegDataHdr;
> +typedef struct MpiplRegEntry MpiplRegEntry;
> +typedef struct MpiplProcDumpArea MpiplProcDumpArea;
> +typedef struct MpiplPreservedState MpiplPreservedState;
> +typedef struct MpiplPreservedCPUState MpiplPreservedCPUState;
> +
> /* Following offsets are copied from skiboot source code */
> /* Use 768 bytes for SPIRAH */
> #define SPIRAH_OFF 0x00010000
> @@ -46,6 +52,8 @@ typedef struct MpiplPreservedState MpiplPreservedState;
>
> #define __packed __attribute__((packed))
>
> +#define NUM_REGS_PER_CPU 34 /*(32 GPRs, NIP, MSR)*/
> +
Is there any limitation on saving the other SPRs? At the least, LR is
another relevant SPR that needs to be saved to provide meaningful context.
- Hari
On 19/01/26 1:17 pm, Hari Bathini wrote:
>
>
> On 06/12/25 11:26 am, Aditya Gupta wrote:
>> Kernel expects the platform to provide CPU registers after pausing
>> execution of the CPUs.
>>
>> Currently only exporting the registers, used by Linux, for generating
>> the /proc/vmcore
>>
>> Signed-off-by: Aditya Gupta <adityag@linux.ibm.com>
>> ---
>> hw/ppc/pnv_mpipl.c | 102 +++++++++++++++++++++++++++++++++++++
>> include/hw/ppc/pnv_mpipl.h | 62 ++++++++++++++++++++++
>> 2 files changed, 164 insertions(+)
>>
>> diff --git a/hw/ppc/pnv_mpipl.c b/hw/ppc/pnv_mpipl.c
>> index a4f7113a44fd..8b41938c2e87 100644
>> --- a/hw/ppc/pnv_mpipl.c
>> +++ b/hw/ppc/pnv_mpipl.c
>> @@ -8,6 +8,8 @@
>> #include "qemu/log.h"
>> #include "qemu/units.h"
>> #include "system/address-spaces.h"
>> +#include "system/cpus.h"
>> +#include "system/hw_accel.h"
>> #include "system/runstate.h"
>> #include "hw/ppc/pnv.h"
>> #include "hw/ppc/pnv_mpipl.h"
>> @@ -17,6 +19,8 @@
>> (pnv->mpipl_state.skiboot_base + MDST_TABLE_OFF)
>> #define MDDT_TABLE_RELOCATED \
>> (pnv->mpipl_state.skiboot_base + MDDT_TABLE_OFF)
>> +#define PROC_DUMP_RELOCATED \
>> + (pnv->mpipl_state.skiboot_base + PROC_DUMP_AREA_OFF)
>> /*
>> * Preserve the memory regions as pointed by MDST table
>> @@ -164,9 +168,107 @@ static bool
>> pnv_mpipl_preserve_mem(PnvMachineState *pnv)
>> return true;
>> }
>> +static void do_store_cpu_regs(CPUState *cpu, MpiplPreservedCPUState
>> *state)
>> +{
>> + CPUPPCState *env = cpu_env(cpu);
>> + MpiplRegDataHdr *regs_hdr = &state->hdr;
>> + MpiplRegEntry *reg_entries = state->reg_entries;
>> + MpiplRegEntry *curr_reg_entry;
>> + uint32_t num_saved_regs = 0;
>> +
>> + cpu_synchronize_state(cpu);
>> +
>> + regs_hdr->pir = cpu_to_be32(env->spr[SPR_PIR]);
>> +
>> + /* QEMU CPUs are not in Power Saving Mode */
>> + regs_hdr->core_state = 0xff;
>> +
>> + regs_hdr->off_regentries = 0;
>> + regs_hdr->num_regentries = cpu_to_be32(NUM_REGS_PER_CPU);
>> +
>> + regs_hdr->alloc_size = cpu_to_be32(sizeof(MpiplRegEntry));
>> + regs_hdr->act_size = cpu_to_be32(sizeof(MpiplRegEntry));
>> +
>> +#define REG_TYPE_GPR 0x1
>> +#define REG_TYPE_SPR 0x2
>> +#define REG_TYPE_TIMA 0x3
>> +
>> +/*
>> + * ID numbers used by f/w while populating certain registers
>> + *
>> + * Copied these defines from the linux kernel
>> + */
>> +#define REG_ID_NIP 0x7D0
>> +#define REG_ID_MSR 0x7D1
>> +#define REG_ID_CCR 0x7D2
>> +
>> + curr_reg_entry = reg_entries;
>> +
>> +#define REG_ENTRY(type, num, val) \
>> + do { \
>> + curr_reg_entry->reg_type = cpu_to_be32(type); \
>> + curr_reg_entry->reg_num = cpu_to_be32(num); \
>> + curr_reg_entry->reg_val = cpu_to_be64(val); \
>> + ++curr_reg_entry; \
>> + ++num_saved_regs; \
>> + } while (0)
>> +
>> + /* Save the GPRs */
>> + for (int gpr_id = 0; gpr_id < 32; ++gpr_id) {
>> + REG_ENTRY(REG_TYPE_GPR, gpr_id, env->gpr[gpr_id]);
>> + }
>> +
>> + REG_ENTRY(REG_TYPE_SPR, REG_ID_NIP, env->nip);
>> + REG_ENTRY(REG_TYPE_SPR, REG_ID_MSR, env->msr);
>> +
>> + /*
>> + * Ensure the number of registers saved match the number of
>> + * registers per cpu
>> + *
>> + * This will help catch an error if in future a new register entry
>> + * is added/removed while not modifying NUM_PER_CPU_REGS
>> + */
>> + assert(num_saved_regs == NUM_REGS_PER_CPU);
>> +}
>> +
>> +static void pnv_mpipl_preserve_cpu_state(PnvMachineState *pnv)
>> +{
>> + MachineState *machine = MACHINE(pnv);
>> + uint32_t num_cpus = machine->smp.cpus;
>> + MpiplPreservedCPUState *state;
>> + CPUState *cpu;
>> +
>> + if (pnv->mpipl_state.cpu_states) {
>> + /*
>> + * CPU States might have been allocated from some past crash,
>> free the
>> + * memory to preven memory leak
>> + */
>> + g_free(pnv->mpipl_state.cpu_states);
>> + pnv->mpipl_state.num_cpu_states = 0;
>> + }
>> +
>> + pnv->mpipl_state.cpu_states = g_malloc_n(num_cpus,
>> + sizeof(MpiplPreservedCPUState));
>> + pnv->mpipl_state.num_cpu_states = num_cpus;
>> +
>> + state = pnv->mpipl_state.cpu_states;
>> +
>> + /* Preserve the Processor Dump Area */
>> + cpu_physical_memory_read(PROC_DUMP_RELOCATED, &pnv-
>> >mpipl_state.proc_area,
>> + sizeof(MpiplProcDumpArea));
>> +
>> + CPU_FOREACH(cpu) {
>> + do_store_cpu_regs(cpu, state);
>> + ++state;
>> + }
>> +}
>> +
>> void do_mpipl_preserve(PnvMachineState *pnv)
>> {
>> + pause_all_vcpus();
>> +
>> pnv_mpipl_preserve_mem(pnv);
>> + pnv_mpipl_preserve_cpu_state(pnv);
>> /* Mark next boot as Memory-preserving boot */
>> pnv->mpipl_state.is_next_boot_mpipl = true;
>> diff --git a/include/hw/ppc/pnv_mpipl.h b/include/hw/ppc/pnv_mpipl.h
>> index ec173ba8268e..d85970bba039 100644
>> --- a/include/hw/ppc/pnv_mpipl.h
>> +++ b/include/hw/ppc/pnv_mpipl.h
>> @@ -16,6 +16,12 @@ typedef struct MdstTableEntry MdstTableEntry;
>> typedef struct MdrtTableEntry MdrtTableEntry;
>> typedef struct MpiplPreservedState MpiplPreservedState;
>> +typedef struct MpiplRegDataHdr MpiplRegDataHdr;
>> +typedef struct MpiplRegEntry MpiplRegEntry;
>> +typedef struct MpiplProcDumpArea MpiplProcDumpArea;
>> +typedef struct MpiplPreservedState MpiplPreservedState;
btw, the above typedef is already available from a previous patch..
>> +typedef struct MpiplPreservedCPUState MpiplPreservedCPUState;
>> +
>> /* Following offsets are copied from skiboot source code */
>> /* Use 768 bytes for SPIRAH */
>> #define SPIRAH_OFF 0x00010000
>> @@ -46,6 +52,8 @@ typedef struct MpiplPreservedState MpiplPreservedState;
>> #define __packed __attribute__((packed))
>
>> +#define NUM_REGS_PER_CPU 34 /*(32 GPRs, NIP, MSR)*/
>> +
>
> Any limitation with saving the other SPRs? At least, LR is one other
> relevant SPR that needs to be saved for some meaningful context..
- Hari
© 2016 - 2026 Red Hat, Inc.