[v9] target-ppc/spapr: Add FWNMI support in QEMU for PowerKVM guests

[Qemu-devel] [PATCH v9 4/6] target/ppc: Build rtas error log upon an MCE

Posted by Aravinda Prasad 6 years, 5 months ago

Upon a machine check exception (MCE) in a guest address space,
KVM causes a guest exit to enable QEMU to build and pass the
error to the guest in the PAPR defined rtas error log format.

This patch builds the rtas error log, copies it to the rtas_addr
and then invokes the guest registered machine check handler. The
handler in the guest takes suitable action(s) depending on the type
and criticality of the error. For example, if an error is
unrecoverable memory corruption in an application inside the
guest, then the guest kernel sends a SIGBUS to the application.
For recoverable errors, the guest performs recovery actions and
logs the error.

Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
---
 hw/ppc/spapr.c         |    5 +
 hw/ppc/spapr_events.c  |  236 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/spapr.h |    4 +
 3 files changed, 245 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 6b6c962..c97f6a6 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2910,6 +2910,11 @@ static void spapr_machine_init(MachineState *machine)
         error_report("Could not get size of LPAR rtas '%s'", filename);
         exit(1);
     }
+
+    /* Resize blob to accommodate error log. */
+    g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);
+    spapr->rtas_size = RTAS_ERROR_LOG_MAX;
+
     spapr->rtas_blob = g_malloc(spapr->rtas_size);
     if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
         error_report("Could not load LPAR rtas '%s'", filename);
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index a18446b..573c0b7 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -212,6 +212,106 @@ struct hp_extended_log {
     struct rtas_event_log_v6_hp hp;
 } QEMU_PACKED;
 
+struct rtas_event_log_v6_mc {
+#define RTAS_LOG_V6_SECTION_ID_MC                   0x4D43 /* MC */
+    struct rtas_event_log_v6_section_header hdr;
+    uint32_t fru_id;
+    uint32_t proc_id;
+    uint8_t error_type;
+#define RTAS_LOG_V6_MC_TYPE_UE                           0
+#define RTAS_LOG_V6_MC_TYPE_SLB                          1
+#define RTAS_LOG_V6_MC_TYPE_ERAT                         2
+#define RTAS_LOG_V6_MC_TYPE_TLB                          4
+#define RTAS_LOG_V6_MC_TYPE_D_CACHE                      5
+#define RTAS_LOG_V6_MC_TYPE_I_CACHE                      7
+    uint8_t sub_err_type;
+#define RTAS_LOG_V6_MC_UE_INDETERMINATE                  0
+#define RTAS_LOG_V6_MC_UE_IFETCH                         1
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH         2
+#define RTAS_LOG_V6_MC_UE_LOAD_STORE                     3
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE     4
+#define RTAS_LOG_V6_MC_SLB_PARITY                        0
+#define RTAS_LOG_V6_MC_SLB_MULTIHIT                      1
+#define RTAS_LOG_V6_MC_SLB_INDETERMINATE                 2
+#define RTAS_LOG_V6_MC_ERAT_PARITY                       1
+#define RTAS_LOG_V6_MC_ERAT_MULTIHIT                     2
+#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE                3
+#define RTAS_LOG_V6_MC_TLB_PARITY                        1
+#define RTAS_LOG_V6_MC_TLB_MULTIHIT                      2
+#define RTAS_LOG_V6_MC_TLB_INDETERMINATE                 3
+    uint8_t reserved_1[6];
+    uint64_t effective_address;
+    uint64_t logical_address;
+} QEMU_PACKED;
+
+struct mc_extended_log {
+    struct rtas_event_log_v6 v6hdr;
+    struct rtas_event_log_v6_mc mc;
+} QEMU_PACKED;
+
+struct MC_ierror_table {
+    unsigned long srr1_mask;
+    unsigned long srr1_value;
+    bool nip_valid; /* nip is a valid indicator of faulting address */
+    uint8_t error_type;
+    uint8_t error_subtype;
+    unsigned int initiator;
+    unsigned int severity;
+};
+
+static const struct MC_ierror_table mc_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0, 0, 0, 0, 0, 0 } };
+
+struct MC_derror_table {
+    unsigned long dsisr_value;
+    bool dar_valid; /* dar is a valid indicator of faulting address */
+    uint8_t error_type;
+    uint8_t error_subtype;
+    unsigned int initiator;
+    unsigned int severity;
+};
+
+static const struct MC_derror_table mc_derror_table[] = {
+{ 0x00008000, false,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00004000, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000800, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000400, true,
+  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000080, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,  /* Before PARITY */
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x00000100, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0, false, 0, 0, 0, 0 } };
+
+#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
+
 typedef enum EventClass {
     EVENT_CLASS_INTERNAL_ERRORS     = 0,
     EVENT_CLASS_EPOW                = 1,
@@ -620,6 +720,138 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
                             RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
 }
 
+static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
+                                        struct mc_extended_log *ext_elog)
+{
+    int i;
+    CPUPPCState *env = &cpu->env;
+    uint32_t summary;
+    uint64_t dsisr = env->spr[SPR_DSISR];
+
+    summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT;
+    if (recovered) {
+        summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED;
+    } else {
+        summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED;
+    }
+
+    if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) {
+        for (i = 0; mc_derror_table[i].dsisr_value; i++) {
+            if (!(dsisr & mc_derror_table[i].dsisr_value)) {
+                continue;
+            }
+
+            ext_elog->mc.error_type = mc_derror_table[i].error_type;
+            ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype;
+            if (mc_derror_table[i].dar_valid) {
+                ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]);
+            }
+
+            summary |= mc_derror_table[i].initiator
+                        | mc_derror_table[i].severity;
+
+            return summary;
+        }
+    } else {
+        for (i = 0; mc_ierror_table[i].srr1_mask; i++) {
+            if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) !=
+                    mc_ierror_table[i].srr1_value) {
+                continue;
+            }
+
+            ext_elog->mc.error_type = mc_ierror_table[i].error_type;
+            ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype;
+            if (mc_ierror_table[i].nip_valid) {
+                ext_elog->mc.effective_address = cpu_to_be64(env->nip);
+            }
+
+            summary |= mc_ierror_table[i].initiator
+                        | mc_ierror_table[i].severity;
+
+            return summary;
+        }
+    }
+
+    summary |= RTAS_LOG_INITIATOR_CPU;
+    return summary;
+}
+
+static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+    CPUState *cs = CPU(cpu);
+    uint64_t rtas_addr;
+    CPUPPCState *env = &cpu->env;
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    target_ulong r3, msr = 0;
+    struct rtas_error_log log;
+    struct mc_extended_log *ext_elog;
+    uint32_t summary;
+
+    /*
+     * Properly set bits in MSR before we invoke the handler.
+     * SRR0/1, DAR and DSISR are properly set by KVM
+     */
+    if (!(*pcc->interrupts_big_endian)(cpu)) {
+        msr |= (1ULL << MSR_LE);
+    }
+
+    if (env->msr & (1ULL << MSR_SF)) {
+        msr |= (1ULL << MSR_SF);
+    }
+
+    msr |= (1ULL << MSR_ME);
+
+    if (spapr->guest_machine_check_addr == -1) {
+        /*
+         * This implies that we have hit a machine check between system
+         * reset and "ibm,nmi-register". Fall back to the old machine
+         * check behavior in such cases.
+         */
+        env->spr[SPR_SRR0] = env->nip;
+        env->spr[SPR_SRR1] = env->msr;
+        env->msr = msr;
+        env->nip = 0x200;
+        return;
+    }
+
+    ext_elog = g_malloc0(sizeof(*ext_elog));
+    summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog);
+
+    log.summary = cpu_to_be32(summary);
+    log.extended_length = cpu_to_be32(sizeof(*ext_elog));
+
+    /* r3 should be in BE always */
+    r3 = cpu_to_be64(env->gpr[3]);
+    env->msr = msr;
+
+    spapr_init_v6hdr(&ext_elog->v6hdr);
+    ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC);
+    ext_elog->mc.hdr.section_length =
+                    cpu_to_be16(sizeof(struct rtas_event_log_v6_mc));
+    ext_elog->mc.hdr.section_version = 1;
+
+    /* get rtas addr from fdt */
+    rtas_addr = spapr_get_rtas_addr();
+    if (!rtas_addr) {
+        /* Unable to fetch rtas_addr. Hence reset the guest */
+        ppc_cpu_do_system_reset(cs);
+    }
+
+    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET, &r3,
+                              sizeof(r3));
+    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + sizeof(r3),
+                              &log, sizeof(log));
+    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + sizeof(r3) +
+                              sizeof(log), ext_elog,
+                              sizeof(*ext_elog));
+
+    env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET;
+    env->nip = spapr->guest_machine_check_addr;
+
+    g_free(ext_elog);
+}
+
 void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 {
     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
@@ -641,6 +873,10 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
         }
     }
     spapr->mc_status = cpu->vcpu_id;
+
+    spapr_mce_dispatch_elog(cpu, recovered);
+
+    return;
 }
 
 static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index fc3a776..c717ab2 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -710,6 +710,9 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr);
 
 #define RTAS_ERROR_LOG_MAX      2048
 
+/* Offset from rtas-base where error log is placed */
+#define RTAS_ERROR_LOG_OFFSET       0x30
+
 #define RTAS_EVENT_SCAN_RATE    1
 
 /* This helper should be used to encode interrupt specifiers when the related
@@ -799,6 +802,7 @@ int spapr_max_server_number(SpaprMachineState *spapr);
 void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
                       uint64_t pte0, uint64_t pte1);
 void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
+ssize_t spapr_get_rtas_size(ssize_t old_rtas_sizea);
 
 /* DRC callbacks. */
 void spapr_core_release(DeviceState *dev);

Re: [Qemu-devel] [PATCH v9 4/6] target/ppc: Build rtas error log upon an MCE

Posted by Greg Kurz 6 years, 5 months ago

On Wed, 29 May 2019 11:10:40 +0530
Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:

> Upon a machine check exception (MCE) in a guest address space,
> KVM causes a guest exit to enable QEMU to build and pass the
> error to the guest in the PAPR defined rtas error log format.
> 
> This patch builds the rtas error log, copies it to the rtas_addr
> and then invokes the guest registered machine check handler. The
> handler in the guest takes suitable action(s) depending on the type
> and criticality of the error. For example, if an error is
> unrecoverable memory corruption in an application inside the
> guest, then the guest kernel sends a SIGBUS to the application.
> For recoverable errors, the guest performs recovery actions and
> logs the error.
> 
> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr.c         |    5 +
>  hw/ppc/spapr_events.c  |  236 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/spapr.h |    4 +
>  3 files changed, 245 insertions(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 6b6c962..c97f6a6 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2910,6 +2910,11 @@ static void spapr_machine_init(MachineState *machine)
>          error_report("Could not get size of LPAR rtas '%s'", filename);
>          exit(1);
>      }
> +
> +    /* Resize blob to accommodate error log. */
> +    g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);

I don't see the point of this assertion... especially with the assignment
below.

> +    spapr->rtas_size = RTAS_ERROR_LOG_MAX;

As requested by David, this should only be done when the spapr cap is set,
so that 4.0 machine types and older continue to use the current size.

> +
>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
>      if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
>          error_report("Could not load LPAR rtas '%s'", filename);
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index a18446b..573c0b7 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -212,6 +212,106 @@ struct hp_extended_log {
>      struct rtas_event_log_v6_hp hp;
>  } QEMU_PACKED;
>  
> +struct rtas_event_log_v6_mc {
> +#define RTAS_LOG_V6_SECTION_ID_MC                   0x4D43 /* MC */
> +    struct rtas_event_log_v6_section_header hdr;
> +    uint32_t fru_id;
> +    uint32_t proc_id;
> +    uint8_t error_type;
> +#define RTAS_LOG_V6_MC_TYPE_UE                           0
> +#define RTAS_LOG_V6_MC_TYPE_SLB                          1
> +#define RTAS_LOG_V6_MC_TYPE_ERAT                         2
> +#define RTAS_LOG_V6_MC_TYPE_TLB                          4
> +#define RTAS_LOG_V6_MC_TYPE_D_CACHE                      5
> +#define RTAS_LOG_V6_MC_TYPE_I_CACHE                      7
> +    uint8_t sub_err_type;
> +#define RTAS_LOG_V6_MC_UE_INDETERMINATE                  0
> +#define RTAS_LOG_V6_MC_UE_IFETCH                         1
> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH         2
> +#define RTAS_LOG_V6_MC_UE_LOAD_STORE                     3
> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE     4
> +#define RTAS_LOG_V6_MC_SLB_PARITY                        0
> +#define RTAS_LOG_V6_MC_SLB_MULTIHIT                      1
> +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE                 2
> +#define RTAS_LOG_V6_MC_ERAT_PARITY                       1
> +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT                     2
> +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE                3
> +#define RTAS_LOG_V6_MC_TLB_PARITY                        1
> +#define RTAS_LOG_V6_MC_TLB_MULTIHIT                      2
> +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE                 3
> +    uint8_t reserved_1[6];
> +    uint64_t effective_address;
> +    uint64_t logical_address;
> +} QEMU_PACKED;
> +
> +struct mc_extended_log {
> +    struct rtas_event_log_v6 v6hdr;
> +    struct rtas_event_log_v6_mc mc;
> +} QEMU_PACKED;
> +
> +struct MC_ierror_table {
> +    unsigned long srr1_mask;
> +    unsigned long srr1_value;
> +    bool nip_valid; /* nip is a valid indicator of faulting address */
> +    uint8_t error_type;
> +    uint8_t error_subtype;
> +    unsigned int initiator;
> +    unsigned int severity;
> +};
> +
> +static const struct MC_ierror_table mc_ierror_table[] = {
> +{ 0x00000000081c0000, 0x0000000000040000, true,
> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> +{ 0x00000000081c0000, 0x0000000000080000, true,
> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> +{ 0x00000000081c0000, 0x00000000000c0000, true,
> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> +{ 0x00000000081c0000, 0x0000000000100000, true,
> +  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> +{ 0x00000000081c0000, 0x0000000000140000, true,
> +  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> +{ 0x00000000081c0000, 0x0000000000180000, true,
> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> +{ 0, 0, 0, 0, 0, 0 } };
> +
> +struct MC_derror_table {
> +    unsigned long dsisr_value;
> +    bool dar_valid; /* dar is a valid indicator of faulting address */
> +    uint8_t error_type;
> +    uint8_t error_subtype;
> +    unsigned int initiator;
> +    unsigned int severity;
> +};
> +
> +static const struct MC_derror_table mc_derror_table[] = {
> +{ 0x00008000, false,
> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> +{ 0x00004000, true,
> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> +{ 0x00000800, true,
> +  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> +{ 0x00000400, true,
> +  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> +{ 0x00000080, true,
> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,  /* Before PARITY */
> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> +{ 0x00000100, true,
> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> +{ 0, false, 0, 0, 0, 0 } };
> +
> +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
> +
>  typedef enum EventClass {
>      EVENT_CLASS_INTERNAL_ERRORS     = 0,
>      EVENT_CLASS_EPOW                = 1,
> @@ -620,6 +720,138 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
>                              RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
>  }
>  
> +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
> +                                        struct mc_extended_log *ext_elog)
> +{
> +    int i;
> +    CPUPPCState *env = &cpu->env;
> +    uint32_t summary;
> +    uint64_t dsisr = env->spr[SPR_DSISR];
> +
> +    summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT;
> +    if (recovered) {
> +        summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED;
> +    } else {
> +        summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED;
> +    }
> +
> +    if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) {
> +        for (i = 0; mc_derror_table[i].dsisr_value; i++) {
> +            if (!(dsisr & mc_derror_table[i].dsisr_value)) {
> +                continue;
> +            }
> +
> +            ext_elog->mc.error_type = mc_derror_table[i].error_type;
> +            ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype;
> +            if (mc_derror_table[i].dar_valid) {
> +                ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]);
> +            }
> +
> +            summary |= mc_derror_table[i].initiator
> +                        | mc_derror_table[i].severity;
> +
> +            return summary;
> +        }
> +    } else {
> +        for (i = 0; mc_ierror_table[i].srr1_mask; i++) {
> +            if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) !=
> +                    mc_ierror_table[i].srr1_value) {
> +                continue;
> +            }
> +
> +            ext_elog->mc.error_type = mc_ierror_table[i].error_type;
> +            ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype;
> +            if (mc_ierror_table[i].nip_valid) {
> +                ext_elog->mc.effective_address = cpu_to_be64(env->nip);
> +            }
> +
> +            summary |= mc_ierror_table[i].initiator
> +                        | mc_ierror_table[i].severity;
> +
> +            return summary;
> +        }
> +    }
> +
> +    summary |= RTAS_LOG_INITIATOR_CPU;
> +    return summary;
> +}
> +
> +static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
> +{
> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> +    CPUState *cs = CPU(cpu);
> +    uint64_t rtas_addr;
> +    CPUPPCState *env = &cpu->env;
> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> +    target_ulong r3, msr = 0;
> +    struct rtas_error_log log;
> +    struct mc_extended_log *ext_elog;
> +    uint32_t summary;
> +
> +    /*
> +     * Properly set bits in MSR before we invoke the handler.
> +     * SRR0/1, DAR and DSISR are properly set by KVM
> +     */
> +    if (!(*pcc->interrupts_big_endian)(cpu)) {
> +        msr |= (1ULL << MSR_LE);
> +    }
> +
> +    if (env->msr & (1ULL << MSR_SF)) {
> +        msr |= (1ULL << MSR_SF);
> +    }
> +
> +    msr |= (1ULL << MSR_ME);
> +
> +    if (spapr->guest_machine_check_addr == -1) {
> +        /*
> +         * This implies that we have hit a machine check between system
> +         * reset and "ibm,nmi-register". Fall back to the old machine
> +         * check behavior in such cases.
> +         */
> +        env->spr[SPR_SRR0] = env->nip;
> +        env->spr[SPR_SRR1] = env->msr;
> +        env->msr = msr;
> +        env->nip = 0x200;
> +        return;
> +    }
> +
> +    ext_elog = g_malloc0(sizeof(*ext_elog));
> +    summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog);
> +
> +    log.summary = cpu_to_be32(summary);
> +    log.extended_length = cpu_to_be32(sizeof(*ext_elog));
> +
> +    /* r3 should be in BE always */
> +    r3 = cpu_to_be64(env->gpr[3]);
> +    env->msr = msr;
> +
> +    spapr_init_v6hdr(&ext_elog->v6hdr);
> +    ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC);
> +    ext_elog->mc.hdr.section_length =
> +                    cpu_to_be16(sizeof(struct rtas_event_log_v6_mc));
> +    ext_elog->mc.hdr.section_version = 1;
> +
> +    /* get rtas addr from fdt */
> +    rtas_addr = spapr_get_rtas_addr();
> +    if (!rtas_addr) {
> +        /* Unable to fetch rtas_addr. Hence reset the guest */
> +        ppc_cpu_do_system_reset(cs);
> +    }
> +
> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET, &r3,
> +                              sizeof(r3));
> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + sizeof(r3),
> +                              &log, sizeof(log));
> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + sizeof(r3) +
> +                              sizeof(log), ext_elog,
> +                              sizeof(*ext_elog));
> +
> +    env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET;
> +    env->nip = spapr->guest_machine_check_addr;
> +
> +    g_free(ext_elog);
> +}
> +
>  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>  {
>      SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> @@ -641,6 +873,10 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>          }
>      }
>      spapr->mc_status = cpu->vcpu_id;
> +
> +    spapr_mce_dispatch_elog(cpu, recovered);
> +
> +    return;

Drop the last two lines.

>  }
>  
>  static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index fc3a776..c717ab2 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -710,6 +710,9 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr);
>  
>  #define RTAS_ERROR_LOG_MAX      2048
>  
> +/* Offset from rtas-base where error log is placed */
> +#define RTAS_ERROR_LOG_OFFSET       0x30
> +
>  #define RTAS_EVENT_SCAN_RATE    1
>  
>  /* This helper should be used to encode interrupt specifiers when the related
> @@ -799,6 +802,7 @@ int spapr_max_server_number(SpaprMachineState *spapr);
>  void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
>                        uint64_t pte0, uint64_t pte1);
>  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
> +ssize_t spapr_get_rtas_size(ssize_t old_rtas_sizea);
>  

Looks like a leftover.

>  /* DRC callbacks. */
>  void spapr_core_release(DeviceState *dev);
>

Re: [Qemu-devel] [PATCH v9 4/6] target/ppc: Build rtas error log upon an MCE

Posted by Aravinda Prasad 6 years, 5 months ago


On Monday 03 June 2019 07:30 PM, Greg Kurz wrote:
> On Wed, 29 May 2019 11:10:40 +0530
> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> 
>> Upon a machine check exception (MCE) in a guest address space,
>> KVM causes a guest exit to enable QEMU to build and pass the
>> error to the guest in the PAPR defined rtas error log format.
>>
>> This patch builds the rtas error log, copies it to the rtas_addr
>> and then invokes the guest registered machine check handler. The
>> handler in the guest takes suitable action(s) depending on the type
>> and criticality of the error. For example, if an error is
>> unrecoverable memory corruption in an application inside the
>> guest, then the guest kernel sends a SIGBUS to the application.
>> For recoverable errors, the guest performs recovery actions and
>> logs the error.
>>
>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>> ---
>>  hw/ppc/spapr.c         |    5 +
>>  hw/ppc/spapr_events.c  |  236 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/hw/ppc/spapr.h |    4 +
>>  3 files changed, 245 insertions(+)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 6b6c962..c97f6a6 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -2910,6 +2910,11 @@ static void spapr_machine_init(MachineState *machine)
>>          error_report("Could not get size of LPAR rtas '%s'", filename);
>>          exit(1);
>>      }
>> +
>> +    /* Resize blob to accommodate error log. */
>> +    g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);
> 
> I don't see the point of this assertion... especially with the assignment
> below.

It is required because we want to ensure that the rtas image size is
less than RTAS_ERROR_LOG_OFFSET, or else we will overwrite the rtas
image with rtas error when we hit machine check exception. But I think a
comment in the code will help. Will add it.


> 
>> +    spapr->rtas_size = RTAS_ERROR_LOG_MAX;
> 
> As requested by David, this should only be done when the spapr cap is set,
> so that 4.0 machine types and older continue to use the current size.

Due to other issue of re-allocating the blob and as this is not that
much space, we agreed to keep the size to RTAS_ERROR_LOG_MAX always.

Link to the discussion on this:
http://lists.nongnu.org/archive/html/qemu-ppc/2019-05/msg00275.html

> 
>> +
>>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
>>      if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
>>          error_report("Could not load LPAR rtas '%s'", filename);
>> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
>> index a18446b..573c0b7 100644
>> --- a/hw/ppc/spapr_events.c
>> +++ b/hw/ppc/spapr_events.c
>> @@ -212,6 +212,106 @@ struct hp_extended_log {
>>      struct rtas_event_log_v6_hp hp;
>>  } QEMU_PACKED;
>>  
>> +struct rtas_event_log_v6_mc {
>> +#define RTAS_LOG_V6_SECTION_ID_MC                   0x4D43 /* MC */
>> +    struct rtas_event_log_v6_section_header hdr;
>> +    uint32_t fru_id;
>> +    uint32_t proc_id;
>> +    uint8_t error_type;
>> +#define RTAS_LOG_V6_MC_TYPE_UE                           0
>> +#define RTAS_LOG_V6_MC_TYPE_SLB                          1
>> +#define RTAS_LOG_V6_MC_TYPE_ERAT                         2
>> +#define RTAS_LOG_V6_MC_TYPE_TLB                          4
>> +#define RTAS_LOG_V6_MC_TYPE_D_CACHE                      5
>> +#define RTAS_LOG_V6_MC_TYPE_I_CACHE                      7
>> +    uint8_t sub_err_type;
>> +#define RTAS_LOG_V6_MC_UE_INDETERMINATE                  0
>> +#define RTAS_LOG_V6_MC_UE_IFETCH                         1
>> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH         2
>> +#define RTAS_LOG_V6_MC_UE_LOAD_STORE                     3
>> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE     4
>> +#define RTAS_LOG_V6_MC_SLB_PARITY                        0
>> +#define RTAS_LOG_V6_MC_SLB_MULTIHIT                      1
>> +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE                 2
>> +#define RTAS_LOG_V6_MC_ERAT_PARITY                       1
>> +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT                     2
>> +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE                3
>> +#define RTAS_LOG_V6_MC_TLB_PARITY                        1
>> +#define RTAS_LOG_V6_MC_TLB_MULTIHIT                      2
>> +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE                 3
>> +    uint8_t reserved_1[6];
>> +    uint64_t effective_address;
>> +    uint64_t logical_address;
>> +} QEMU_PACKED;
>> +
>> +struct mc_extended_log {
>> +    struct rtas_event_log_v6 v6hdr;
>> +    struct rtas_event_log_v6_mc mc;
>> +} QEMU_PACKED;
>> +
>> +struct MC_ierror_table {
>> +    unsigned long srr1_mask;
>> +    unsigned long srr1_value;
>> +    bool nip_valid; /* nip is a valid indicator of faulting address */
>> +    uint8_t error_type;
>> +    uint8_t error_subtype;
>> +    unsigned int initiator;
>> +    unsigned int severity;
>> +};
>> +
>> +static const struct MC_ierror_table mc_ierror_table[] = {
>> +{ 0x00000000081c0000, 0x0000000000040000, true,
>> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>> +{ 0x00000000081c0000, 0x0000000000080000, true,
>> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>> +{ 0x00000000081c0000, 0x00000000000c0000, true,
>> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>> +{ 0x00000000081c0000, 0x0000000000100000, true,
>> +  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>> +{ 0x00000000081c0000, 0x0000000000140000, true,
>> +  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>> +{ 0x00000000081c0000, 0x0000000000180000, true,
>> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>> +{ 0, 0, 0, 0, 0, 0 } };
>> +
>> +struct MC_derror_table {
>> +    unsigned long dsisr_value;
>> +    bool dar_valid; /* dar is a valid indicator of faulting address */
>> +    uint8_t error_type;
>> +    uint8_t error_subtype;
>> +    unsigned int initiator;
>> +    unsigned int severity;
>> +};
>> +
>> +static const struct MC_derror_table mc_derror_table[] = {
>> +{ 0x00008000, false,
>> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>> +{ 0x00004000, true,
>> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>> +{ 0x00000800, true,
>> +  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>> +{ 0x00000400, true,
>> +  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>> +{ 0x00000080, true,
>> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,  /* Before PARITY */
>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>> +{ 0x00000100, true,
>> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>> +{ 0, false, 0, 0, 0, 0 } };
>> +
>> +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
>> +
>>  typedef enum EventClass {
>>      EVENT_CLASS_INTERNAL_ERRORS     = 0,
>>      EVENT_CLASS_EPOW                = 1,
>> @@ -620,6 +720,138 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
>>                              RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
>>  }
>>  
>> +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
>> +                                        struct mc_extended_log *ext_elog)
>> +{
>> +    int i;
>> +    CPUPPCState *env = &cpu->env;
>> +    uint32_t summary;
>> +    uint64_t dsisr = env->spr[SPR_DSISR];
>> +
>> +    summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT;
>> +    if (recovered) {
>> +        summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED;
>> +    } else {
>> +        summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED;
>> +    }
>> +
>> +    if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) {
>> +        for (i = 0; mc_derror_table[i].dsisr_value; i++) {
>> +            if (!(dsisr & mc_derror_table[i].dsisr_value)) {
>> +                continue;
>> +            }
>> +
>> +            ext_elog->mc.error_type = mc_derror_table[i].error_type;
>> +            ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype;
>> +            if (mc_derror_table[i].dar_valid) {
>> +                ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]);
>> +            }
>> +
>> +            summary |= mc_derror_table[i].initiator
>> +                        | mc_derror_table[i].severity;
>> +
>> +            return summary;
>> +        }
>> +    } else {
>> +        for (i = 0; mc_ierror_table[i].srr1_mask; i++) {
>> +            if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) !=
>> +                    mc_ierror_table[i].srr1_value) {
>> +                continue;
>> +            }
>> +
>> +            ext_elog->mc.error_type = mc_ierror_table[i].error_type;
>> +            ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype;
>> +            if (mc_ierror_table[i].nip_valid) {
>> +                ext_elog->mc.effective_address = cpu_to_be64(env->nip);
>> +            }
>> +
>> +            summary |= mc_ierror_table[i].initiator
>> +                        | mc_ierror_table[i].severity;
>> +
>> +            return summary;
>> +        }
>> +    }
>> +
>> +    summary |= RTAS_LOG_INITIATOR_CPU;
>> +    return summary;
>> +}
>> +
>> +static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
>> +{
>> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>> +    CPUState *cs = CPU(cpu);
>> +    uint64_t rtas_addr;
>> +    CPUPPCState *env = &cpu->env;
>> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
>> +    target_ulong r3, msr = 0;
>> +    struct rtas_error_log log;
>> +    struct mc_extended_log *ext_elog;
>> +    uint32_t summary;
>> +
>> +    /*
>> +     * Properly set bits in MSR before we invoke the handler.
>> +     * SRR0/1, DAR and DSISR are properly set by KVM
>> +     */
>> +    if (!(*pcc->interrupts_big_endian)(cpu)) {
>> +        msr |= (1ULL << MSR_LE);
>> +    }
>> +
>> +    if (env->msr & (1ULL << MSR_SF)) {
>> +        msr |= (1ULL << MSR_SF);
>> +    }
>> +
>> +    msr |= (1ULL << MSR_ME);
>> +
>> +    if (spapr->guest_machine_check_addr == -1) {
>> +        /*
>> +         * This implies that we have hit a machine check between system
>> +         * reset and "ibm,nmi-register". Fall back to the old machine
>> +         * check behavior in such cases.
>> +         */
>> +        env->spr[SPR_SRR0] = env->nip;
>> +        env->spr[SPR_SRR1] = env->msr;
>> +        env->msr = msr;
>> +        env->nip = 0x200;
>> +        return;
>> +    }
>> +
>> +    ext_elog = g_malloc0(sizeof(*ext_elog));
>> +    summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog);
>> +
>> +    log.summary = cpu_to_be32(summary);
>> +    log.extended_length = cpu_to_be32(sizeof(*ext_elog));
>> +
>> +    /* r3 should be in BE always */
>> +    r3 = cpu_to_be64(env->gpr[3]);
>> +    env->msr = msr;
>> +
>> +    spapr_init_v6hdr(&ext_elog->v6hdr);
>> +    ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC);
>> +    ext_elog->mc.hdr.section_length =
>> +                    cpu_to_be16(sizeof(struct rtas_event_log_v6_mc));
>> +    ext_elog->mc.hdr.section_version = 1;
>> +
>> +    /* get rtas addr from fdt */
>> +    rtas_addr = spapr_get_rtas_addr();
>> +    if (!rtas_addr) {
>> +        /* Unable to fetch rtas_addr. Hence reset the guest */
>> +        ppc_cpu_do_system_reset(cs);
>> +    }
>> +
>> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET, &r3,
>> +                              sizeof(r3));
>> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + sizeof(r3),
>> +                              &log, sizeof(log));
>> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + sizeof(r3) +
>> +                              sizeof(log), ext_elog,
>> +                              sizeof(*ext_elog));
>> +
>> +    env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET;
>> +    env->nip = spapr->guest_machine_check_addr;
>> +
>> +    g_free(ext_elog);
>> +}
>> +
>>  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>>  {
>>      SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>> @@ -641,6 +873,10 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>>          }
>>      }
>>      spapr->mc_status = cpu->vcpu_id;
>> +
>> +    spapr_mce_dispatch_elog(cpu, recovered);
>> +
>> +    return;
> 
> Drop the last two lines.

ok.

> 
>>  }
>>  
>>  static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index fc3a776..c717ab2 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -710,6 +710,9 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr);
>>  
>>  #define RTAS_ERROR_LOG_MAX      2048
>>  
>> +/* Offset from rtas-base where error log is placed */
>> +#define RTAS_ERROR_LOG_OFFSET       0x30
>> +
>>  #define RTAS_EVENT_SCAN_RATE    1
>>  
>>  /* This helper should be used to encode interrupt specifiers when the related
>> @@ -799,6 +802,7 @@ int spapr_max_server_number(SpaprMachineState *spapr);
>>  void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
>>                        uint64_t pte0, uint64_t pte1);
>>  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
>> +ssize_t spapr_get_rtas_size(ssize_t old_rtas_sizea);
>>  
> 
> Looks like a leftover.

ah.. yes.

> 
>>  /* DRC callbacks. */
>>  void spapr_core_release(DeviceState *dev);
>>
> 

-- 
Regards,
Aravinda

Re: [Qemu-devel] [PATCH v9 4/6] target/ppc: Build rtas error log upon an MCE

Posted by Greg Kurz 6 years, 5 months ago

On Tue, 4 Jun 2019 11:59:13 +0530
Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:

> On Monday 03 June 2019 07:30 PM, Greg Kurz wrote:
> > On Wed, 29 May 2019 11:10:40 +0530
> > Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> >   
> >> Upon a machine check exception (MCE) in a guest address space,
> >> KVM causes a guest exit to enable QEMU to build and pass the
> >> error to the guest in the PAPR defined rtas error log format.
> >>
> >> This patch builds the rtas error log, copies it to the rtas_addr
> >> and then invokes the guest registered machine check handler. The
> >> handler in the guest takes suitable action(s) depending on the type
> >> and criticality of the error. For example, if an error is
> >> unrecoverable memory corruption in an application inside the
> >> guest, then the guest kernel sends a SIGBUS to the application.
> >> For recoverable errors, the guest performs recovery actions and
> >> logs the error.
> >>
> >> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >> ---
> >>  hw/ppc/spapr.c         |    5 +
> >>  hw/ppc/spapr_events.c  |  236 ++++++++++++++++++++++++++++++++++++++++++++++++
> >>  include/hw/ppc/spapr.h |    4 +
> >>  3 files changed, 245 insertions(+)
> >>
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index 6b6c962..c97f6a6 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -2910,6 +2910,11 @@ static void spapr_machine_init(MachineState *machine)
> >>          error_report("Could not get size of LPAR rtas '%s'", filename);
> >>          exit(1);
> >>      }
> >> +
> >> +    /* Resize blob to accommodate error log. */
> >> +    g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);  
> > 
> > I don't see the point of this assertion... especially with the assignment
> > below.  
> 
> It is required because we want to ensure that the rtas image size is
> less than RTAS_ERROR_LOG_OFFSET, or else we will overwrite the rtas
> image with rtas error when we hit machine check exception. But I think a
> comment in the code will help. Will add it.
> 

I'd rather exit QEMU properly instead of aborting then. Also this is only
needed if the guest has a chance to use FWNMI, ie. the spapr cap is set.

> 
> >   
> >> +    spapr->rtas_size = RTAS_ERROR_LOG_MAX;  
> > 
> > As requested by David, this should only be done when the spapr cap is set,
> > so that 4.0 machine types and older continue to use the current size.  
> 
> Due to other issue of re-allocating the blob and as this is not that
> much space, we agreed to keep the size to RTAS_ERROR_LOG_MAX always.
> 
> Link to the discussion on this:
> http://lists.nongnu.org/archive/html/qemu-ppc/2019-05/msg00275.html
> 

Indeed, and in the next mail in that thread, David writes:

> No, that's not right.  It's impractical to change the allocation
> depending on whether fwnmi is currently active.  But you *can* (and
> should) base the allocation on whether fwnmi is *possible* - that is,
> the value of the spapr cap.

ie, allocate RTAS_ERROR_LOG_MAX when the spapr cap is set, allocate
the file size otherwise.

> >   
> >> +
> >>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
> >>      if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
> >>          error_report("Could not load LPAR rtas '%s'", filename);
> >> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> >> index a18446b..573c0b7 100644
> >> --- a/hw/ppc/spapr_events.c
> >> +++ b/hw/ppc/spapr_events.c
> >> @@ -212,6 +212,106 @@ struct hp_extended_log {
> >>      struct rtas_event_log_v6_hp hp;
> >>  } QEMU_PACKED;
> >>  
> >> +struct rtas_event_log_v6_mc {
> >> +#define RTAS_LOG_V6_SECTION_ID_MC                   0x4D43 /* MC */
> >> +    struct rtas_event_log_v6_section_header hdr;
> >> +    uint32_t fru_id;
> >> +    uint32_t proc_id;
> >> +    uint8_t error_type;
> >> +#define RTAS_LOG_V6_MC_TYPE_UE                           0
> >> +#define RTAS_LOG_V6_MC_TYPE_SLB                          1
> >> +#define RTAS_LOG_V6_MC_TYPE_ERAT                         2
> >> +#define RTAS_LOG_V6_MC_TYPE_TLB                          4
> >> +#define RTAS_LOG_V6_MC_TYPE_D_CACHE                      5
> >> +#define RTAS_LOG_V6_MC_TYPE_I_CACHE                      7
> >> +    uint8_t sub_err_type;
> >> +#define RTAS_LOG_V6_MC_UE_INDETERMINATE                  0
> >> +#define RTAS_LOG_V6_MC_UE_IFETCH                         1
> >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH         2
> >> +#define RTAS_LOG_V6_MC_UE_LOAD_STORE                     3
> >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE     4
> >> +#define RTAS_LOG_V6_MC_SLB_PARITY                        0
> >> +#define RTAS_LOG_V6_MC_SLB_MULTIHIT                      1
> >> +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE                 2
> >> +#define RTAS_LOG_V6_MC_ERAT_PARITY                       1
> >> +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT                     2
> >> +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE                3
> >> +#define RTAS_LOG_V6_MC_TLB_PARITY                        1
> >> +#define RTAS_LOG_V6_MC_TLB_MULTIHIT                      2
> >> +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE                 3
> >> +    uint8_t reserved_1[6];
> >> +    uint64_t effective_address;
> >> +    uint64_t logical_address;
> >> +} QEMU_PACKED;
> >> +
> >> +struct mc_extended_log {
> >> +    struct rtas_event_log_v6 v6hdr;
> >> +    struct rtas_event_log_v6_mc mc;
> >> +} QEMU_PACKED;
> >> +
> >> +struct MC_ierror_table {
> >> +    unsigned long srr1_mask;
> >> +    unsigned long srr1_value;
> >> +    bool nip_valid; /* nip is a valid indicator of faulting address */
> >> +    uint8_t error_type;
> >> +    uint8_t error_subtype;
> >> +    unsigned int initiator;
> >> +    unsigned int severity;
> >> +};
> >> +
> >> +static const struct MC_ierror_table mc_ierror_table[] = {
> >> +{ 0x00000000081c0000, 0x0000000000040000, true,
> >> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000000081c0000, 0x0000000000080000, true,
> >> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000000081c0000, 0x00000000000c0000, true,
> >> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000000081c0000, 0x0000000000100000, true,
> >> +  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000000081c0000, 0x0000000000140000, true,
> >> +  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000000081c0000, 0x0000000000180000, true,
> >> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0, 0, 0, 0, 0, 0 } };
> >> +
> >> +struct MC_derror_table {
> >> +    unsigned long dsisr_value;
> >> +    bool dar_valid; /* dar is a valid indicator of faulting address */
> >> +    uint8_t error_type;
> >> +    uint8_t error_subtype;
> >> +    unsigned int initiator;
> >> +    unsigned int severity;
> >> +};
> >> +
> >> +static const struct MC_derror_table mc_derror_table[] = {
> >> +{ 0x00008000, false,
> >> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00004000, true,
> >> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000800, true,
> >> +  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000400, true,
> >> +  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000080, true,
> >> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,  /* Before PARITY */
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000100, true,
> >> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0, false, 0, 0, 0, 0 } };
> >> +
> >> +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
> >> +
> >>  typedef enum EventClass {
> >>      EVENT_CLASS_INTERNAL_ERRORS     = 0,
> >>      EVENT_CLASS_EPOW                = 1,
> >> @@ -620,6 +720,138 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
> >>                              RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
> >>  }
> >>  
> >> +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
> >> +                                        struct mc_extended_log *ext_elog)
> >> +{
> >> +    int i;
> >> +    CPUPPCState *env = &cpu->env;
> >> +    uint32_t summary;
> >> +    uint64_t dsisr = env->spr[SPR_DSISR];
> >> +
> >> +    summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT;
> >> +    if (recovered) {
> >> +        summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED;
> >> +    } else {
> >> +        summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED;
> >> +    }
> >> +
> >> +    if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) {
> >> +        for (i = 0; mc_derror_table[i].dsisr_value; i++) {
> >> +            if (!(dsisr & mc_derror_table[i].dsisr_value)) {
> >> +                continue;
> >> +            }
> >> +
> >> +            ext_elog->mc.error_type = mc_derror_table[i].error_type;
> >> +            ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype;
> >> +            if (mc_derror_table[i].dar_valid) {
> >> +                ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]);
> >> +            }
> >> +
> >> +            summary |= mc_derror_table[i].initiator
> >> +                        | mc_derror_table[i].severity;
> >> +
> >> +            return summary;
> >> +        }
> >> +    } else {
> >> +        for (i = 0; mc_ierror_table[i].srr1_mask; i++) {
> >> +            if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) !=
> >> +                    mc_ierror_table[i].srr1_value) {
> >> +                continue;
> >> +            }
> >> +
> >> +            ext_elog->mc.error_type = mc_ierror_table[i].error_type;
> >> +            ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype;
> >> +            if (mc_ierror_table[i].nip_valid) {
> >> +                ext_elog->mc.effective_address = cpu_to_be64(env->nip);
> >> +            }
> >> +
> >> +            summary |= mc_ierror_table[i].initiator
> >> +                        | mc_ierror_table[i].severity;
> >> +
> >> +            return summary;
> >> +        }
> >> +    }
> >> +
> >> +    summary |= RTAS_LOG_INITIATOR_CPU;
> >> +    return summary;
> >> +}
> >> +
> >> +static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
> >> +{
> >> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> >> +    CPUState *cs = CPU(cpu);
> >> +    uint64_t rtas_addr;
> >> +    CPUPPCState *env = &cpu->env;
> >> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> >> +    target_ulong r3, msr = 0;
> >> +    struct rtas_error_log log;
> >> +    struct mc_extended_log *ext_elog;
> >> +    uint32_t summary;
> >> +
> >> +    /*
> >> +     * Properly set bits in MSR before we invoke the handler.
> >> +     * SRR0/1, DAR and DSISR are properly set by KVM
> >> +     */
> >> +    if (!(*pcc->interrupts_big_endian)(cpu)) {
> >> +        msr |= (1ULL << MSR_LE);
> >> +    }
> >> +
> >> +    if (env->msr & (1ULL << MSR_SF)) {
> >> +        msr |= (1ULL << MSR_SF);
> >> +    }
> >> +
> >> +    msr |= (1ULL << MSR_ME);
> >> +
> >> +    if (spapr->guest_machine_check_addr == -1) {
> >> +        /*
> >> +         * This implies that we have hit a machine check between system
> >> +         * reset and "ibm,nmi-register". Fall back to the old machine
> >> +         * check behavior in such cases.
> >> +         */
> >> +        env->spr[SPR_SRR0] = env->nip;
> >> +        env->spr[SPR_SRR1] = env->msr;
> >> +        env->msr = msr;
> >> +        env->nip = 0x200;
> >> +        return;
> >> +    }
> >> +
> >> +    ext_elog = g_malloc0(sizeof(*ext_elog));
> >> +    summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog);
> >> +
> >> +    log.summary = cpu_to_be32(summary);
> >> +    log.extended_length = cpu_to_be32(sizeof(*ext_elog));
> >> +
> >> +    /* r3 should be in BE always */
> >> +    r3 = cpu_to_be64(env->gpr[3]);
> >> +    env->msr = msr;
> >> +
> >> +    spapr_init_v6hdr(&ext_elog->v6hdr);
> >> +    ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC);
> >> +    ext_elog->mc.hdr.section_length =
> >> +                    cpu_to_be16(sizeof(struct rtas_event_log_v6_mc));
> >> +    ext_elog->mc.hdr.section_version = 1;
> >> +
> >> +    /* get rtas addr from fdt */
> >> +    rtas_addr = spapr_get_rtas_addr();
> >> +    if (!rtas_addr) {
> >> +        /* Unable to fetch rtas_addr. Hence reset the guest */
> >> +        ppc_cpu_do_system_reset(cs);
> >> +    }
> >> +
> >> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET, &r3,
> >> +                              sizeof(r3));
> >> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + sizeof(r3),
> >> +                              &log, sizeof(log));
> >> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + sizeof(r3) +
> >> +                              sizeof(log), ext_elog,
> >> +                              sizeof(*ext_elog));
> >> +
> >> +    env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET;
> >> +    env->nip = spapr->guest_machine_check_addr;
> >> +
> >> +    g_free(ext_elog);
> >> +}
> >> +
> >>  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> >>  {
> >>      SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> >> @@ -641,6 +873,10 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> >>          }
> >>      }
> >>      spapr->mc_status = cpu->vcpu_id;
> >> +
> >> +    spapr_mce_dispatch_elog(cpu, recovered);
> >> +
> >> +    return;  
> > 
> > Drop the last two lines.  
> 
> ok.
> 
> >   
> >>  }
> >>  
> >>  static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> >> index fc3a776..c717ab2 100644
> >> --- a/include/hw/ppc/spapr.h
> >> +++ b/include/hw/ppc/spapr.h
> >> @@ -710,6 +710,9 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr);
> >>  
> >>  #define RTAS_ERROR_LOG_MAX      2048
> >>  
> >> +/* Offset from rtas-base where error log is placed */
> >> +#define RTAS_ERROR_LOG_OFFSET       0x30
> >> +
> >>  #define RTAS_EVENT_SCAN_RATE    1
> >>  
> >>  /* This helper should be used to encode interrupt specifiers when the related
> >> @@ -799,6 +802,7 @@ int spapr_max_server_number(SpaprMachineState *spapr);
> >>  void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
> >>                        uint64_t pte0, uint64_t pte1);
> >>  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
> >> +ssize_t spapr_get_rtas_size(ssize_t old_rtas_sizea);
> >>    
> > 
> > Looks like a leftover.  
> 
> ah.. yes.
> 
> >   
> >>  /* DRC callbacks. */
> >>  void spapr_core_release(DeviceState *dev);
> >>  
> >   
>

Re: [Qemu-devel] [Qemu-ppc] [PATCH v9 4/6] target/ppc: Build rtas error log upon an MCE

Posted by Aravinda Prasad 6 years, 5 months ago


On Tuesday 04 June 2019 02:31 PM, Greg Kurz wrote:
> On Tue, 4 Jun 2019 11:59:13 +0530
> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> 
>> On Monday 03 June 2019 07:30 PM, Greg Kurz wrote:
>>> On Wed, 29 May 2019 11:10:40 +0530
>>> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
>>>   
>>>> Upon a machine check exception (MCE) in a guest address space,
>>>> KVM causes a guest exit to enable QEMU to build and pass the
>>>> error to the guest in the PAPR defined rtas error log format.
>>>>
>>>> This patch builds the rtas error log, copies it to the rtas_addr
>>>> and then invokes the guest registered machine check handler. The
>>>> handler in the guest takes suitable action(s) depending on the type
>>>> and criticality of the error. For example, if an error is
>>>> unrecoverable memory corruption in an application inside the
>>>> guest, then the guest kernel sends a SIGBUS to the application.
>>>> For recoverable errors, the guest performs recovery actions and
>>>> logs the error.
>>>>
>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>>>> ---
>>>>  hw/ppc/spapr.c         |    5 +
>>>>  hw/ppc/spapr_events.c  |  236 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  include/hw/ppc/spapr.h |    4 +
>>>>  3 files changed, 245 insertions(+)
>>>>
>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>> index 6b6c962..c97f6a6 100644
>>>> --- a/hw/ppc/spapr.c
>>>> +++ b/hw/ppc/spapr.c
>>>> @@ -2910,6 +2910,11 @@ static void spapr_machine_init(MachineState *machine)
>>>>          error_report("Could not get size of LPAR rtas '%s'", filename);
>>>>          exit(1);
>>>>      }
>>>> +
>>>> +    /* Resize blob to accommodate error log. */
>>>> +    g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);  
>>>
>>> I don't see the point of this assertion... especially with the assignment
>>> below.  
>>
>> It is required because we want to ensure that the rtas image size is
>> less than RTAS_ERROR_LOG_OFFSET, or else we will overwrite the rtas
>> image with rtas error when we hit machine check exception. But I think a
>> comment in the code will help. Will add it.
>>
> 
> I'd rather exit QEMU properly instead of aborting then. Also this is only
> needed if the guest has a chance to use FWNMI, ie. the spapr cap is set.

ok..

> 
>>
>>>   
>>>> +    spapr->rtas_size = RTAS_ERROR_LOG_MAX;  
>>>
>>> As requested by David, this should only be done when the spapr cap is set,
>>> so that 4.0 machine types and older continue to use the current size.  
>>
>> Due to other issue of re-allocating the blob and as this is not that
>> much space, we agreed to keep the size to RTAS_ERROR_LOG_MAX always.
>>
>> Link to the discussion on this:
>> http://lists.nongnu.org/archive/html/qemu-ppc/2019-05/msg00275.html
>>
> 
> Indeed, and in the next mail in that thread, David writes:
> 
>> No, that's not right.  It's impractical to change the allocation
>> depending on whether fwnmi is currently active.  But you *can* (and
>> should) base the allocation on whether fwnmi is *possible* - that is,
>> the value of the spapr cap.
> 
> ie, allocate RTAS_ERROR_LOG_MAX when the spapr cap is set, allocate
> the file size otherwise.

Ah.. somehow this slipped off my mind...

Regards,
Aravinda

> 
>>>   
>>>> +
>>>>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
>>>>      if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
>>>>          error_report("Could not load LPAR rtas '%s'", filename);
>>>> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
>>>> index a18446b..573c0b7 100644
>>>> --- a/hw/ppc/spapr_events.c
>>>> +++ b/hw/ppc/spapr_events.c
>>>> @@ -212,6 +212,106 @@ struct hp_extended_log {
>>>>      struct rtas_event_log_v6_hp hp;
>>>>  } QEMU_PACKED;
>>>>  
>>>> +struct rtas_event_log_v6_mc {
>>>> +#define RTAS_LOG_V6_SECTION_ID_MC                   0x4D43 /* MC */
>>>> +    struct rtas_event_log_v6_section_header hdr;
>>>> +    uint32_t fru_id;
>>>> +    uint32_t proc_id;
>>>> +    uint8_t error_type;
>>>> +#define RTAS_LOG_V6_MC_TYPE_UE                           0
>>>> +#define RTAS_LOG_V6_MC_TYPE_SLB                          1
>>>> +#define RTAS_LOG_V6_MC_TYPE_ERAT                         2
>>>> +#define RTAS_LOG_V6_MC_TYPE_TLB                          4
>>>> +#define RTAS_LOG_V6_MC_TYPE_D_CACHE                      5
>>>> +#define RTAS_LOG_V6_MC_TYPE_I_CACHE                      7
>>>> +    uint8_t sub_err_type;
>>>> +#define RTAS_LOG_V6_MC_UE_INDETERMINATE                  0
>>>> +#define RTAS_LOG_V6_MC_UE_IFETCH                         1
>>>> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH         2
>>>> +#define RTAS_LOG_V6_MC_UE_LOAD_STORE                     3
>>>> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE     4
>>>> +#define RTAS_LOG_V6_MC_SLB_PARITY                        0
>>>> +#define RTAS_LOG_V6_MC_SLB_MULTIHIT                      1
>>>> +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE                 2
>>>> +#define RTAS_LOG_V6_MC_ERAT_PARITY                       1
>>>> +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT                     2
>>>> +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE                3
>>>> +#define RTAS_LOG_V6_MC_TLB_PARITY                        1
>>>> +#define RTAS_LOG_V6_MC_TLB_MULTIHIT                      2
>>>> +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE                 3
>>>> +    uint8_t reserved_1[6];
>>>> +    uint64_t effective_address;
>>>> +    uint64_t logical_address;
>>>> +} QEMU_PACKED;
>>>> +
>>>> +struct mc_extended_log {
>>>> +    struct rtas_event_log_v6 v6hdr;
>>>> +    struct rtas_event_log_v6_mc mc;
>>>> +} QEMU_PACKED;
>>>> +
>>>> +struct MC_ierror_table {
>>>> +    unsigned long srr1_mask;
>>>> +    unsigned long srr1_value;
>>>> +    bool nip_valid; /* nip is a valid indicator of faulting address */
>>>> +    uint8_t error_type;
>>>> +    uint8_t error_subtype;
>>>> +    unsigned int initiator;
>>>> +    unsigned int severity;
>>>> +};
>>>> +
>>>> +static const struct MC_ierror_table mc_ierror_table[] = {
>>>> +{ 0x00000000081c0000, 0x0000000000040000, true,
>>>> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
>>>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>>>> +{ 0x00000000081c0000, 0x0000000000080000, true,
>>>> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
>>>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>>>> +{ 0x00000000081c0000, 0x00000000000c0000, true,
>>>> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
>>>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>>>> +{ 0x00000000081c0000, 0x0000000000100000, true,
>>>> +  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
>>>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>>>> +{ 0x00000000081c0000, 0x0000000000140000, true,
>>>> +  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
>>>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>>>> +{ 0x00000000081c0000, 0x0000000000180000, true,
>>>> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
>>>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>>>> +{ 0, 0, 0, 0, 0, 0 } };
>>>> +
>>>> +struct MC_derror_table {
>>>> +    unsigned long dsisr_value;
>>>> +    bool dar_valid; /* dar is a valid indicator of faulting address */
>>>> +    uint8_t error_type;
>>>> +    uint8_t error_subtype;
>>>> +    unsigned int initiator;
>>>> +    unsigned int severity;
>>>> +};
>>>> +
>>>> +static const struct MC_derror_table mc_derror_table[] = {
>>>> +{ 0x00008000, false,
>>>> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
>>>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>>>> +{ 0x00004000, true,
>>>> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
>>>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>>>> +{ 0x00000800, true,
>>>> +  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
>>>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>>>> +{ 0x00000400, true,
>>>> +  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
>>>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>>>> +{ 0x00000080, true,
>>>> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,  /* Before PARITY */
>>>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>>>> +{ 0x00000100, true,
>>>> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
>>>> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
>>>> +{ 0, false, 0, 0, 0, 0 } };
>>>> +
>>>> +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
>>>> +
>>>>  typedef enum EventClass {
>>>>      EVENT_CLASS_INTERNAL_ERRORS     = 0,
>>>>      EVENT_CLASS_EPOW                = 1,
>>>> @@ -620,6 +720,138 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
>>>>                              RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
>>>>  }
>>>>  
>>>> +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
>>>> +                                        struct mc_extended_log *ext_elog)
>>>> +{
>>>> +    int i;
>>>> +    CPUPPCState *env = &cpu->env;
>>>> +    uint32_t summary;
>>>> +    uint64_t dsisr = env->spr[SPR_DSISR];
>>>> +
>>>> +    summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT;
>>>> +    if (recovered) {
>>>> +        summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED;
>>>> +    } else {
>>>> +        summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED;
>>>> +    }
>>>> +
>>>> +    if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) {
>>>> +        for (i = 0; mc_derror_table[i].dsisr_value; i++) {
>>>> +            if (!(dsisr & mc_derror_table[i].dsisr_value)) {
>>>> +                continue;
>>>> +            }
>>>> +
>>>> +            ext_elog->mc.error_type = mc_derror_table[i].error_type;
>>>> +            ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype;
>>>> +            if (mc_derror_table[i].dar_valid) {
>>>> +                ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]);
>>>> +            }
>>>> +
>>>> +            summary |= mc_derror_table[i].initiator
>>>> +                        | mc_derror_table[i].severity;
>>>> +
>>>> +            return summary;
>>>> +        }
>>>> +    } else {
>>>> +        for (i = 0; mc_ierror_table[i].srr1_mask; i++) {
>>>> +            if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) !=
>>>> +                    mc_ierror_table[i].srr1_value) {
>>>> +                continue;
>>>> +            }
>>>> +
>>>> +            ext_elog->mc.error_type = mc_ierror_table[i].error_type;
>>>> +            ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype;
>>>> +            if (mc_ierror_table[i].nip_valid) {
>>>> +                ext_elog->mc.effective_address = cpu_to_be64(env->nip);
>>>> +            }
>>>> +
>>>> +            summary |= mc_ierror_table[i].initiator
>>>> +                        | mc_ierror_table[i].severity;
>>>> +
>>>> +            return summary;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    summary |= RTAS_LOG_INITIATOR_CPU;
>>>> +    return summary;
>>>> +}
>>>> +
>>>> +static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
>>>> +{
>>>> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>>>> +    CPUState *cs = CPU(cpu);
>>>> +    uint64_t rtas_addr;
>>>> +    CPUPPCState *env = &cpu->env;
>>>> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
>>>> +    target_ulong r3, msr = 0;
>>>> +    struct rtas_error_log log;
>>>> +    struct mc_extended_log *ext_elog;
>>>> +    uint32_t summary;
>>>> +
>>>> +    /*
>>>> +     * Properly set bits in MSR before we invoke the handler.
>>>> +     * SRR0/1, DAR and DSISR are properly set by KVM
>>>> +     */
>>>> +    if (!(*pcc->interrupts_big_endian)(cpu)) {
>>>> +        msr |= (1ULL << MSR_LE);
>>>> +    }
>>>> +
>>>> +    if (env->msr & (1ULL << MSR_SF)) {
>>>> +        msr |= (1ULL << MSR_SF);
>>>> +    }
>>>> +
>>>> +    msr |= (1ULL << MSR_ME);
>>>> +
>>>> +    if (spapr->guest_machine_check_addr == -1) {
>>>> +        /*
>>>> +         * This implies that we have hit a machine check between system
>>>> +         * reset and "ibm,nmi-register". Fall back to the old machine
>>>> +         * check behavior in such cases.
>>>> +         */
>>>> +        env->spr[SPR_SRR0] = env->nip;
>>>> +        env->spr[SPR_SRR1] = env->msr;
>>>> +        env->msr = msr;
>>>> +        env->nip = 0x200;
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    ext_elog = g_malloc0(sizeof(*ext_elog));
>>>> +    summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog);
>>>> +
>>>> +    log.summary = cpu_to_be32(summary);
>>>> +    log.extended_length = cpu_to_be32(sizeof(*ext_elog));
>>>> +
>>>> +    /* r3 should be in BE always */
>>>> +    r3 = cpu_to_be64(env->gpr[3]);
>>>> +    env->msr = msr;
>>>> +
>>>> +    spapr_init_v6hdr(&ext_elog->v6hdr);
>>>> +    ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC);
>>>> +    ext_elog->mc.hdr.section_length =
>>>> +                    cpu_to_be16(sizeof(struct rtas_event_log_v6_mc));
>>>> +    ext_elog->mc.hdr.section_version = 1;
>>>> +
>>>> +    /* get rtas addr from fdt */
>>>> +    rtas_addr = spapr_get_rtas_addr();
>>>> +    if (!rtas_addr) {
>>>> +        /* Unable to fetch rtas_addr. Hence reset the guest */
>>>> +        ppc_cpu_do_system_reset(cs);
>>>> +    }
>>>> +
>>>> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET, &r3,
>>>> +                              sizeof(r3));
>>>> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + sizeof(r3),
>>>> +                              &log, sizeof(log));
>>>> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + sizeof(r3) +
>>>> +                              sizeof(log), ext_elog,
>>>> +                              sizeof(*ext_elog));
>>>> +
>>>> +    env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET;
>>>> +    env->nip = spapr->guest_machine_check_addr;
>>>> +
>>>> +    g_free(ext_elog);
>>>> +}
>>>> +
>>>>  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>>>>  {
>>>>      SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>>>> @@ -641,6 +873,10 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>>>>          }
>>>>      }
>>>>      spapr->mc_status = cpu->vcpu_id;
>>>> +
>>>> +    spapr_mce_dispatch_elog(cpu, recovered);
>>>> +
>>>> +    return;  
>>>
>>> Drop the last two lines.  
>>
>> ok.
>>
>>>   
>>>>  }
>>>>  
>>>>  static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>>> index fc3a776..c717ab2 100644
>>>> --- a/include/hw/ppc/spapr.h
>>>> +++ b/include/hw/ppc/spapr.h
>>>> @@ -710,6 +710,9 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr);
>>>>  
>>>>  #define RTAS_ERROR_LOG_MAX      2048
>>>>  
>>>> +/* Offset from rtas-base where error log is placed */
>>>> +#define RTAS_ERROR_LOG_OFFSET       0x30
>>>> +
>>>>  #define RTAS_EVENT_SCAN_RATE    1
>>>>  
>>>>  /* This helper should be used to encode interrupt specifiers when the related
>>>> @@ -799,6 +802,7 @@ int spapr_max_server_number(SpaprMachineState *spapr);
>>>>  void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
>>>>                        uint64_t pte0, uint64_t pte1);
>>>>  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
>>>> +ssize_t spapr_get_rtas_size(ssize_t old_rtas_sizea);
>>>>    
>>>
>>> Looks like a leftover.  
>>
>> ah.. yes.
>>
>>>   
>>>>  /* DRC callbacks. */
>>>>  void spapr_core_release(DeviceState *dev);
>>>>  
>>>   
>>
> 
> 

-- 
Regards,
Aravinda

Re: [Qemu-devel] [PATCH v9 4/6] target/ppc: Build rtas error log upon an MCE

Posted by David Gibson 6 years, 5 months ago

On Tue, Jun 04, 2019 at 11:01:19AM +0200, Greg Kurz wrote:
> On Tue, 4 Jun 2019 11:59:13 +0530
> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> 
> > On Monday 03 June 2019 07:30 PM, Greg Kurz wrote:
> > > On Wed, 29 May 2019 11:10:40 +0530
> > > Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> > >   
> > >> Upon a machine check exception (MCE) in a guest address space,
> > >> KVM causes a guest exit to enable QEMU to build and pass the
> > >> error to the guest in the PAPR defined rtas error log format.
> > >>
> > >> This patch builds the rtas error log, copies it to the rtas_addr
> > >> and then invokes the guest registered machine check handler. The
> > >> handler in the guest takes suitable action(s) depending on the type
> > >> and criticality of the error. For example, if an error is
> > >> unrecoverable memory corruption in an application inside the
> > >> guest, then the guest kernel sends a SIGBUS to the application.
> > >> For recoverable errors, the guest performs recovery actions and
> > >> logs the error.
> > >>
> > >> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> > >> ---
> > >>  hw/ppc/spapr.c         |    5 +
> > >>  hw/ppc/spapr_events.c  |  236 ++++++++++++++++++++++++++++++++++++++++++++++++
> > >>  include/hw/ppc/spapr.h |    4 +
> > >>  3 files changed, 245 insertions(+)
> > >>
> > >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > >> index 6b6c962..c97f6a6 100644
> > >> --- a/hw/ppc/spapr.c
> > >> +++ b/hw/ppc/spapr.c
> > >> @@ -2910,6 +2910,11 @@ static void spapr_machine_init(MachineState *machine)
> > >>          error_report("Could not get size of LPAR rtas '%s'", filename);
> > >>          exit(1);
> > >>      }
> > >> +
> > >> +    /* Resize blob to accommodate error log. */
> > >> +    g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);  
> > > 
> > > I don't see the point of this assertion... especially with the assignment
> > > below.  
> > 
> > It is required because we want to ensure that the rtas image size is
> > less than RTAS_ERROR_LOG_OFFSET, or else we will overwrite the rtas
> > image with rtas error when we hit machine check exception. But I think a
> > comment in the code will help. Will add it.
> 
> I'd rather exit QEMU properly instead of aborting then. Also this is only
> needed if the guest has a chance to use FWNMI, ie. the spapr cap is
> set.

I think assert() is appropriate in this case.  If it fails it means
something is wrong in the code, not with configuration.

> 
> > 
> > >   
> > >> +    spapr->rtas_size = RTAS_ERROR_LOG_MAX;  
> > > 
> > > As requested by David, this should only be done when the spapr cap is set,
> > > so that 4.0 machine types and older continue to use the current size.  
> > 
> > Due to other issue of re-allocating the blob and as this is not that
> > much space, we agreed to keep the size to RTAS_ERROR_LOG_MAX always.
> > 
> > Link to the discussion on this:
> > http://lists.nongnu.org/archive/html/qemu-ppc/2019-05/msg00275.html
> > 
> 
> Indeed, and in the next mail in that thread, David writes:
> 
> > No, that's not right.  It's impractical to change the allocation
> > depending on whether fwnmi is currently active.  But you *can* (and
> > should) base the allocation on whether fwnmi is *possible* - that is,
> > the value of the spapr cap.
> 
> ie, allocate RTAS_ERROR_LOG_MAX when the spapr cap is set, allocate
> the file size otherwise.
> 
> > >   
> > >> +
> > >>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
> > >>      if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
> > >>          error_report("Could not load LPAR rtas '%s'", filename);
> > >> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> > >> index a18446b..573c0b7 100644
> > >> --- a/hw/ppc/spapr_events.c
> > >> +++ b/hw/ppc/spapr_events.c
> > >> @@ -212,6 +212,106 @@ struct hp_extended_log {
> > >>      struct rtas_event_log_v6_hp hp;
> > >>  } QEMU_PACKED;
> > >>  
> > >> +struct rtas_event_log_v6_mc {
> > >> +#define RTAS_LOG_V6_SECTION_ID_MC                   0x4D43 /* MC */
> > >> +    struct rtas_event_log_v6_section_header hdr;
> > >> +    uint32_t fru_id;
> > >> +    uint32_t proc_id;
> > >> +    uint8_t error_type;
> > >> +#define RTAS_LOG_V6_MC_TYPE_UE                           0
> > >> +#define RTAS_LOG_V6_MC_TYPE_SLB                          1
> > >> +#define RTAS_LOG_V6_MC_TYPE_ERAT                         2
> > >> +#define RTAS_LOG_V6_MC_TYPE_TLB                          4
> > >> +#define RTAS_LOG_V6_MC_TYPE_D_CACHE                      5
> > >> +#define RTAS_LOG_V6_MC_TYPE_I_CACHE                      7
> > >> +    uint8_t sub_err_type;
> > >> +#define RTAS_LOG_V6_MC_UE_INDETERMINATE                  0
> > >> +#define RTAS_LOG_V6_MC_UE_IFETCH                         1
> > >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH         2
> > >> +#define RTAS_LOG_V6_MC_UE_LOAD_STORE                     3
> > >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE     4
> > >> +#define RTAS_LOG_V6_MC_SLB_PARITY                        0
> > >> +#define RTAS_LOG_V6_MC_SLB_MULTIHIT                      1
> > >> +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE                 2
> > >> +#define RTAS_LOG_V6_MC_ERAT_PARITY                       1
> > >> +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT                     2
> > >> +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE                3
> > >> +#define RTAS_LOG_V6_MC_TLB_PARITY                        1
> > >> +#define RTAS_LOG_V6_MC_TLB_MULTIHIT                      2
> > >> +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE                 3
> > >> +    uint8_t reserved_1[6];
> > >> +    uint64_t effective_address;
> > >> +    uint64_t logical_address;
> > >> +} QEMU_PACKED;
> > >> +
> > >> +struct mc_extended_log {
> > >> +    struct rtas_event_log_v6 v6hdr;
> > >> +    struct rtas_event_log_v6_mc mc;
> > >> +} QEMU_PACKED;
> > >> +
> > >> +struct MC_ierror_table {
> > >> +    unsigned long srr1_mask;
> > >> +    unsigned long srr1_value;
> > >> +    bool nip_valid; /* nip is a valid indicator of faulting address */
> > >> +    uint8_t error_type;
> > >> +    uint8_t error_subtype;
> > >> +    unsigned int initiator;
> > >> +    unsigned int severity;
> > >> +};
> > >> +
> > >> +static const struct MC_ierror_table mc_ierror_table[] = {
> > >> +{ 0x00000000081c0000, 0x0000000000040000, true,
> > >> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
> > >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> > >> +{ 0x00000000081c0000, 0x0000000000080000, true,
> > >> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
> > >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> > >> +{ 0x00000000081c0000, 0x00000000000c0000, true,
> > >> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
> > >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> > >> +{ 0x00000000081c0000, 0x0000000000100000, true,
> > >> +  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
> > >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> > >> +{ 0x00000000081c0000, 0x0000000000140000, true,
> > >> +  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
> > >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> > >> +{ 0x00000000081c0000, 0x0000000000180000, true,
> > >> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
> > >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> > >> +{ 0, 0, 0, 0, 0, 0 } };
> > >> +
> > >> +struct MC_derror_table {
> > >> +    unsigned long dsisr_value;
> > >> +    bool dar_valid; /* dar is a valid indicator of faulting address */
> > >> +    uint8_t error_type;
> > >> +    uint8_t error_subtype;
> > >> +    unsigned int initiator;
> > >> +    unsigned int severity;
> > >> +};
> > >> +
> > >> +static const struct MC_derror_table mc_derror_table[] = {
> > >> +{ 0x00008000, false,
> > >> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
> > >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> > >> +{ 0x00004000, true,
> > >> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
> > >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> > >> +{ 0x00000800, true,
> > >> +  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
> > >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> > >> +{ 0x00000400, true,
> > >> +  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
> > >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> > >> +{ 0x00000080, true,
> > >> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,  /* Before PARITY */
> > >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> > >> +{ 0x00000100, true,
> > >> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
> > >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> > >> +{ 0, false, 0, 0, 0, 0 } };
> > >> +
> > >> +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
> > >> +
> > >>  typedef enum EventClass {
> > >>      EVENT_CLASS_INTERNAL_ERRORS     = 0,
> > >>      EVENT_CLASS_EPOW                = 1,
> > >> @@ -620,6 +720,138 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
> > >>                              RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
> > >>  }
> > >>  
> > >> +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
> > >> +                                        struct mc_extended_log *ext_elog)
> > >> +{
> > >> +    int i;
> > >> +    CPUPPCState *env = &cpu->env;
> > >> +    uint32_t summary;
> > >> +    uint64_t dsisr = env->spr[SPR_DSISR];
> > >> +
> > >> +    summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT;
> > >> +    if (recovered) {
> > >> +        summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED;
> > >> +    } else {
> > >> +        summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED;
> > >> +    }
> > >> +
> > >> +    if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) {
> > >> +        for (i = 0; mc_derror_table[i].dsisr_value; i++) {
> > >> +            if (!(dsisr & mc_derror_table[i].dsisr_value)) {
> > >> +                continue;
> > >> +            }
> > >> +
> > >> +            ext_elog->mc.error_type = mc_derror_table[i].error_type;
> > >> +            ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype;
> > >> +            if (mc_derror_table[i].dar_valid) {
> > >> +                ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]);
> > >> +            }
> > >> +
> > >> +            summary |= mc_derror_table[i].initiator
> > >> +                        | mc_derror_table[i].severity;
> > >> +
> > >> +            return summary;
> > >> +        }
> > >> +    } else {
> > >> +        for (i = 0; mc_ierror_table[i].srr1_mask; i++) {
> > >> +            if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) !=
> > >> +                    mc_ierror_table[i].srr1_value) {
> > >> +                continue;
> > >> +            }
> > >> +
> > >> +            ext_elog->mc.error_type = mc_ierror_table[i].error_type;
> > >> +            ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype;
> > >> +            if (mc_ierror_table[i].nip_valid) {
> > >> +                ext_elog->mc.effective_address = cpu_to_be64(env->nip);
> > >> +            }
> > >> +
> > >> +            summary |= mc_ierror_table[i].initiator
> > >> +                        | mc_ierror_table[i].severity;
> > >> +
> > >> +            return summary;
> > >> +        }
> > >> +    }
> > >> +
> > >> +    summary |= RTAS_LOG_INITIATOR_CPU;
> > >> +    return summary;
> > >> +}
> > >> +
> > >> +static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
> > >> +{
> > >> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> > >> +    CPUState *cs = CPU(cpu);
> > >> +    uint64_t rtas_addr;
> > >> +    CPUPPCState *env = &cpu->env;
> > >> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> > >> +    target_ulong r3, msr = 0;
> > >> +    struct rtas_error_log log;
> > >> +    struct mc_extended_log *ext_elog;
> > >> +    uint32_t summary;
> > >> +
> > >> +    /*
> > >> +     * Properly set bits in MSR before we invoke the handler.
> > >> +     * SRR0/1, DAR and DSISR are properly set by KVM
> > >> +     */
> > >> +    if (!(*pcc->interrupts_big_endian)(cpu)) {
> > >> +        msr |= (1ULL << MSR_LE);
> > >> +    }
> > >> +
> > >> +    if (env->msr & (1ULL << MSR_SF)) {
> > >> +        msr |= (1ULL << MSR_SF);
> > >> +    }
> > >> +
> > >> +    msr |= (1ULL << MSR_ME);
> > >> +
> > >> +    if (spapr->guest_machine_check_addr == -1) {
> > >> +        /*
> > >> +         * This implies that we have hit a machine check between system
> > >> +         * reset and "ibm,nmi-register". Fall back to the old machine
> > >> +         * check behavior in such cases.
> > >> +         */
> > >> +        env->spr[SPR_SRR0] = env->nip;
> > >> +        env->spr[SPR_SRR1] = env->msr;
> > >> +        env->msr = msr;
> > >> +        env->nip = 0x200;
> > >> +        return;
> > >> +    }
> > >> +
> > >> +    ext_elog = g_malloc0(sizeof(*ext_elog));
> > >> +    summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog);
> > >> +
> > >> +    log.summary = cpu_to_be32(summary);
> > >> +    log.extended_length = cpu_to_be32(sizeof(*ext_elog));
> > >> +
> > >> +    /* r3 should be in BE always */
> > >> +    r3 = cpu_to_be64(env->gpr[3]);
> > >> +    env->msr = msr;
> > >> +
> > >> +    spapr_init_v6hdr(&ext_elog->v6hdr);
> > >> +    ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC);
> > >> +    ext_elog->mc.hdr.section_length =
> > >> +                    cpu_to_be16(sizeof(struct rtas_event_log_v6_mc));
> > >> +    ext_elog->mc.hdr.section_version = 1;
> > >> +
> > >> +    /* get rtas addr from fdt */
> > >> +    rtas_addr = spapr_get_rtas_addr();
> > >> +    if (!rtas_addr) {
> > >> +        /* Unable to fetch rtas_addr. Hence reset the guest */
> > >> +        ppc_cpu_do_system_reset(cs);
> > >> +    }
> > >> +
> > >> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET, &r3,
> > >> +                              sizeof(r3));
> > >> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + sizeof(r3),
> > >> +                              &log, sizeof(log));
> > >> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + sizeof(r3) +
> > >> +                              sizeof(log), ext_elog,
> > >> +                              sizeof(*ext_elog));
> > >> +
> > >> +    env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET;
> > >> +    env->nip = spapr->guest_machine_check_addr;
> > >> +
> > >> +    g_free(ext_elog);
> > >> +}
> > >> +
> > >>  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> > >>  {
> > >>      SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> > >> @@ -641,6 +873,10 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> > >>          }
> > >>      }
> > >>      spapr->mc_status = cpu->vcpu_id;
> > >> +
> > >> +    spapr_mce_dispatch_elog(cpu, recovered);
> > >> +
> > >> +    return;  
> > > 
> > > Drop the last two lines.  
> > 
> > ok.
> > 
> > >   
> > >>  }
> > >>  
> > >>  static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
> > >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > >> index fc3a776..c717ab2 100644
> > >> --- a/include/hw/ppc/spapr.h
> > >> +++ b/include/hw/ppc/spapr.h
> > >> @@ -710,6 +710,9 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr);
> > >>  
> > >>  #define RTAS_ERROR_LOG_MAX      2048
> > >>  
> > >> +/* Offset from rtas-base where error log is placed */
> > >> +#define RTAS_ERROR_LOG_OFFSET       0x30
> > >> +
> > >>  #define RTAS_EVENT_SCAN_RATE    1
> > >>  
> > >>  /* This helper should be used to encode interrupt specifiers when the related
> > >> @@ -799,6 +802,7 @@ int spapr_max_server_number(SpaprMachineState *spapr);
> > >>  void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
> > >>                        uint64_t pte0, uint64_t pte1);
> > >>  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
> > >> +ssize_t spapr_get_rtas_size(ssize_t old_rtas_sizea);
> > >>    
> > > 
> > > Looks like a leftover.  
> > 
> > ah.. yes.
> > 
> > >   
> > >>  /* DRC callbacks. */
> > >>  void spapr_core_release(DeviceState *dev);
> > >>  
> > >   
> > 
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

Re: [Qemu-devel] [PATCH v9 4/6] target/ppc: Build rtas error log upon an MCE

Posted by David Gibson 6 years, 5 months ago

On Tue, Jun 04, 2019 at 11:59:13AM +0530, Aravinda Prasad wrote:
> 
> 
> On Monday 03 June 2019 07:30 PM, Greg Kurz wrote:
> > On Wed, 29 May 2019 11:10:40 +0530
> > Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> > 
> >> Upon a machine check exception (MCE) in a guest address space,
> >> KVM causes a guest exit to enable QEMU to build and pass the
> >> error to the guest in the PAPR defined rtas error log format.
> >>
> >> This patch builds the rtas error log, copies it to the rtas_addr
> >> and then invokes the guest registered machine check handler. The
> >> handler in the guest takes suitable action(s) depending on the type
> >> and criticality of the error. For example, if an error is
> >> unrecoverable memory corruption in an application inside the
> >> guest, then the guest kernel sends a SIGBUS to the application.
> >> For recoverable errors, the guest performs recovery actions and
> >> logs the error.
> >>
> >> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >> ---
> >>  hw/ppc/spapr.c         |    5 +
> >>  hw/ppc/spapr_events.c  |  236 ++++++++++++++++++++++++++++++++++++++++++++++++
> >>  include/hw/ppc/spapr.h |    4 +
> >>  3 files changed, 245 insertions(+)
> >>
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index 6b6c962..c97f6a6 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -2910,6 +2910,11 @@ static void spapr_machine_init(MachineState *machine)
> >>          error_report("Could not get size of LPAR rtas '%s'", filename);
> >>          exit(1);
> >>      }
> >> +
> >> +    /* Resize blob to accommodate error log. */
> >> +    g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);
> > 
> > I don't see the point of this assertion... especially with the assignment
> > below.
> 
> It is required because we want to ensure that the rtas image size is
> less than RTAS_ERROR_LOG_OFFSET, or else we will overwrite the rtas
> image with rtas error when we hit machine check exception. But I think a
> comment in the code will help. Will add it.
> 
> 
> > 
> >> +    spapr->rtas_size = RTAS_ERROR_LOG_MAX;
> > 
> > As requested by David, this should only be done when the spapr cap is set,
> > so that 4.0 machine types and older continue to use the current size.
> 
> Due to other issue of re-allocating the blob and as this is not that
> much space, we agreed to keep the size to RTAS_ERROR_LOG_MAX always.
> 
> Link to the discussion on this:
> http://lists.nongnu.org/archive/html/qemu-ppc/2019-05/msg00275.html

Sorry, I wasn't clear in that discussion.  It is definitely *not* ok
to advertise the increased size to the guest for old machine types.
It *is* ok to waste some space inside qemu internal allocations if it
reduces conditionals.


> 
> > 
> >> +
> >>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
> >>      if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
> >>          error_report("Could not load LPAR rtas '%s'", filename);
> >> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> >> index a18446b..573c0b7 100644
> >> --- a/hw/ppc/spapr_events.c
> >> +++ b/hw/ppc/spapr_events.c
> >> @@ -212,6 +212,106 @@ struct hp_extended_log {
> >>      struct rtas_event_log_v6_hp hp;
> >>  } QEMU_PACKED;
> >>  
> >> +struct rtas_event_log_v6_mc {
> >> +#define RTAS_LOG_V6_SECTION_ID_MC                   0x4D43 /* MC */
> >> +    struct rtas_event_log_v6_section_header hdr;
> >> +    uint32_t fru_id;
> >> +    uint32_t proc_id;
> >> +    uint8_t error_type;
> >> +#define RTAS_LOG_V6_MC_TYPE_UE                           0
> >> +#define RTAS_LOG_V6_MC_TYPE_SLB                          1
> >> +#define RTAS_LOG_V6_MC_TYPE_ERAT                         2
> >> +#define RTAS_LOG_V6_MC_TYPE_TLB                          4
> >> +#define RTAS_LOG_V6_MC_TYPE_D_CACHE                      5
> >> +#define RTAS_LOG_V6_MC_TYPE_I_CACHE                      7
> >> +    uint8_t sub_err_type;
> >> +#define RTAS_LOG_V6_MC_UE_INDETERMINATE                  0
> >> +#define RTAS_LOG_V6_MC_UE_IFETCH                         1
> >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH         2
> >> +#define RTAS_LOG_V6_MC_UE_LOAD_STORE                     3
> >> +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE     4
> >> +#define RTAS_LOG_V6_MC_SLB_PARITY                        0
> >> +#define RTAS_LOG_V6_MC_SLB_MULTIHIT                      1
> >> +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE                 2
> >> +#define RTAS_LOG_V6_MC_ERAT_PARITY                       1
> >> +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT                     2
> >> +#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE                3
> >> +#define RTAS_LOG_V6_MC_TLB_PARITY                        1
> >> +#define RTAS_LOG_V6_MC_TLB_MULTIHIT                      2
> >> +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE                 3
> >> +    uint8_t reserved_1[6];
> >> +    uint64_t effective_address;
> >> +    uint64_t logical_address;
> >> +} QEMU_PACKED;
> >> +
> >> +struct mc_extended_log {
> >> +    struct rtas_event_log_v6 v6hdr;
> >> +    struct rtas_event_log_v6_mc mc;
> >> +} QEMU_PACKED;
> >> +
> >> +struct MC_ierror_table {
> >> +    unsigned long srr1_mask;
> >> +    unsigned long srr1_value;
> >> +    bool nip_valid; /* nip is a valid indicator of faulting address */
> >> +    uint8_t error_type;
> >> +    uint8_t error_subtype;
> >> +    unsigned int initiator;
> >> +    unsigned int severity;
> >> +};
> >> +
> >> +static const struct MC_ierror_table mc_ierror_table[] = {
> >> +{ 0x00000000081c0000, 0x0000000000040000, true,
> >> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000000081c0000, 0x0000000000080000, true,
> >> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000000081c0000, 0x00000000000c0000, true,
> >> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000000081c0000, 0x0000000000100000, true,
> >> +  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000000081c0000, 0x0000000000140000, true,
> >> +  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000000081c0000, 0x0000000000180000, true,
> >> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0, 0, 0, 0, 0, 0 } };
> >> +
> >> +struct MC_derror_table {
> >> +    unsigned long dsisr_value;
> >> +    bool dar_valid; /* dar is a valid indicator of faulting address */
> >> +    uint8_t error_type;
> >> +    uint8_t error_subtype;
> >> +    unsigned int initiator;
> >> +    unsigned int severity;
> >> +};
> >> +
> >> +static const struct MC_derror_table mc_derror_table[] = {
> >> +{ 0x00008000, false,
> >> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00004000, true,
> >> +  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000800, true,
> >> +  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000400, true,
> >> +  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000080, true,
> >> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,  /* Before PARITY */
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0x00000100, true,
> >> +  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
> >> +  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
> >> +{ 0, false, 0, 0, 0, 0 } };
> >> +
> >> +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
> >> +
> >>  typedef enum EventClass {
> >>      EVENT_CLASS_INTERNAL_ERRORS     = 0,
> >>      EVENT_CLASS_EPOW                = 1,
> >> @@ -620,6 +720,138 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
> >>                              RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
> >>  }
> >>  
> >> +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
> >> +                                        struct mc_extended_log *ext_elog)
> >> +{
> >> +    int i;
> >> +    CPUPPCState *env = &cpu->env;
> >> +    uint32_t summary;
> >> +    uint64_t dsisr = env->spr[SPR_DSISR];
> >> +
> >> +    summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT;
> >> +    if (recovered) {
> >> +        summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED;
> >> +    } else {
> >> +        summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED;
> >> +    }
> >> +
> >> +    if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) {
> >> +        for (i = 0; mc_derror_table[i].dsisr_value; i++) {
> >> +            if (!(dsisr & mc_derror_table[i].dsisr_value)) {
> >> +                continue;
> >> +            }
> >> +
> >> +            ext_elog->mc.error_type = mc_derror_table[i].error_type;
> >> +            ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype;
> >> +            if (mc_derror_table[i].dar_valid) {
> >> +                ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]);
> >> +            }
> >> +
> >> +            summary |= mc_derror_table[i].initiator
> >> +                        | mc_derror_table[i].severity;
> >> +
> >> +            return summary;
> >> +        }
> >> +    } else {
> >> +        for (i = 0; mc_ierror_table[i].srr1_mask; i++) {
> >> +            if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) !=
> >> +                    mc_ierror_table[i].srr1_value) {
> >> +                continue;
> >> +            }
> >> +
> >> +            ext_elog->mc.error_type = mc_ierror_table[i].error_type;
> >> +            ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype;
> >> +            if (mc_ierror_table[i].nip_valid) {
> >> +                ext_elog->mc.effective_address = cpu_to_be64(env->nip);
> >> +            }
> >> +
> >> +            summary |= mc_ierror_table[i].initiator
> >> +                        | mc_ierror_table[i].severity;
> >> +
> >> +            return summary;
> >> +        }
> >> +    }
> >> +
> >> +    summary |= RTAS_LOG_INITIATOR_CPU;
> >> +    return summary;
> >> +}
> >> +
> >> +static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
> >> +{
> >> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> >> +    CPUState *cs = CPU(cpu);
> >> +    uint64_t rtas_addr;
> >> +    CPUPPCState *env = &cpu->env;
> >> +    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> >> +    target_ulong r3, msr = 0;
> >> +    struct rtas_error_log log;
> >> +    struct mc_extended_log *ext_elog;
> >> +    uint32_t summary;
> >> +
> >> +    /*
> >> +     * Properly set bits in MSR before we invoke the handler.
> >> +     * SRR0/1, DAR and DSISR are properly set by KVM
> >> +     */
> >> +    if (!(*pcc->interrupts_big_endian)(cpu)) {
> >> +        msr |= (1ULL << MSR_LE);
> >> +    }
> >> +
> >> +    if (env->msr & (1ULL << MSR_SF)) {
> >> +        msr |= (1ULL << MSR_SF);
> >> +    }
> >> +
> >> +    msr |= (1ULL << MSR_ME);
> >> +
> >> +    if (spapr->guest_machine_check_addr == -1) {
> >> +        /*
> >> +         * This implies that we have hit a machine check between system
> >> +         * reset and "ibm,nmi-register". Fall back to the old machine
> >> +         * check behavior in such cases.
> >> +         */
> >> +        env->spr[SPR_SRR0] = env->nip;
> >> +        env->spr[SPR_SRR1] = env->msr;
> >> +        env->msr = msr;
> >> +        env->nip = 0x200;
> >> +        return;
> >> +    }
> >> +
> >> +    ext_elog = g_malloc0(sizeof(*ext_elog));
> >> +    summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog);
> >> +
> >> +    log.summary = cpu_to_be32(summary);
> >> +    log.extended_length = cpu_to_be32(sizeof(*ext_elog));
> >> +
> >> +    /* r3 should be in BE always */
> >> +    r3 = cpu_to_be64(env->gpr[3]);
> >> +    env->msr = msr;
> >> +
> >> +    spapr_init_v6hdr(&ext_elog->v6hdr);
> >> +    ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC);
> >> +    ext_elog->mc.hdr.section_length =
> >> +                    cpu_to_be16(sizeof(struct rtas_event_log_v6_mc));
> >> +    ext_elog->mc.hdr.section_version = 1;
> >> +
> >> +    /* get rtas addr from fdt */
> >> +    rtas_addr = spapr_get_rtas_addr();
> >> +    if (!rtas_addr) {
> >> +        /* Unable to fetch rtas_addr. Hence reset the guest */
> >> +        ppc_cpu_do_system_reset(cs);
> >> +    }
> >> +
> >> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET, &r3,
> >> +                              sizeof(r3));
> >> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + sizeof(r3),
> >> +                              &log, sizeof(log));
> >> +    cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + sizeof(r3) +
> >> +                              sizeof(log), ext_elog,
> >> +                              sizeof(*ext_elog));
> >> +
> >> +    env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET;
> >> +    env->nip = spapr->guest_machine_check_addr;
> >> +
> >> +    g_free(ext_elog);
> >> +}
> >> +
> >>  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> >>  {
> >>      SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> >> @@ -641,6 +873,10 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> >>          }
> >>      }
> >>      spapr->mc_status = cpu->vcpu_id;
> >> +
> >> +    spapr_mce_dispatch_elog(cpu, recovered);
> >> +
> >> +    return;
> > 
> > Drop the last two lines.
> 
> ok.
> 
> > 
> >>  }
> >>  
> >>  static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> >> index fc3a776..c717ab2 100644
> >> --- a/include/hw/ppc/spapr.h
> >> +++ b/include/hw/ppc/spapr.h
> >> @@ -710,6 +710,9 @@ void spapr_load_rtas(SpaprMachineState *spapr, void *fdt, hwaddr addr);
> >>  
> >>  #define RTAS_ERROR_LOG_MAX      2048
> >>  
> >> +/* Offset from rtas-base where error log is placed */
> >> +#define RTAS_ERROR_LOG_OFFSET       0x30
> >> +
> >>  #define RTAS_EVENT_SCAN_RATE    1
> >>  
> >>  /* This helper should be used to encode interrupt specifiers when the related
> >> @@ -799,6 +802,7 @@ int spapr_max_server_number(SpaprMachineState *spapr);
> >>  void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
> >>                        uint64_t pte0, uint64_t pte1);
> >>  void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
> >> +ssize_t spapr_get_rtas_size(ssize_t old_rtas_sizea);
> >>  
> > 
> > Looks like a leftover.
> 
> ah.. yes.
> 
> > 
> >>  /* DRC callbacks. */
> >>  void spapr_core_release(DeviceState *dev);
> >>
> > 
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[Qemu-devel] [PATCH v9 1/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
[Qemu-devel] [PATCH v9 2/6] Wrapper function to wait on condition for the main loop mutex
[Qemu-devel] [PATCH v9 3/6] target/ppc: Handle NMI guest exit
[Qemu-devel] [PATCH v9 4/6] target/ppc: Build rtas error log upon an MCE
[Qemu-devel] [PATCH v9 5/6] ppc: spapr: Enable FWNMI capability
[Qemu-devel] [PATCH v9 6/6] migration: Include migration support for machine check handling