[PATCH v2 19/19] ppc/pnv: Add an LPAR per core machine option

Nicholas Piggin posted 19 patches 4 months, 2 weeks ago
Maintainers: "Cédric Le Goater" <clg@kaod.org>, Nicholas Piggin <npiggin@gmail.com>, "Frédéric Barrat" <fbarrat@linux.ibm.com>, Daniel Henrique Barboza <danielhb413@gmail.com>, David Gibson <david@gibson.dropbear.id.au>, Harsh Prateek Bora <harshpb@linux.ibm.com>, Eduardo Habkost <eduardo@habkost.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Yanan Wang <wangyanan55@huawei.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>
There is a newer version of this series
[PATCH v2 19/19] ppc/pnv: Add an LPAR per core machine option
Posted by Nicholas Piggin 4 months, 2 weeks ago
Recent POWER CPUs can operate in "LPAR per core" or "LPAR per thread"
modes. In per-core mode, some SPRs and IPI doorbells are shared between
threads in a core. In per-thread mode, supervisor and user state is
not shared between threads.

OpenPOWER systems after POWER8 use LPAR per thread mode, and it is
required for KVM. Enterprise systems use LPAR per core mode, as they
partition the machine by core.

Implement a lpar-per-core machine option for powernv machines. This
is fixed true for POWER8 machines, and defaults off for P9 and P10.

With this change, powernv8 SMT now works sufficiently to run Linux,
with a single socket. Multi-threaded KVM guests still have problems,
as does multi-socket Linux boot.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 include/hw/ppc/pnv.h      |  2 ++
 include/hw/ppc/pnv_chip.h |  1 +
 include/hw/ppc/pnv_core.h |  1 +
 hw/ppc/pnv.c              | 35 +++++++++++++++++++++++++++++++++++
 hw/ppc/pnv_core.c         |  7 +++++++
 target/ppc/cpu_init.c     |  3 ++-
 6 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
index b7858d310d..fcb6699150 100644
--- a/include/hw/ppc/pnv.h
+++ b/include/hw/ppc/pnv.h
@@ -77,6 +77,7 @@ struct PnvMachineClass {
     const char *compat;
     int compat_size;
     int max_smt_threads;
+    bool has_lpar_per_thread;
     bool quirk_tb_big_core;
 
     void (*dt_power_mgt)(PnvMachineState *pnv, void *fdt);
@@ -104,6 +105,7 @@ struct PnvMachineState {
     hwaddr       fw_load_addr;
 
     bool         big_core;
+    bool         lpar_per_core;
 };
 
 PnvChip *pnv_get_chip(PnvMachineState *pnv, uint32_t chip_id);
diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h
index 69d8273efe..ee1649babc 100644
--- a/include/hw/ppc/pnv_chip.h
+++ b/include/hw/ppc/pnv_chip.h
@@ -28,6 +28,7 @@ struct PnvChip {
     uint64_t     ram_size;
 
     bool         big_core;
+    bool         lpar_per_core;
     uint32_t     nr_cores;
     uint32_t     nr_threads;
     uint64_t     cores_mask;
diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
index 1de79a818e..d8afb4f95f 100644
--- a/include/hw/ppc/pnv_core.h
+++ b/include/hw/ppc/pnv_core.h
@@ -57,6 +57,7 @@ struct PnvCore {
     /*< public >*/
     PowerPCCPU **threads;
     bool big_core;
+    bool lpar_per_core;
     uint32_t pir;
     uint32_t hwid;
     uint64_t hrmor;
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index cd96cde6c9..be1a48d2cb 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -1026,6 +1026,11 @@ static void pnv_init(MachineState *machine)
         exit(1);
     }
 
+    /* Set lpar-per-core mode if lpar-per-thread is not supported */
+    if (!pmc->has_lpar_per_thread) {
+        pnv->lpar_per_core = true;
+    }
+
     pnv->num_chips =
         machine->smp.max_cpus / (machine->smp.cores * machine->smp.threads);
 
@@ -2308,6 +2313,7 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp)
     }
 
     chip->big_core = pnv->big_core;
+    chip->lpar_per_core = pnv->lpar_per_core;
 
     chip->cores = g_new0(PnvCore *, chip->nr_cores);
 
@@ -2339,6 +2345,7 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp)
 
         pnv_core->tod_state.big_core_quirk = pmc->quirk_tb_big_core;
         pnv_core->big_core = chip->big_core;
+        pnv_core->lpar_per_core = chip->lpar_per_core;
 
         qdev_realize(DEVICE(pnv_core), NULL, &error_fatal);
 
@@ -2591,6 +2598,18 @@ static void pnv_machine_set_big_core(Object *obj, bool value, Error **errp)
     pnv->big_core = value;
 }
 
+static bool pnv_machine_get_lpar_per_core(Object *obj, Error **errp)
+{
+    PnvMachineState *pnv = PNV_MACHINE(obj);
+    return pnv->lpar_per_core;
+}
+
+static void pnv_machine_set_lpar_per_core(Object *obj, bool value, Error **errp)
+{
+    PnvMachineState *pnv = PNV_MACHINE(obj);
+    pnv->lpar_per_core = value;
+}
+
 static bool pnv_machine_get_hb(Object *obj, Error **errp)
 {
     PnvMachineState *pnv = PNV_MACHINE(obj);
@@ -2630,6 +2649,8 @@ static void pnv_machine_power8_class_init(ObjectClass *oc, void *data)
     pmc->compat = compat;
     pmc->compat_size = sizeof(compat);
     pmc->max_smt_threads = 8;
+    /* POWER8 is always lpar-per-core mode */
+    pmc->has_lpar_per_thread = false;
 
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB);
 }
@@ -2655,6 +2676,7 @@ static void pnv_machine_power9_class_init(ObjectClass *oc, void *data)
     pmc->compat = compat;
     pmc->compat_size = sizeof(compat);
     pmc->max_smt_threads = 4;
+    pmc->has_lpar_per_thread = true;
     pmc->dt_power_mgt = pnv_dt_power_mgt;
 
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB);
@@ -2664,6 +2686,12 @@ static void pnv_machine_power9_class_init(ObjectClass *oc, void *data)
                                    pnv_machine_set_big_core);
     object_class_property_set_description(oc, "big-core",
                               "Use big-core (aka fused-core) mode");
+
+    object_class_property_add_bool(oc, "lpar-per-core",
+                                   pnv_machine_get_lpar_per_core,
+                                   pnv_machine_set_lpar_per_core);
+    object_class_property_set_description(oc, "lpar-per-core",
+                              "Use 1 LPAR per core mode");
 }
 
 static void pnv_machine_p10_common_class_init(ObjectClass *oc, void *data)
@@ -2686,6 +2714,7 @@ static void pnv_machine_p10_common_class_init(ObjectClass *oc, void *data)
     pmc->compat = compat;
     pmc->compat_size = sizeof(compat);
     pmc->max_smt_threads = 4;
+    pmc->has_lpar_per_thread = true;
     pmc->quirk_tb_big_core = true;
     pmc->dt_power_mgt = pnv_dt_power_mgt;
 
@@ -2711,6 +2740,12 @@ static void pnv_machine_power10_class_init(ObjectClass *oc, void *data)
                                    pnv_machine_set_big_core);
     object_class_property_set_description(oc, "big-core",
                               "Use big-core (aka fused-core) mode");
+
+    object_class_property_add_bool(oc, "lpar-per-core",
+                                   pnv_machine_get_lpar_per_core,
+                                   pnv_machine_set_lpar_per_core);
+    object_class_property_set_description(oc, "lpar-per-core",
+                              "Use 1 LPAR per core mode");
 }
 
 static void pnv_machine_p10_rainier_class_init(ObjectClass *oc, void *data)
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
index b32bdb79ff..d59da16ce4 100644
--- a/hw/ppc/pnv_core.c
+++ b/hw/ppc/pnv_core.c
@@ -208,6 +208,9 @@ static uint64_t pnv_core_power10_xscom_read(void *opaque, hwaddr addr,
                 val |= PPC_BIT(56 + i);
             }
         }
+        if (pc->lpar_per_core) {
+            val |= PPC_BIT(62);
+        }
         break;
     case PNV10_XSCOM_EC_CORE_THREAD_INFO:
         break;
@@ -321,6 +324,10 @@ static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp,
         env->core_index = core_hwid;
     }
 
+    if (pc->lpar_per_core) {
+        cpu_ppc_set_1lpar(cpu);
+    }
+
     /* Set time-base frequency to 512 MHz */
     cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
 }
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 81dd4e1a7a..4ba7f54510 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6803,7 +6803,8 @@ void cpu_ppc_set_1lpar(PowerPCCPU *cpu)
 
     /*
      * pseries SMT means "LPAR per core" mode, e.g., msgsndp is usable
-     * between threads.
+     * between threads. powernv be in either mode, and it mostly affects
+     * supervisor visible registers and instructions.
      */
     if (env->flags & POWERPC_FLAG_SMT) {
         env->flags |= POWERPC_FLAG_SMT_1LPAR;
-- 
2.45.1
Re: [PATCH v2 19/19] ppc/pnv: Add an LPAR per core machine option
Posted by Cédric Le Goater 4 months, 2 weeks ago
On 7/12/24 14:02, Nicholas Piggin wrote:
> Recent POWER CPUs can operate in "LPAR per core" or "LPAR per thread"
> modes. In per-core mode, some SPRs and IPI doorbells are shared between
> threads in a core. In per-thread mode, supervisor and user state is
> not shared between threads.
> 
> OpenPOWER systems after POWER8 use LPAR per thread mode, and it is
> required for KVM. Enterprise systems use LPAR per core mode, as they
> partition the machine by core.
> 
> Implement a lpar-per-core machine option for powernv machines. This
> is fixed true for POWER8 machines, and defaults off for P9 and P10.
> 
> With this change, powernv8 SMT now works sufficiently to run Linux,
> with a single socket. Multi-threaded KVM guests still have problems,
> as does multi-socket Linux boot.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Looks good. Some more proposal again,

> ---
>   include/hw/ppc/pnv.h      |  2 ++
>   include/hw/ppc/pnv_chip.h |  1 +
>   include/hw/ppc/pnv_core.h |  1 +
>   hw/ppc/pnv.c              | 35 +++++++++++++++++++++++++++++++++++
>   hw/ppc/pnv_core.c         |  7 +++++++
>   target/ppc/cpu_init.c     |  3 ++-
>   6 files changed, 48 insertions(+), 1 deletion(-)
> 
> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h
> index b7858d310d..fcb6699150 100644
> --- a/include/hw/ppc/pnv.h
> +++ b/include/hw/ppc/pnv.h
> @@ -77,6 +77,7 @@ struct PnvMachineClass {
>       const char *compat;
>       int compat_size;
>       int max_smt_threads;
> +    bool has_lpar_per_thread;
>       bool quirk_tb_big_core;
>   
>       void (*dt_power_mgt)(PnvMachineState *pnv, void *fdt);
> @@ -104,6 +105,7 @@ struct PnvMachineState {
>       hwaddr       fw_load_addr;
>   
>       bool         big_core;
> +    bool         lpar_per_core;
>   };
>   
>   PnvChip *pnv_get_chip(PnvMachineState *pnv, uint32_t chip_id);
> diff --git a/include/hw/ppc/pnv_chip.h b/include/hw/ppc/pnv_chip.h
> index 69d8273efe..ee1649babc 100644
> --- a/include/hw/ppc/pnv_chip.h
> +++ b/include/hw/ppc/pnv_chip.h
> @@ -28,6 +28,7 @@ struct PnvChip {
>       uint64_t     ram_size;
>   
>       bool         big_core;
> +    bool         lpar_per_core;
>       uint32_t     nr_cores;
>       uint32_t     nr_threads;
>       uint64_t     cores_mask;
> diff --git a/include/hw/ppc/pnv_core.h b/include/hw/ppc/pnv_core.h
> index 1de79a818e..d8afb4f95f 100644
> --- a/include/hw/ppc/pnv_core.h
> +++ b/include/hw/ppc/pnv_core.h
> @@ -57,6 +57,7 @@ struct PnvCore {
>       /*< public >*/
>       PowerPCCPU **threads;
>       bool big_core;
> +    bool lpar_per_core;
>       uint32_t pir;
>       uint32_t hwid;
>       uint64_t hrmor;
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index cd96cde6c9..be1a48d2cb 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -1026,6 +1026,11 @@ static void pnv_init(MachineState *machine)
>           exit(1);
>       }
>   
> +    /* Set lpar-per-core mode if lpar-per-thread is not supported */
> +    if (!pmc->has_lpar_per_thread) {
> +        pnv->lpar_per_core = true;
> +    }
> +
>       pnv->num_chips =
>           machine->smp.max_cpus / (machine->smp.cores * machine->smp.threads);

Please add :

@@ -1108,6 +1113,8 @@ static void pnv_init(MachineState *machi
                                  &error_fatal);
          object_property_set_bool(chip, "big-core", pnv->big_core,
                                   &error_fatal);
+        object_property_set_bool(chip, "lpar-per-core", pnv->lpar_per_core,
+                                 &error_fatal);
  
          /*
           * The POWER8 machine use the XICS interrupt interface.



>   
> @@ -2308,6 +2313,7 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp)
>       }
>   
>       chip->big_core = pnv->big_core;
> +    chip->lpar_per_core = pnv->lpar_per_core;

and drop this change



>   
>       chip->cores = g_new0(PnvCore *, chip->nr_cores);
>   
> @@ -2339,6 +2345,7 @@ static void pnv_chip_core_realize(PnvChip *chip, Error **errp)
>   
>           pnv_core->tod_state.big_core_quirk = pmc->quirk_tb_big_core;
>           pnv_core->big_core = chip->big_core;
> +        pnv_core->lpar_per_core = chip->lpar_per_core;

please drop and add

@@ -2366,6 +2373,8 @@ static void pnv_chip_core_realize(PnvChi
                                   &error_abort);
          object_property_set_bool(OBJECT(pnv_core), "big-core-quirk",
                                   pmc->quirk_tb_big_core, &error_abort);
+        object_property_set_bool(OBJECT(pnv_core), "lpar-per-core",
+                                 chip->lpar_per_core, &error_abort);
  
          qdev_realize(DEVICE(pnv_core), NULL, &error_fatal);
  
@@ -2399,6 +2408,7 @@ static Property pnv_chip_properties[] =
      DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0),
      DEFINE_PROP_UINT32("nr-threads", PnvChip, nr_threads, 1),
      DEFINE_PROP_BOOL("big-core", PnvChip, big_core, false),
+    DEFINE_PROP_BOOL("lpar-per-core", PnvCore, lpar_per_core, false),
      DEFINE_PROP_END_OF_LIST(),
  };

>   
>           qdev_realize(DEVICE(pnv_core), NULL, &error_fatal);
>   
> @@ -2591,6 +2598,18 @@ static void pnv_machine_set_big_core(Object *obj, bool value, Error **errp)
>       pnv->big_core = value;
>   }
>   
> +static bool pnv_machine_get_lpar_per_core(Object *obj, Error **errp)
> +{
> +    PnvMachineState *pnv = PNV_MACHINE(obj);
> +    return pnv->lpar_per_core;
> +}
> +
> +static void pnv_machine_set_lpar_per_core(Object *obj, bool value, Error **errp)
> +{
> +    PnvMachineState *pnv = PNV_MACHINE(obj);
> +    pnv->lpar_per_core = value;
> +}
> +
>   static bool pnv_machine_get_hb(Object *obj, Error **errp)
>   {
>       PnvMachineState *pnv = PNV_MACHINE(obj);
> @@ -2630,6 +2649,8 @@ static void pnv_machine_power8_class_init(ObjectClass *oc, void *data)
>       pmc->compat = compat;
>       pmc->compat_size = sizeof(compat);
>       pmc->max_smt_threads = 8;
> +    /* POWER8 is always lpar-per-core mode */
> +    pmc->has_lpar_per_thread = false;
>   
>       machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB);
>   }
> @@ -2655,6 +2676,7 @@ static void pnv_machine_power9_class_init(ObjectClass *oc, void *data)
>       pmc->compat = compat;
>       pmc->compat_size = sizeof(compat);
>       pmc->max_smt_threads = 4;
> +    pmc->has_lpar_per_thread = true;
>       pmc->dt_power_mgt = pnv_dt_power_mgt;
>   
>       machine_class_allow_dynamic_sysbus_dev(mc, TYPE_PNV_PHB);
> @@ -2664,6 +2686,12 @@ static void pnv_machine_power9_class_init(ObjectClass *oc, void *data)
>                                      pnv_machine_set_big_core);
>       object_class_property_set_description(oc, "big-core",
>                                 "Use big-core (aka fused-core) mode");
> +
> +    object_class_property_add_bool(oc, "lpar-per-core",
> +                                   pnv_machine_get_lpar_per_core,
> +                                   pnv_machine_set_lpar_per_core);
> +    object_class_property_set_description(oc, "lpar-per-core",
> +                              "Use 1 LPAR per core mode");
>   }
>   
>   static void pnv_machine_p10_common_class_init(ObjectClass *oc, void *data)
> @@ -2686,6 +2714,7 @@ static void pnv_machine_p10_common_class_init(ObjectClass *oc, void *data)
>       pmc->compat = compat;
>       pmc->compat_size = sizeof(compat);
>       pmc->max_smt_threads = 4;
> +    pmc->has_lpar_per_thread = true;
>       pmc->quirk_tb_big_core = true;
>       pmc->dt_power_mgt = pnv_dt_power_mgt;
>   
> @@ -2711,6 +2740,12 @@ static void pnv_machine_power10_class_init(ObjectClass *oc, void *data)
>                                      pnv_machine_set_big_core);
>       object_class_property_set_description(oc, "big-core",
>                                 "Use big-core (aka fused-core) mode");
> +
> +    object_class_property_add_bool(oc, "lpar-per-core",
> +                                   pnv_machine_get_lpar_per_core,
> +                                   pnv_machine_set_lpar_per_core);
> +    object_class_property_set_description(oc, "lpar-per-core",
> +                              "Use 1 LPAR per core mode");
>   }
>   
>   static void pnv_machine_p10_rainier_class_init(ObjectClass *oc, void *data)
> diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
> index b32bdb79ff..d59da16ce4 100644
> --- a/hw/ppc/pnv_core.c
> +++ b/hw/ppc/pnv_core.c
> @@ -208,6 +208,9 @@ static uint64_t pnv_core_power10_xscom_read(void *opaque, hwaddr addr,
>                   val |= PPC_BIT(56 + i);
>               }
>           }
> +        if (pc->lpar_per_core) {
> +            val |= PPC_BIT(62);
> +        }
>           break;
>       case PNV10_XSCOM_EC_CORE_THREAD_INFO:
>           break;
> @@ -321,6 +324,10 @@ static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp,
>           env->core_index = core_hwid;
>       }
>   
> +    if (pc->lpar_per_core) {
> +        cpu_ppc_set_1lpar(cpu);
> +    }
> +
>       /* Set time-base frequency to 512 MHz */
>       cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
>   }


you will need  this extra :

@@ -427,6 +434,7 @@ static Property pnv_core_properties[] =
      DEFINE_PROP_LINK("chip", PnvCore, chip, TYPE_PNV_CHIP, PnvChip *),
      DEFINE_PROP_BOOL("big-core", PnvCore, big_core, false),
      DEFINE_PROP_BOOL("big-core-quirk", PnvCore, tod_state.big_core_quirk, false),
+    DEFINE_PROP_BOOL("lpar-per-core", PnvCore, lpar_per_core, false),
      DEFINE_PROP_END_OF_LIST(),
  };
  

Thanks,

C.



> diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
> index 81dd4e1a7a..4ba7f54510 100644
> --- a/target/ppc/cpu_init.c
> +++ b/target/ppc/cpu_init.c
> @@ -6803,7 +6803,8 @@ void cpu_ppc_set_1lpar(PowerPCCPU *cpu)
>   
>       /*
>        * pseries SMT means "LPAR per core" mode, e.g., msgsndp is usable
> -     * between threads.
> +     * between threads. powernv be in either mode, and it mostly affects
> +     * supervisor visible registers and instructions.
>        */
>       if (env->flags & POWERPC_FLAG_SMT) {
>           env->flags |= POWERPC_FLAG_SMT_1LPAR;