[PATCH v5 15/18] whpx: arm64: implement -cpu host

Mohamed Mediouni posted 18 patches 4 months, 1 week ago
Maintainers: Cameron Esfahani <dirty@apple.com>, Roman Bolshakov <rbolshakov@ddn.com>, Phil Dennis-Jordan <phil@philjordan.eu>, Mads Ynddal <mads@ynddal.dk>, Sunil Muthuswamy <sunilmut@microsoft.com>, Mohamed Mediouni <mohamed@unpredictable.fr>, Peter Maydell <peter.maydell@linaro.org>, Shannon Zhao <shannon.zhaosl@gmail.com>, "Michael S. Tsirkin" <mst@redhat.com>, Igor Mammedov <imammedo@redhat.com>, Ani Sinha <anisinha@redhat.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Yanan Wang <wangyanan55@huawei.com>, Zhao Liu <zhao1.liu@intel.com>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, "Daniel P. Berrangé" <berrange@redhat.com>, Alexander Graf <agraf@csgraf.de>
There is a newer version of this series
[PATCH v5 15/18] whpx: arm64: implement -cpu host
Posted by Mohamed Mediouni 4 months, 1 week ago
OpenProcessorKey and ReadRegU64 adapted from:
https://github.com/FEX-Emu/FEX/blob/e6de17e72ef03aa88ba14fa0ec13163061608c74/Source/Windows/Common/CPUFeatures.cpp#L62

Signed-off-by: Mohamed Mediouni <mohamed@unpredictable.fr>

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
---
 hw/arm/virt.c              |   2 +-
 target/arm/cpu64.c         |  19 +++++--
 target/arm/whpx/whpx-all.c | 110 +++++++++++++++++++++++++++++++++++++
 target/arm/whpx_arm.h      |   1 +
 4 files changed, 125 insertions(+), 7 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index c980f59e82..a23edeb2d2 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3217,7 +3217,7 @@ static void virt_machine_class_init(ObjectClass *oc, const void *data)
 #ifdef TARGET_AARCH64
         ARM_CPU_TYPE_NAME("cortex-a53"),
         ARM_CPU_TYPE_NAME("cortex-a57"),
-#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
+#if defined(CONFIG_KVM) || defined(CONFIG_HVF) || defined(CONFIG_WHPX)
         ARM_CPU_TYPE_NAME("host"),
 #endif /* CONFIG_KVM || CONFIG_HVF */
 #endif /* TARGET_AARCH64 */
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 26cf7e6dfa..3f00071081 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -26,10 +26,13 @@
 #include "qemu/units.h"
 #include "system/kvm.h"
 #include "system/hvf.h"
+#include "system/whpx.h"
+#include "system/hw_accel.h"
 #include "system/qtest.h"
 #include "system/tcg.h"
 #include "kvm_arm.h"
 #include "hvf_arm.h"
+#include "whpx_arm.h"
 #include "qapi/visitor.h"
 #include "hw/qdev-properties.h"
 #include "internals.h"
@@ -522,7 +525,7 @@ void arm_cpu_pauth_finalize(ARMCPU *cpu, Error **errp)
     isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, APA3, 0);
     isar2 = FIELD_DP64(isar2, ID_AA64ISAR2, GPA3, 0);
 
-    if (kvm_enabled() || hvf_enabled()) {
+    if (hwaccel_enabled()) {
         /*
          * Exit early if PAuth is enabled and fall through to disable it.
          * The algorithm selection properties are not present.
@@ -599,10 +602,10 @@ void aarch64_add_pauth_properties(Object *obj)
 
     /* Default to PAUTH on, with the architected algorithm on TCG. */
     qdev_property_add_static(DEVICE(obj), &arm_cpu_pauth_property);
-    if (kvm_enabled() || hvf_enabled()) {
+    if (hwaccel_enabled()) {
         /*
          * Mirror PAuth support from the probed sysregs back into the
-         * property for KVM or hvf. Is it just a bit backward? Yes it is!
+         * property for HW accel. Is it just a bit backward? Yes it is!
          * Note that prop_pauth is true whether the host CPU supports the
          * architected QARMA5 algorithm or the IMPDEF one. We don't
          * provide the separate pauth-impdef property for KVM or hvf,
@@ -773,6 +776,10 @@ static void aarch64_host_initfn(Object *obj)
     ARMCPU *cpu = ARM_CPU(obj);
     hvf_arm_set_cpu_features_from_host(cpu);
     aarch64_add_pauth_properties(obj);
+#elif defined(CONFIG_WHPX)
+    ARMCPU *cpu = ARM_CPU(obj);
+    whpx_arm_set_cpu_features_from_host(cpu);
+    aarch64_add_pauth_properties(obj);
 #else
     g_assert_not_reached();
 #endif
@@ -780,8 +787,8 @@ static void aarch64_host_initfn(Object *obj)
 
 static void aarch64_max_initfn(Object *obj)
 {
-    if (kvm_enabled() || hvf_enabled()) {
-        /* With KVM or HVF, '-cpu max' is identical to '-cpu host' */
+    if (hwaccel_enabled()) {
+        /* When hardware acceleration enabled, '-cpu max' is identical to '-cpu host' */
         aarch64_host_initfn(obj);
         return;
     }
@@ -800,7 +807,7 @@ static const ARMCPUInfo aarch64_cpus[] = {
     { .name = "cortex-a57",         .initfn = aarch64_a57_initfn },
     { .name = "cortex-a53",         .initfn = aarch64_a53_initfn },
     { .name = "max",                .initfn = aarch64_max_initfn },
-#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
+#if defined(CONFIG_KVM) || defined(CONFIG_HVF) || defined(CONFIG_WHPX)
     { .name = "host",               .initfn = aarch64_host_initfn },
 #endif
 };
diff --git a/target/arm/whpx/whpx-all.c b/target/arm/whpx/whpx-all.c
index 3dcccfe7fd..b33473dcd3 100644
--- a/target/arm/whpx/whpx-all.c
+++ b/target/arm/whpx/whpx-all.c
@@ -41,6 +41,17 @@
 
 #include <winhvplatform.h>
 #include <winhvplatformdefs.h>
+#include <winreg.h>
+
+typedef struct ARMHostCPUFeatures {
+    ARMISARegisters isar;
+    uint64_t features;
+    uint64_t midr;
+    uint32_t reset_sctlr;
+    const char *dtb_compatible;
+} ARMHostCPUFeatures;
+
+static ARMHostCPUFeatures arm_host_cpu_features;
 
 struct whpx_reg_match {
     WHV_REGISTER_NAME reg;
@@ -693,6 +704,105 @@ static void clamp_id_aa64mmfr0_parange_to_ipa_size(ARMISARegisters *isar)
     SET_IDREG(isar, ID_AA64MMFR0, id_aa64mmfr0);
 }
 
+static HKEY OpenProcessorKey(void)
+{
+    HKEY Out;
+    const char *path = "Hardware\\Description\\System\\CentralProcessor\\0\\";
+    assert(!RegOpenKeyExA(HKEY_LOCAL_MACHINE, path, 0, KEY_READ, &Out));
+    return Out;
+}
+
+static uint64_t ReadRegU64(HKEY Key, const char *name)
+{
+    uint64_t Value = 0;
+    DWORD Size = sizeof(Value);
+    assert(!RegGetValueA(Key, NULL, name, RRF_RT_REG_QWORD, NULL, &Value, &Size));
+    return Value;
+}
+
+static bool whpx_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
+{
+    const struct isar_regs {
+        WHV_REGISTER_NAME reg;
+        uint64_t *val;
+    } regs[] = {
+        { WHvArm64RegisterIdAa64Pfr0El1, &ahcf->isar.idregs[ID_AA64PFR0_EL1_IDX] },
+        { WHvArm64RegisterIdAa64Pfr1El1, &ahcf->isar.idregs[ID_AA64PFR1_EL1_IDX] },
+        { WHvArm64RegisterIdAa64Dfr0El1, &ahcf->isar.idregs[ID_AA64DFR0_EL1_IDX] },
+        { WHvArm64RegisterIdAa64Dfr1El1 , &ahcf->isar.idregs[ID_AA64DFR1_EL1_IDX] },
+        { WHvArm64RegisterIdAa64Isar0El1, &ahcf->isar.idregs[ID_AA64ISAR0_EL1_IDX] },
+        { WHvArm64RegisterIdAa64Isar1El1, &ahcf->isar.idregs[ID_AA64ISAR1_EL1_IDX] },
+        { WHvArm64RegisterIdAa64Isar2El1, &ahcf->isar.idregs[ID_AA64ISAR2_EL1_IDX] },
+        { WHvArm64RegisterIdAa64Mmfr0El1, &ahcf->isar.idregs[ID_AA64MMFR0_EL1_IDX] },
+        { WHvArm64RegisterIdAa64Mmfr1El1, &ahcf->isar.idregs[ID_AA64MMFR1_EL1_IDX] },
+        { WHvArm64RegisterIdAa64Mmfr2El1, &ahcf->isar.idregs[ID_AA64MMFR2_EL1_IDX] },
+        { WHvArm64RegisterIdAa64Mmfr3El1, &ahcf->isar.idregs[ID_AA64MMFR2_EL1_IDX] }
+    };
+
+    int i;
+    WHV_REGISTER_VALUE val;
+
+    ahcf->dtb_compatible = "arm,armv8";
+    ahcf->features = (1ULL << ARM_FEATURE_V8) |
+                     (1ULL << ARM_FEATURE_NEON) |
+                     (1ULL << ARM_FEATURE_AARCH64) |
+                     (1ULL << ARM_FEATURE_PMU) |
+                     (1ULL << ARM_FEATURE_GENERIC_TIMER);
+
+    for (i = 0; i < ARRAY_SIZE(regs); i++) {
+        clean_whv_register_value(&val);
+        whpx_get_global_reg(regs[i].reg, &val);
+        *regs[i].val = val.Reg64;
+    }
+
+    /*
+     * MIDR_EL1 is not a global register on WHPX
+     * As such, read the CPU0 from the registry to get a consistent value.
+     * Otherwise, on heterogenous systems, you'll get variance between CPUs.
+     */
+    HKEY ProcessorKey = OpenProcessorKey();
+    ahcf->midr = ReadRegU64(ProcessorKey, "CP 4000");
+    RegCloseKey(ProcessorKey);
+
+    clamp_id_aa64mmfr0_parange_to_ipa_size(&ahcf->isar);
+
+    /*
+     * Disable SVE, which is not supported by QEMU whpx yet.
+     * Work needed for SVE support:
+     * - SVE state save/restore
+     * - any potentially needed VL management
+     * Also disable SME at the same time. (not currently supported by Hyper-V)
+     */
+    SET_IDREG(&ahcf->isar, ID_AA64PFR0,
+              GET_IDREG(&ahcf->isar, ID_AA64PFR0) & ~R_ID_AA64PFR0_SVE_MASK);
+
+    SET_IDREG(&ahcf->isar, ID_AA64PFR1,
+              GET_IDREG(&ahcf->isar, ID_AA64PFR1) & ~R_ID_AA64PFR1_SME_MASK);
+
+    return true;
+}
+
+void whpx_arm_set_cpu_features_from_host(ARMCPU *cpu)
+{
+    if (!arm_host_cpu_features.dtb_compatible) {
+        if (!whpx_enabled() ||
+            !whpx_arm_get_host_cpu_features(&arm_host_cpu_features)) {
+            /*
+             * We can't report this error yet, so flag that we need to
+             * in arm_cpu_realizefn().
+             */
+            cpu->host_cpu_probe_failed = true;
+            return;
+        }
+    }
+
+    cpu->dtb_compatible = arm_host_cpu_features.dtb_compatible;
+    cpu->isar = arm_host_cpu_features.isar;
+    cpu->env.features = arm_host_cpu_features.features;
+    cpu->midr = arm_host_cpu_features.midr;
+    cpu->reset_sctlr = arm_host_cpu_features.reset_sctlr;
+}
+
 int whpx_init_vcpu(CPUState *cpu)
 {
     HRESULT hr;
diff --git a/target/arm/whpx_arm.h b/target/arm/whpx_arm.h
index de7406b66f..df65fd753c 100644
--- a/target/arm/whpx_arm.h
+++ b/target/arm/whpx_arm.h
@@ -12,5 +12,6 @@
 #include "target/arm/cpu-qom.h"
 
 uint32_t whpx_arm_get_ipa_bit_size(void);
+void whpx_arm_set_cpu_features_from_host(ARMCPU *cpu);
 
 #endif
-- 
2.39.5 (Apple Git-154)
Re: [PATCH v5 15/18] whpx: arm64: implement -cpu host
Posted by Peter Maydell 4 months ago
On Fri, 8 Aug 2025 at 07:55, Mohamed Mediouni <mohamed@unpredictable.fr> wrote:
>
> OpenProcessorKey and ReadRegU64 adapted from:
> https://github.com/FEX-Emu/FEX/blob/e6de17e72ef03aa88ba14fa0ec13163061608c74/Source/Windows/Common/CPUFeatures.cpp#L62
>
> Signed-off-by: Mohamed Mediouni <mohamed@unpredictable.fr>

> +    /*
> +     * MIDR_EL1 is not a global register on WHPX
> +     * As such, read the CPU0 from the registry to get a consistent value.
> +     * Otherwise, on heterogenous systems, you'll get variance between CPUs.
> +     */
> +    HKEY ProcessorKey = OpenProcessorKey();


Can you follow the QEMU coding style, please (here and elsewhere)?
Variables and function names should be all lower case,
and variable declarations go at the start of a C code
block, not in the middle of one.

thanks
-- PMM
Re: [PATCH v5 15/18] whpx: arm64: implement -cpu host
Posted by Pierrick Bouvier 4 months ago
On 8/19/25 6:24 AM, Peter Maydell wrote:
> On Fri, 8 Aug 2025 at 07:55, Mohamed Mediouni <mohamed@unpredictable.fr> wrote:
>>
>> OpenProcessorKey and ReadRegU64 adapted from:
>> https://github.com/FEX-Emu/FEX/blob/e6de17e72ef03aa88ba14fa0ec13163061608c74/Source/Windows/Common/CPUFeatures.cpp#L62
>>
>> Signed-off-by: Mohamed Mediouni <mohamed@unpredictable.fr>
> 
>> +    /*
>> +     * MIDR_EL1 is not a global register on WHPX
>> +     * As such, read the CPU0 from the registry to get a consistent value.
>> +     * Otherwise, on heterogenous systems, you'll get variance between CPUs.
>> +     */
>> +    HKEY ProcessorKey = OpenProcessorKey();
> 
> 
> Can you follow the QEMU coding style, please (here and elsewhere)?
> Variables and function names should be all lower case,
> and variable declarations go at the start of a C code
> block, not in the middle of one.
>

In some cases, including in this function, I feel that the rule to 
declare variables at the start of a block is not really helpful, and is 
more related to legacy C than a real point nowadays.
As well, it sometimes forces to reuse some variables between various sub 
blocks, which definitely can create bugs.

Anyway, I'm not discussing the existing QEMU coding style, but just 
asking if for the current context, is it really a problem to declare 
variable here?

> thanks
> -- PMM
Re: [PATCH v5 15/18] whpx: arm64: implement -cpu host
Posted by Peter Maydell 4 months ago
On Tue, 19 Aug 2025 at 16:04, Pierrick Bouvier
<pierrick.bouvier@linaro.org> wrote:
>
> On 8/19/25 6:24 AM, Peter Maydell wrote:
> > On Fri, 8 Aug 2025 at 07:55, Mohamed Mediouni <mohamed@unpredictable.fr> wrote:
> > Can you follow the QEMU coding style, please (here and elsewhere)?
> > Variables and function names should be all lower case,
> > and variable declarations go at the start of a C code
> > block, not in the middle of one.
> >
>
> In some cases, including in this function, I feel that the rule to
> declare variables at the start of a block is not really helpful, and is
> more related to legacy C than a real point nowadays.
> As well, it sometimes forces to reuse some variables between various sub
> blocks, which definitely can create bugs.
>
> Anyway, I'm not discussing the existing QEMU coding style, but just
> asking if for the current context, is it really a problem to declare
> variable here?

The point of a coding style is to aim for consistency. QEMU
is pretty terrible at being consistent, but we should try.
The rule about variables at start of block is not because
some compilers fail to compile it, but because we think
it's overall more readable that way.

thanks
-- PMM
Re: [PATCH v5 15/18] whpx: arm64: implement -cpu host
Posted by Daniel P. Berrangé 4 months ago
On Tue, Aug 19, 2025 at 04:06:45PM +0100, Peter Maydell wrote:
> On Tue, 19 Aug 2025 at 16:04, Pierrick Bouvier
> <pierrick.bouvier@linaro.org> wrote:
> >
> > On 8/19/25 6:24 AM, Peter Maydell wrote:
> > > On Fri, 8 Aug 2025 at 07:55, Mohamed Mediouni <mohamed@unpredictable.fr> wrote:
> > > Can you follow the QEMU coding style, please (here and elsewhere)?
> > > Variables and function names should be all lower case,
> > > and variable declarations go at the start of a C code
> > > block, not in the middle of one.
> > >
> >
> > In some cases, including in this function, I feel that the rule to
> > declare variables at the start of a block is not really helpful, and is
> > more related to legacy C than a real point nowadays.
> > As well, it sometimes forces to reuse some variables between various sub
> > blocks, which definitely can create bugs.
> >
> > Anyway, I'm not discussing the existing QEMU coding style, but just
> > asking if for the current context, is it really a problem to declare
> > variable here?
> 
> The point of a coding style is to aim for consistency. QEMU
> is pretty terrible at being consistent, but we should try.
> The rule about variables at start of block is not because
> some compilers fail to compile it, but because we think
> it's overall more readable that way.

There are also potential[1] functional problems with not declaring
at the start of block, because if you have a "goto cleanup" which
jumps over the line of the declaration, the variable will have
undefined state when the 'cleanup:' block is running. This is
something which is very subtle and easily missed when reading the
code flow.

With regards,
Daniel

[1] I say "potential" because we unconditionally build with
    -ftrivial-auto-var-init=zero to mitigate this danger
    in practice.
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
Re: [PATCH v5 15/18] whpx: arm64: implement -cpu host
Posted by Pierrick Bouvier 4 months ago
On 8/19/25 8:12 AM, Daniel P. Berrangé wrote:
> On Tue, Aug 19, 2025 at 04:06:45PM +0100, Peter Maydell wrote:
>> On Tue, 19 Aug 2025 at 16:04, Pierrick Bouvier
>> <pierrick.bouvier@linaro.org> wrote:
>>>
>>> On 8/19/25 6:24 AM, Peter Maydell wrote:
>>>> On Fri, 8 Aug 2025 at 07:55, Mohamed Mediouni <mohamed@unpredictable.fr> wrote:
>>>> Can you follow the QEMU coding style, please (here and elsewhere)?
>>>> Variables and function names should be all lower case,
>>>> and variable declarations go at the start of a C code
>>>> block, not in the middle of one.
>>>>
>>>
>>> In some cases, including in this function, I feel that the rule to
>>> declare variables at the start of a block is not really helpful, and is
>>> more related to legacy C than a real point nowadays.
>>> As well, it sometimes forces to reuse some variables between various sub
>>> blocks, which definitely can create bugs.
>>>
>>> Anyway, I'm not discussing the existing QEMU coding style, but just
>>> asking if for the current context, is it really a problem to declare
>>> variable here?
>>
>> The point of a coding style is to aim for consistency. QEMU
>> is pretty terrible at being consistent, but we should try.
>> The rule about variables at start of block is not because
>> some compilers fail to compile it, but because we think
>> it's overall more readable that way.
> 
> There are also potential[1] functional problems with not declaring
> at the start of block, because if you have a "goto cleanup" which
> jumps over the line of the declaration, the variable will have
> undefined state when the 'cleanup:' block is running. This is
> something which is very subtle and easily missed when reading the
> code flow.
>

This has nothing to do with where variables are declared, but where they 
are assigned. The same issue can happen whether or not it's declared at 
the start of a block.

I suspect we use -ftrivial-auto-var-init precisely because we force 
variables to be declared at start of the scope, i.e. where they don't 
have any value yet. So, instead of forcing an explicit initialization or 
rely on compiler warnings for uninitialized values, it was decided to 
initialize them to 0 by default.

If we declared them at the point where they have a defined semantic 
value, this problem would not exist anyway, out of the goto_cleanup 
situation, which has the same fundamental issue in both cases.

> With regards,
> Daniel
> 
> [1] I say "potential" because we unconditionally build with
>      -ftrivial-auto-var-init=zero to mitigate this danger
>      in practice.


Re: [PATCH v5 15/18] whpx: arm64: implement -cpu host
Posted by Daniel P. Berrangé 4 months ago
On Tue, Aug 19, 2025 at 08:48:16AM -0700, Pierrick Bouvier wrote:
> On 8/19/25 8:12 AM, Daniel P. Berrangé wrote:
> > On Tue, Aug 19, 2025 at 04:06:45PM +0100, Peter Maydell wrote:
> > > On Tue, 19 Aug 2025 at 16:04, Pierrick Bouvier
> > > <pierrick.bouvier@linaro.org> wrote:
> > > > 
> > > > On 8/19/25 6:24 AM, Peter Maydell wrote:
> > > > > On Fri, 8 Aug 2025 at 07:55, Mohamed Mediouni <mohamed@unpredictable.fr> wrote:
> > > > > Can you follow the QEMU coding style, please (here and elsewhere)?
> > > > > Variables and function names should be all lower case,
> > > > > and variable declarations go at the start of a C code
> > > > > block, not in the middle of one.
> > > > > 
> > > > 
> > > > In some cases, including in this function, I feel that the rule to
> > > > declare variables at the start of a block is not really helpful, and is
> > > > more related to legacy C than a real point nowadays.
> > > > As well, it sometimes forces to reuse some variables between various sub
> > > > blocks, which definitely can create bugs.
> > > > 
> > > > Anyway, I'm not discussing the existing QEMU coding style, but just
> > > > asking if for the current context, is it really a problem to declare
> > > > variable here?
> > > 
> > > The point of a coding style is to aim for consistency. QEMU
> > > is pretty terrible at being consistent, but we should try.
> > > The rule about variables at start of block is not because
> > > some compilers fail to compile it, but because we think
> > > it's overall more readable that way.
> > 
> > There are also potential[1] functional problems with not declaring
> > at the start of block, because if you have a "goto cleanup" which
> > jumps over the line of the declaration, the variable will have
> > undefined state when the 'cleanup:' block is running. This is
> > something which is very subtle and easily missed when reading the
> > code flow.
> > 
> 
> This has nothing to do with where variables are declared, but where they are
> assigned. The same issue can happen whether or not it's declared at the
> start of a block.
> 
> I suspect we use -ftrivial-auto-var-init precisely because we force
> variables to be declared at start of the scope, i.e. where they don't have
> any value yet. So, instead of forcing an explicit initialization or rely on
> compiler warnings for uninitialized values, it was decided to initialize
> them to 0 by default.
> 
> If we declared them at the point where they have a defined semantic value,
> this problem would not exist anyway, out of the goto_cleanup situation,
> which has the same fundamental issue in both cases.

It really isn't the same issue when you compare

  void bar(void) {
    char *foo = NULL;

    if (blah)
       goto cleanup:

  cleanup:
    if (foo)
       ....
  }

vs

  void bar(void) {
    if (blah)
       goto cleanup:

    char *foo = NULL;

    ...some code...

  cleanup:
    if (foo)
       ....
  }

The late declaration of 'foo' is outright misleading to reviewers.

Its initialization at time of declaration gives the impression
that 'foo' has well defined value in the 'cleanup' block, when
that is not actually true. In big methods it is very easy to
overlook an earlier 'goto' that jumps across a variable declaration
and initialization.

Even if not all methods have this problem, the coding standards
guide us into the habit of writing code that is immune from this
kind of problem. That habit only forms reliably if we apply the
coding standards unconditionally, rather than selectively.

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|


Re: [PATCH v5 15/18] whpx: arm64: implement -cpu host
Posted by Pierrick Bouvier 4 months ago
On 8/19/25 9:08 AM, Daniel P. Berrangé wrote:
> On Tue, Aug 19, 2025 at 08:48:16AM -0700, Pierrick Bouvier wrote:
>> On 8/19/25 8:12 AM, Daniel P. Berrangé wrote:
>>> On Tue, Aug 19, 2025 at 04:06:45PM +0100, Peter Maydell wrote:
>>>> On Tue, 19 Aug 2025 at 16:04, Pierrick Bouvier
>>>> <pierrick.bouvier@linaro.org> wrote:
>>>>>
>>>>> On 8/19/25 6:24 AM, Peter Maydell wrote:
>>>>>> On Fri, 8 Aug 2025 at 07:55, Mohamed Mediouni <mohamed@unpredictable.fr> wrote:
>>>>>> Can you follow the QEMU coding style, please (here and elsewhere)?
>>>>>> Variables and function names should be all lower case,
>>>>>> and variable declarations go at the start of a C code
>>>>>> block, not in the middle of one.
>>>>>>
>>>>>
>>>>> In some cases, including in this function, I feel that the rule to
>>>>> declare variables at the start of a block is not really helpful, and is
>>>>> more related to legacy C than a real point nowadays.
>>>>> As well, it sometimes forces to reuse some variables between various sub
>>>>> blocks, which definitely can create bugs.
>>>>>
>>>>> Anyway, I'm not discussing the existing QEMU coding style, but just
>>>>> asking if for the current context, is it really a problem to declare
>>>>> variable here?
>>>>
>>>> The point of a coding style is to aim for consistency. QEMU
>>>> is pretty terrible at being consistent, but we should try.
>>>> The rule about variables at start of block is not because
>>>> some compilers fail to compile it, but because we think
>>>> it's overall more readable that way.
>>>
>>> There are also potential[1] functional problems with not declaring
>>> at the start of block, because if you have a "goto cleanup" which
>>> jumps over the line of the declaration, the variable will have
>>> undefined state when the 'cleanup:' block is running. This is
>>> something which is very subtle and easily missed when reading the
>>> code flow.
>>>
>>
>> This has nothing to do with where variables are declared, but where they are
>> assigned. The same issue can happen whether or not it's declared at the
>> start of a block.
>>
>> I suspect we use -ftrivial-auto-var-init precisely because we force
>> variables to be declared at start of the scope, i.e. where they don't have
>> any value yet. So, instead of forcing an explicit initialization or rely on
>> compiler warnings for uninitialized values, it was decided to initialize
>> them to 0 by default.
>>
>> If we declared them at the point where they have a defined semantic value,
>> this problem would not exist anyway, out of the goto_cleanup situation,
>> which has the same fundamental issue in both cases.
> 
> It really isn't the same issue when you compare
> 
>    void bar(void) {
>      char *foo = NULL;
> 
>      if (blah)
>         goto cleanup:
> 
>    cleanup:
>      if (foo)
>         ....
>    }
> 
> vs
> 
>    void bar(void) {
>      if (blah)
>         goto cleanup:
> 
>      char *foo = NULL;
>
>      ...some code...
>
>    cleanup:>      if (foo)
>         ....
>    }
> 
> The late declaration of 'foo' is outright misleading to reviewers.
> 
> Its initialization at time of declaration gives the impression
> that 'foo' has well defined value in the 'cleanup' block, when
> that is not actually true. In big methods it is very easy to
> overlook an earlier 'goto' that jumps across a variable declaration
> and initialization.
>

"Big" method is probably the issue there. If it's not possible to follow 
control flow in a given function, it's a strong hint there is a problem 
with its size, independently of any standard. And even though 
goto_cleanup is a legit pattern in C, I still don't get the argument 
about declaring variable far from their definition point in this case.
It seems that we are trying to solve the consequence without really 
understanding the root cause issue.

> Even if not all methods have this problem, the coding standards
> guide us into the habit of writing code that is immune from this
> kind of problem. That habit only forms reliably if we apply the
> coding standards unconditionally, rather than selectively.
>

That's right, but humanly enforced coding standard are usually a waste 
of time for everyone (reviewers and developers).

How many messages and exchanges on the mailing list could we save by 
using something like clang-format on the codebase, and force it to be 
"clean" as part of the CI? There would be no more discussion, as there 
would be only one single and objective source of truth.

> With regards,
> Daniel


Re: [PATCH v5 15/18] whpx: arm64: implement -cpu host
Posted by Daniel P. Berrangé 4 months ago
On Tue, Aug 19, 2025 at 09:25:22AM -0700, Pierrick Bouvier wrote:
> On 8/19/25 9:08 AM, Daniel P. Berrangé wrote:
> > On Tue, Aug 19, 2025 at 08:48:16AM -0700, Pierrick Bouvier wrote:
> > > On 8/19/25 8:12 AM, Daniel P. Berrangé wrote:
> > > > On Tue, Aug 19, 2025 at 04:06:45PM +0100, Peter Maydell wrote:
> > > > > On Tue, 19 Aug 2025 at 16:04, Pierrick Bouvier
> > > > > <pierrick.bouvier@linaro.org> wrote:
> > > > > > 
> > > > > > On 8/19/25 6:24 AM, Peter Maydell wrote:
> > > > > > > On Fri, 8 Aug 2025 at 07:55, Mohamed Mediouni <mohamed@unpredictable.fr> wrote:
> > > > > > > Can you follow the QEMU coding style, please (here and elsewhere)?
> > > > > > > Variables and function names should be all lower case,
> > > > > > > and variable declarations go at the start of a C code
> > > > > > > block, not in the middle of one.
> > > > > > > 
> > > > > > 
> > > > > > In some cases, including in this function, I feel that the rule to
> > > > > > declare variables at the start of a block is not really helpful, and is
> > > > > > more related to legacy C than a real point nowadays.
> > > > > > As well, it sometimes forces to reuse some variables between various sub
> > > > > > blocks, which definitely can create bugs.
> > > > > > 
> > > > > > Anyway, I'm not discussing the existing QEMU coding style, but just
> > > > > > asking if for the current context, is it really a problem to declare
> > > > > > variable here?
> > > > > 
> > > > > The point of a coding style is to aim for consistency. QEMU
> > > > > is pretty terrible at being consistent, but we should try.
> > > > > The rule about variables at start of block is not because
> > > > > some compilers fail to compile it, but because we think
> > > > > it's overall more readable that way.
> > > > 
> > > > There are also potential[1] functional problems with not declaring
> > > > at the start of block, because if you have a "goto cleanup" which
> > > > jumps over the line of the declaration, the variable will have
> > > > undefined state when the 'cleanup:' block is running. This is
> > > > something which is very subtle and easily missed when reading the
> > > > code flow.
> > > > 
> > > 
> > > This has nothing to do with where variables are declared, but where they are
> > > assigned. The same issue can happen whether or not it's declared at the
> > > start of a block.
> > > 
> > > I suspect we use -ftrivial-auto-var-init precisely because we force
> > > variables to be declared at start of the scope, i.e. where they don't have
> > > any value yet. So, instead of forcing an explicit initialization or rely on
> > > compiler warnings for uninitialized values, it was decided to initialize
> > > them to 0 by default.
> > > 
> > > If we declared them at the point where they have a defined semantic value,
> > > this problem would not exist anyway, out of the goto_cleanup situation,
> > > which has the same fundamental issue in both cases.
> > 
> > It really isn't the same issue when you compare
> > 
> >    void bar(void) {
> >      char *foo = NULL;
> > 
> >      if (blah)
> >         goto cleanup:
> > 
> >    cleanup:
> >      if (foo)
> >         ....
> >    }
> > 
> > vs
> > 
> >    void bar(void) {
> >      if (blah)
> >         goto cleanup:
> > 
> >      char *foo = NULL;
> > 
> >      ...some code...
> > 
> >    cleanup:>      if (foo)
> >         ....
> >    }
> > 
> > The late declaration of 'foo' is outright misleading to reviewers.
> > 
> > Its initialization at time of declaration gives the impression
> > that 'foo' has well defined value in the 'cleanup' block, when
> > that is not actually true. In big methods it is very easy to
> > overlook an earlier 'goto' that jumps across a variable declaration
> > and initialization.
> > 
> 
> "Big" method is probably the issue there. If it's not possible to follow
> control flow in a given function, it's a strong hint there is a problem with
> its size, independently of any standard.

Certainly some methods are too big & deserve refactoring, but that's a
non-trivial investment, and it isn't always a clearcut win to split
code out into a bunch of arbitrarily short methods. You may solve the
goto/initialization problem, but make other things harder as you often
still have to fully page all the code into mind to understand it.

> > Even if not all methods have this problem, the coding standards
> > guide us into the habit of writing code that is immune from this
> > kind of problem. That habit only forms reliably if we apply the
> > coding standards unconditionally, rather than selectively.
> > 
> 
> That's right, but humanly enforced coding standard are usually a waste of
> time for everyone (reviewers and developers).

Human enforced standards are absolutely better than a free-for-all. Over
time contributors will gain familiarity with the project standards and
largely comply without enforcement being required. If contributors
repeatedly ignore coding standards, it will disincentivise reviewers
from looking at their patches.

> How many messages and exchanges on the mailing list could we save by using
> something like clang-format on the codebase, and force it to be "clean" as
> part of the CI? There would be no more discussion, as there would be only
> one single and objective source of truth.

I would really love if it we could apply clang-format to everything, but
that has a non-trivial impact on maint when done on a large pre-existing
codebase like QEMU. Cherry-picking to upstream stable or distros would
be immensely painful, verging on impossible, after a bulk reformat. For
any new codebase I'd go for clang-format every time.

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|


Re: [PATCH v5 15/18] whpx: arm64: implement -cpu host
Posted by Pierrick Bouvier 4 months ago
On 8/19/25 9:40 AM, Daniel P. Berrangé wrote:
> On Tue, Aug 19, 2025 at 09:25:22AM -0700, Pierrick Bouvier wrote:
>> On 8/19/25 9:08 AM, Daniel P. Berrangé wrote:
>>> On Tue, Aug 19, 2025 at 08:48:16AM -0700, Pierrick Bouvier wrote:
>>>> On 8/19/25 8:12 AM, Daniel P. Berrangé wrote:
>>>>> On Tue, Aug 19, 2025 at 04:06:45PM +0100, Peter Maydell wrote:
>>>>>> On Tue, 19 Aug 2025 at 16:04, Pierrick Bouvier
>>>>>> <pierrick.bouvier@linaro.org> wrote:
>>>>>>>
>>>>>>> On 8/19/25 6:24 AM, Peter Maydell wrote:
>>>>>>>> On Fri, 8 Aug 2025 at 07:55, Mohamed Mediouni <mohamed@unpredictable.fr> wrote:
>>>>>>>> Can you follow the QEMU coding style, please (here and elsewhere)?
>>>>>>>> Variables and function names should be all lower case,
>>>>>>>> and variable declarations go at the start of a C code
>>>>>>>> block, not in the middle of one.
>>>>>>>>
>>>>>>>
>>>>>>> In some cases, including in this function, I feel that the rule to
>>>>>>> declare variables at the start of a block is not really helpful, and is
>>>>>>> more related to legacy C than a real point nowadays.
>>>>>>> As well, it sometimes forces to reuse some variables between various sub
>>>>>>> blocks, which definitely can create bugs.
>>>>>>>
>>>>>>> Anyway, I'm not discussing the existing QEMU coding style, but just
>>>>>>> asking if for the current context, is it really a problem to declare
>>>>>>> variable here?
>>>>>>
>>>>>> The point of a coding style is to aim for consistency. QEMU
>>>>>> is pretty terrible at being consistent, but we should try.
>>>>>> The rule about variables at start of block is not because
>>>>>> some compilers fail to compile it, but because we think
>>>>>> it's overall more readable that way.
>>>>>
>>>>> There are also potential[1] functional problems with not declaring
>>>>> at the start of block, because if you have a "goto cleanup" which
>>>>> jumps over the line of the declaration, the variable will have
>>>>> undefined state when the 'cleanup:' block is running. This is
>>>>> something which is very subtle and easily missed when reading the
>>>>> code flow.
>>>>>
>>>>
>>>> This has nothing to do with where variables are declared, but where they are
>>>> assigned. The same issue can happen whether or not it's declared at the
>>>> start of a block.
>>>>
>>>> I suspect we use -ftrivial-auto-var-init precisely because we force
>>>> variables to be declared at start of the scope, i.e. where they don't have
>>>> any value yet. So, instead of forcing an explicit initialization or rely on
>>>> compiler warnings for uninitialized values, it was decided to initialize
>>>> them to 0 by default.
>>>>
>>>> If we declared them at the point where they have a defined semantic value,
>>>> this problem would not exist anyway, out of the goto_cleanup situation,
>>>> which has the same fundamental issue in both cases.
>>>
>>> It really isn't the same issue when you compare
>>>
>>>     void bar(void) {
>>>       char *foo = NULL;
>>>
>>>       if (blah)
>>>          goto cleanup:
>>>
>>>     cleanup:
>>>       if (foo)
>>>          ....
>>>     }
>>>
>>> vs
>>>
>>>     void bar(void) {
>>>       if (blah)
>>>          goto cleanup:
>>>
>>>       char *foo = NULL;
>>>
>>>       ...some code...
>>>
>>>     cleanup:>      if (foo)
>>>          ....
>>>     }
>>>
>>> The late declaration of 'foo' is outright misleading to reviewers.
>>>
>>> Its initialization at time of declaration gives the impression
>>> that 'foo' has well defined value in the 'cleanup' block, when
>>> that is not actually true. In big methods it is very easy to
>>> overlook an earlier 'goto' that jumps across a variable declaration
>>> and initialization.
>>>
>>
>> "Big" method is probably the issue there. If it's not possible to follow
>> control flow in a given function, it's a strong hint there is a problem with
>> its size, independently of any standard.
> 
> Certainly some methods are too big & deserve refactoring, but that's a
> non-trivial investment, and it isn't always a clearcut win to split
> code out into a bunch of arbitrarily short methods. You may solve the
> goto/initialization problem, but make other things harder as you often
> still have to fully page all the code into mind to understand it.
>

I am still looking for an example of where breaking down a big function 
in smaller logical chunks has reduced the readability for anyone, but I 
never met one so far in my professional life.
Usually it comes with the additional benefit that you need to *name* 
things explicitely, which is usually better than add a comment about them.
The point of breaking down code is explicitely to remove the need to 
keep things in mind and assume functions do what they are named for.

I respect the difference about tastes concerning readability and code 
structure, and I know the context and era from which QEMU codebase comes 
from.

However, arguing that variables should be at the start of a block 
because of a potential goto_cleanup situation is not a good argument.

>>> Even if not all methods have this problem, the coding standards
>>> guide us into the habit of writing code that is immune from this
>>> kind of problem. That habit only forms reliably if we apply the
>>> coding standards unconditionally, rather than selectively.
>>>
>>
>> That's right, but humanly enforced coding standard are usually a waste of
>> time for everyone (reviewers and developers).
> 
> Human enforced standards are absolutely better than a free-for-all. Over
> time contributors will gain familiarity with the project standards and
> largely comply without enforcement being required. If contributors
> repeatedly ignore coding standards, it will disincentivise reviewers
> from looking at their patches.
>

Sure.
As well, incessant pushbacks and nitpicking from those same reviewers 
can disincentivise people to send any patch.
But maybe the whole point is simply to keep people out of their lawn, or 
reduce the amount of patches they need to process daily, who knows.

>> How many messages and exchanges on the mailing list could we save by using
>> something like clang-format on the codebase, and force it to be "clean" as
>> part of the CI? There would be no more discussion, as there would be only
>> one single and objective source of truth.
> 
> I would really love if it we could apply clang-format to everything, but
> that has a non-trivial impact on maint when done on a large pre-existing
> codebase like QEMU. Cherry-picking to upstream stable or distros would
> be immensely painful, verging on impossible, after a bulk reformat. For
> any new codebase I'd go for clang-format every time.
>

Maybe we could organize a conversation about this, because the benefits 
are worth making cherry-picking a little bit harder. In this case, all 
the community benefits from this, while blocking this penalizes everyone 
except the stable maintenance part and downstream forks.
As well, it would be a once in a lifetime price to pay.

> With regards,
> Daniel