cpu_x86: Fix CPU model comparison heuristics

[PATCH 15/15] cpu_x86: Fix algorithm for computing CPU model weight

Posted by Jiri Denemark via Devel 9 months, 2 weeks ago

From: Jiri Denemark <jdenemar@redhat.com>

This patch is effectively a NOP, but it fixes a logic bug and makes the
heuristics more visible and easier to change should there be a need to
do so in the future.

We decide which CPU model is the best match for given CPU data by
comparing lists of features that need to be enabled/disabled on top of
the selected CPU model. Since the original approach of using just the
total number of features was not working well enough, commit
v8.3.0-42-g48341b025a implemented a penalty for disabled features which
would increase for each additional disabled features. Apparently the
intention was weighting disabled features as

                      disabled * (disabled + 3)
    weightDisabled =  -------------------------
                                2

and complete CPU model as

    weight = enabled + weightDisabled

But there was a bug in the code which caused it to ignore some of the
features and counted as enabled regardless on their policy. Instead of
going through all features the code used the number of "enabled"
features (the variable was not really counting number of enabled
features though) which was initialized to the total number of features
and decremented each time a disabled features was found. Thus depending
on the number of disabled features, some features at the end of the list
were ignored. Luckily we know all the ignored features had to be
disabled because the CPU definitions were created by x86DataToCPU which
constructs a list of enabled features followed by disabled features.

So to fix the bug while providing the same results we can come up with
an equivalent formula using properly counted features in the CPU
definition.

The number of disabled features counted by the buggy code is

    half = (disabled + 1) div 2

and the weight of all disabled features is

                     half * (half + 3)
    weightDisabled = -----------------
                            2

When computing the total weight, we can't no longer use number of
enabled features because the original code counted some of the disabled
features as enabled. So to match the old behavior, we count the total
weight as

    weight = features - half + weightDisabled

The weight of enabled features now differs from the value computed by
the old code, but we don't need to worry about it as it's not really
used anywhere except for logging.

Fixes: https://gitlab.com/libvirt/libvirt/-/issues/759
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
---
 src/cpu/cpu_x86.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/src/cpu/cpu_x86.c b/src/cpu/cpu_x86.c
index b0fe2bed4c..213af67ea4 100644
--- a/src/cpu/cpu_x86.c
+++ b/src/cpu/cpu_x86.c
@@ -2087,9 +2087,6 @@ virCPUx86Compare(virCPUDef *host,
 }
 
 
-/* Base penalty for disabled features. */
-#define BASE_PENALTY 2
-
 struct virCPUx86Weight {
     size_t total;
     size_t enabled;
@@ -2100,26 +2097,25 @@ static void
 virCPUx86WeightFeatures(const virCPUDef *cpu,
                         struct virCPUx86Weight *weight)
 {
-    int penalty = BASE_PENALTY;
     size_t i;
+    size_t half; /* half of disabled features rounded up */
 
     weight->enabled = cpu->nfeatures;
-    weight->disabled = 0;
 
     if (cpu->type == VIR_CPU_TYPE_HOST) {
+        weight->disabled = 0;
         weight->total = cpu->nfeatures;
         return;
     }
 
-    for (i = 0; i < weight->enabled; i++) {
-        if (cpu->features[i].policy == VIR_CPU_FEATURE_DISABLE) {
+    for (i = 0; i < cpu->nfeatures; i++) {
+        if (cpu->features[i].policy == VIR_CPU_FEATURE_DISABLE)
             weight->enabled--;
-            weight->disabled += penalty;
-            penalty++;
-        }
     }
 
-    weight->total = weight->enabled + weight->disabled;
+    half = (cpu->nfeatures - weight->enabled + 1) / 2;
+    weight->disabled = half * (half + 3) / 2;
+    weight->total = cpu->nfeatures - half + weight->disabled;
 }
 
 
-- 
2.49.0

Re: [PATCH 15/15] cpu_x86: Fix algorithm for computing CPU model weight

Posted by Peter Krempa via Devel 9 months, 2 weeks ago

On Tue, Apr 29, 2025 at 12:19:50 +0200, Jiri Denemark via Devel wrote:
> From: Jiri Denemark <jdenemar@redhat.com>
> 
> This patch is effectively a NOP, but it fixes a logic bug and makes the
> heuristics more visible and easier to change should there be a need to
> do so in the future.
> 
> We decide which CPU model is the best match for given CPU data by
> comparing lists of features that need to be enabled/disabled on top of
> the selected CPU model. Since the original approach of using just the
> total number of features was not working well enough, commit
> v8.3.0-42-g48341b025a implemented a penalty for disabled features which
> would increase for each additional disabled features. Apparently the
> intention was weighting disabled features as
> 
>                       disabled * (disabled + 3)
>     weightDisabled =  -------------------------
>                                 2
> 
> and complete CPU model as
> 
>     weight = enabled + weightDisabled
> 
> But there was a bug in the code which caused it to ignore some of the
> features and counted as enabled regardless on their policy. Instead of
> going through all features the code used the number of "enabled"
> features (the variable was not really counting number of enabled
> features though) which was initialized to the total number of features
> and decremented each time a disabled features was found. Thus depending
> on the number of disabled features, some features at the end of the list
> were ignored. Luckily we know all the ignored features had to be
> disabled because the CPU definitions were created by x86DataToCPU which
> constructs a list of enabled features followed by disabled features.
> 
> So to fix the bug while providing the same results we can come up with
> an equivalent formula using properly counted features in the CPU
> definition.
> 
> The number of disabled features counted by the buggy code is
> 
>     half = (disabled + 1) div 2
> 
> and the weight of all disabled features is
> 
>                      half * (half + 3)
>     weightDisabled = -----------------
>                             2
> 
> When computing the total weight, we can't no longer use number of
> enabled features because the original code counted some of the disabled
> features as enabled. So to match the old behavior, we count the total
> weight as
> 
>     weight = features - half + weightDisabled
> 
> The weight of enabled features now differs from the value computed by
> the old code, but we don't need to worry about it as it's not really
> used anywhere except for logging.
> 
> Fixes: https://gitlab.com/libvirt/libvirt/-/issues/759
> Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
> ---
>  src/cpu/cpu_x86.c | 18 +++++++-----------
>  1 file changed, 7 insertions(+), 11 deletions(-)
> 
> diff --git a/src/cpu/cpu_x86.c b/src/cpu/cpu_x86.c
> index b0fe2bed4c..213af67ea4 100644
> --- a/src/cpu/cpu_x86.c
> +++ b/src/cpu/cpu_x86.c
> @@ -2087,9 +2087,6 @@ virCPUx86Compare(virCPUDef *host,
>  }
>  
>  
> -/* Base penalty for disabled features. */
> -#define BASE_PENALTY 2
> -
>  struct virCPUx86Weight {
>      size_t total;
>      size_t enabled;
> @@ -2100,26 +2097,25 @@ static void
>  virCPUx86WeightFeatures(const virCPUDef *cpu,
>                          struct virCPUx86Weight *weight)
>  {
> -    int penalty = BASE_PENALTY;
>      size_t i;
> +    size_t half; /* half of disabled features rounded up */
>  
>      weight->enabled = cpu->nfeatures;
> -    weight->disabled = 0;
>  
>      if (cpu->type == VIR_CPU_TYPE_HOST) {
> +        weight->disabled = 0;
>          weight->total = cpu->nfeatures;
>          return;
>      }
>  
> -    for (i = 0; i < weight->enabled; i++) {
> -        if (cpu->features[i].policy == VIR_CPU_FEATURE_DISABLE) {
> +    for (i = 0; i < cpu->nfeatures; i++) {
> +        if (cpu->features[i].policy == VIR_CPU_FEATURE_DISABLE)
>              weight->enabled--;
> -            weight->disabled += penalty;
> -            penalty++;
> -        }

Okay I know why the code didn't make sense in the refactor :D

>      }
>  
> -    weight->total = weight->enabled + weight->disabled;
> +    half = (cpu->nfeatures - weight->enabled + 1) / 2;
> +    weight->disabled = half * (half + 3) / 2;
> +    weight->total = cpu->nfeatures - half + weight->disabled;

This looks like an excercise in reading C operator precedence :P

>  }
>  
>  
> -- 
> 2.49.0
> 

Reviewed-by: Peter Krempa <pkrempa@redhat.com>