[PATCH v3 14/15] xen/xenpm: Adapt cpu frequency monitor in xenpm

Penny Zheng posted 15 patches 11 months, 1 week ago
There is a newer version of this series
[PATCH v3 14/15] xen/xenpm: Adapt cpu frequency monitor in xenpm
Posted by Penny Zheng 11 months, 1 week ago
Make `xenpm get-cpureq-para/set-cpufreq-para` available in CPPC mode.
Also, In `xenpm get-cpufreq-para <cpuid>`, para scaling_available_frequencies
only has meaningful value when cpufreq driver in legacy P-states.
So we loosen "has_num" condition to bypass scaling_available_frequencies
check in CPPC mode.

Also, in `xenpm get-cpyfreq-para start`, the monitor of average frequency shall
not depend on the existence of legacy P-states.

Signed-off-by: Penny Zheng <Penny.Zheng@amd.com>
---
v2 -> v3:
- new commit
---
 tools/libs/ctrl/xc_pm.c   | 12 +++++++-----
 tools/misc/xenpm.c        |  5 +++--
 xen/drivers/acpi/pmstat.c | 30 +++++++++++++++++-------------
 3 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/tools/libs/ctrl/xc_pm.c b/tools/libs/ctrl/xc_pm.c
index b27b45c3dc..d843b79d6d 100644
--- a/tools/libs/ctrl/xc_pm.c
+++ b/tools/libs/ctrl/xc_pm.c
@@ -214,13 +214,12 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
 			 user_para->gov_num * CPUFREQ_NAME_LEN * sizeof(char), XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
 
     bool has_num = user_para->cpu_num &&
-                     user_para->freq_num &&
                      user_para->gov_num;
 
     if ( has_num )
     {
         if ( (!user_para->affected_cpus)                    ||
-             (!user_para->scaling_available_frequencies)    ||
+             (user_para->freq_num && !user_para->scaling_available_frequencies)    ||
              (user_para->gov_num && !user_para->scaling_available_governors) )
         {
             errno = EINVAL;
@@ -228,14 +227,16 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
         }
         if ( xc_hypercall_bounce_pre(xch, affected_cpus) )
             goto unlock_1;
-        if ( xc_hypercall_bounce_pre(xch, scaling_available_frequencies) )
+        if ( user_para->freq_num &&
+             xc_hypercall_bounce_pre(xch, scaling_available_frequencies) )
             goto unlock_2;
         if ( user_para->gov_num &&
              xc_hypercall_bounce_pre(xch, scaling_available_governors) )
             goto unlock_3;
 
         set_xen_guest_handle(sys_para->affected_cpus, affected_cpus);
-        set_xen_guest_handle(sys_para->scaling_available_frequencies, scaling_available_frequencies);
+        if ( user_para->freq_num )
+            set_xen_guest_handle(sys_para->scaling_available_frequencies, scaling_available_frequencies);
         if ( user_para->gov_num )
             set_xen_guest_handle(sys_para->scaling_available_governors,
                                  scaling_available_governors);
@@ -301,7 +302,8 @@ unlock_4:
     if ( user_para->gov_num )
         xc_hypercall_bounce_post(xch, scaling_available_governors);
 unlock_3:
-    xc_hypercall_bounce_post(xch, scaling_available_frequencies);
+    if ( user_para->freq_num )
+        xc_hypercall_bounce_post(xch, scaling_available_frequencies);
 unlock_2:
     xc_hypercall_bounce_post(xch, affected_cpus);
 unlock_1:
diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c
index a7aeaea35e..a521800504 100644
--- a/tools/misc/xenpm.c
+++ b/tools/misc/xenpm.c
@@ -539,7 +539,7 @@ static void signal_int_handler(int signo)
                         res / 1000000UL, 100UL * res / (double)sum_px[i]);
             }
         }
-        if ( px_cap && avgfreq[i] )
+        if ( avgfreq[i] )
             printf("  Avg freq\t%d\tKHz\n", avgfreq[i]);
     }
 
@@ -926,7 +926,8 @@ static int show_cpufreq_para_by_cpuid(xc_interface *xc_handle, int cpuid)
             ret = -ENOMEM;
             goto out;
         }
-        if (!(p_cpufreq->scaling_available_frequencies =
+        if (p_cpufreq->freq_num &&
+            !(p_cpufreq->scaling_available_frequencies =
               malloc(p_cpufreq->freq_num * sizeof(uint32_t))))
         {
             fprintf(stderr,
diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c
index c8e00766a6..7f432be761 100644
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -202,7 +202,7 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
     pmpt = processor_pminfo[op->cpuid];
     policy = per_cpu(cpufreq_cpu_policy, op->cpuid);
 
-    if ( !pmpt || !pmpt->perf.states ||
+    if ( !pmpt || ((pmpt->init & XEN_PX_INIT) && !pmpt->perf.states) ||
          !policy || !policy->governor )
         return -EINVAL;
 
@@ -229,17 +229,20 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
     if ( ret )
         return ret;
 
-    if ( !(scaling_available_frequencies =
-           xzalloc_array(uint32_t, op->u.get_para.freq_num)) )
-        return -ENOMEM;
-    for ( i = 0; i < op->u.get_para.freq_num; i++ )
-        scaling_available_frequencies[i] =
-                        pmpt->perf.states[i].core_frequency * 1000;
-    ret = copy_to_guest(op->u.get_para.scaling_available_frequencies,
-                   scaling_available_frequencies, op->u.get_para.freq_num);
-    xfree(scaling_available_frequencies);
-    if ( ret )
-        return ret;
+    if ( op->u.get_para.freq_num )
+    {
+        if ( !(scaling_available_frequencies =
+               xzalloc_array(uint32_t, op->u.get_para.freq_num)) )
+            return -ENOMEM;
+        for ( i = 0; i < op->u.get_para.freq_num; i++ )
+            scaling_available_frequencies[i] =
+                            pmpt->perf.states[i].core_frequency * 1000;
+        ret = copy_to_guest(op->u.get_para.scaling_available_frequencies,
+                    scaling_available_frequencies, op->u.get_para.freq_num);
+        xfree(scaling_available_frequencies);
+        if ( ret )
+            return ret;
+    }
 
     op->u.get_para.cpuinfo_cur_freq =
         cpufreq_driver.get ? alternative_call(cpufreq_driver.get, op->cpuid)
@@ -465,7 +468,8 @@ int do_pm_op(struct xen_sysctl_pm_op *op)
     switch ( op->cmd & PM_PARA_CATEGORY_MASK )
     {
     case CPUFREQ_PARA:
-        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
+        if ( !(xen_processor_pmbits & (XEN_PROCESSOR_PM_PX |
+                                       XEN_PROCESSOR_PM_CPPC)) )
             return -ENODEV;
         if ( !pmpt || !(pmpt->init & (XEN_PX_INIT | XEN_CPPC_INIT)) )
             return -EINVAL;
-- 
2.34.1
Re: [PATCH v3 14/15] xen/xenpm: Adapt cpu frequency monitor in xenpm
Posted by Jan Beulich 10 months, 3 weeks ago
On 06.03.2025 09:39, Penny Zheng wrote:
> Make `xenpm get-cpureq-para/set-cpufreq-para` available in CPPC mode.
> --- a/tools/libs/ctrl/xc_pm.c
> +++ b/tools/libs/ctrl/xc_pm.c
> @@ -214,13 +214,12 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
>  			 user_para->gov_num * CPUFREQ_NAME_LEN * sizeof(char), XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
>  
>      bool has_num = user_para->cpu_num &&
> -                     user_para->freq_num &&
>                       user_para->gov_num;
>  
>      if ( has_num )

Something looks wrong here already before your patch: With how has_num is set
and with this conditional, ...

>      {
>          if ( (!user_para->affected_cpus)                    ||
> -             (!user_para->scaling_available_frequencies)    ||
> +             (user_para->freq_num && !user_para->scaling_available_frequencies)    ||
>               (user_para->gov_num && !user_para->scaling_available_governors) )

... this ->gov_num check, ...

>          {
>              errno = EINVAL;
> @@ -228,14 +227,16 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
>          }
>          if ( xc_hypercall_bounce_pre(xch, affected_cpus) )
>              goto unlock_1;
> -        if ( xc_hypercall_bounce_pre(xch, scaling_available_frequencies) )
> +        if ( user_para->freq_num &&
> +             xc_hypercall_bounce_pre(xch, scaling_available_frequencies) )
>              goto unlock_2;
>          if ( user_para->gov_num &&

... this one, and ...

>               xc_hypercall_bounce_pre(xch, scaling_available_governors) )
>              goto unlock_3;
>  
>          set_xen_guest_handle(sys_para->affected_cpus, affected_cpus);
> -        set_xen_guest_handle(sys_para->scaling_available_frequencies, scaling_available_frequencies);
> +        if ( user_para->freq_num )
> +            set_xen_guest_handle(sys_para->scaling_available_frequencies, scaling_available_frequencies);

(Nit: Yet another overly long line. It was too long already before, yes, but
 that's no excuse to make it even longer.  The more that there is better
 formatting right in context below.)

>          if ( user_para->gov_num )

... this one are all dead code. Jason? I expect the has_num variable simply
wants dropping altogether, thus correcting the earlier anomaly and getting
the intended new behavior at the same time.

>              set_xen_guest_handle(sys_para->scaling_available_governors,
>                                   scaling_available_governors);

This is the piece of context I'm referring to in the nit above.

> @@ -301,7 +302,8 @@ unlock_4:
>      if ( user_para->gov_num )
>          xc_hypercall_bounce_post(xch, scaling_available_governors);
>  unlock_3:
> -    xc_hypercall_bounce_post(xch, scaling_available_frequencies);
> +    if ( user_para->freq_num )
> +        xc_hypercall_bounce_post(xch, scaling_available_frequencies);
>  unlock_2:
>      xc_hypercall_bounce_post(xch, affected_cpus);
>  unlock_1:

I'm also puzzled by the function's inconsistent return value - Anthony,
can you explain / spot why things are the way they are?

> --- a/tools/misc/xenpm.c
> +++ b/tools/misc/xenpm.c
> @@ -539,7 +539,7 @@ static void signal_int_handler(int signo)
>                          res / 1000000UL, 100UL * res / (double)sum_px[i]);
>              }
>          }
> -        if ( px_cap && avgfreq[i] )
> +        if ( avgfreq[i] )
>              printf("  Avg freq\t%d\tKHz\n", avgfreq[i]);
>      }

I wonder whether this shouldn't be an independent change (which then
could go in rather sooner).

> @@ -926,7 +926,8 @@ static int show_cpufreq_para_by_cpuid(xc_interface *xc_handle, int cpuid)
>              ret = -ENOMEM;
>              goto out;
>          }
> -        if (!(p_cpufreq->scaling_available_frequencies =
> +        if (p_cpufreq->freq_num &&
> +            !(p_cpufreq->scaling_available_frequencies =
>                malloc(p_cpufreq->freq_num * sizeof(uint32_t))))
>          {
>              fprintf(stderr,

Can someone explain to me how the pre-existing logic here works? All
three ->*_num start out as zero. Hence respective allocations (of zero
size) may conceivably return NULL (the behavior there is implementation
defined after all). Yet then we'd bail from the loop, and hence from the
function. IOW adding a ->freq_num check and also a ->cpu_num one (along
with the ->gov_num one that apparently was added during HWP development)
would once again look like an independent (latent) bugfix to me.

> --- a/xen/drivers/acpi/pmstat.c
> +++ b/xen/drivers/acpi/pmstat.c
> @@ -202,7 +202,7 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
>      pmpt = processor_pminfo[op->cpuid];
>      policy = per_cpu(cpufreq_cpu_policy, op->cpuid);
>  
> -    if ( !pmpt || !pmpt->perf.states ||
> +    if ( !pmpt || ((pmpt->init & XEN_PX_INIT) && !pmpt->perf.states) ||
>           !policy || !policy->governor )
>          return -EINVAL;

Wouldn't this change better belong in the earlier patch, where the code
in context of the last hunk below was adjusted?

> @@ -229,17 +229,20 @@ static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
>      if ( ret )
>          return ret;
>  
> -    if ( !(scaling_available_frequencies =
> -           xzalloc_array(uint32_t, op->u.get_para.freq_num)) )
> -        return -ENOMEM;
> -    for ( i = 0; i < op->u.get_para.freq_num; i++ )
> -        scaling_available_frequencies[i] =
> -                        pmpt->perf.states[i].core_frequency * 1000;
> -    ret = copy_to_guest(op->u.get_para.scaling_available_frequencies,
> -                   scaling_available_frequencies, op->u.get_para.freq_num);
> -    xfree(scaling_available_frequencies);
> -    if ( ret )
> -        return ret;
> +    if ( op->u.get_para.freq_num )
> +    {
> +        if ( !(scaling_available_frequencies =
> +               xzalloc_array(uint32_t, op->u.get_para.freq_num)) )
> +            return -ENOMEM;
> +        for ( i = 0; i < op->u.get_para.freq_num; i++ )
> +            scaling_available_frequencies[i] =
> +                            pmpt->perf.states[i].core_frequency * 1000;

Nit: Indentation was bogus here and ...

> +        ret = copy_to_guest(op->u.get_para.scaling_available_frequencies,
> +                    scaling_available_frequencies, op->u.get_para.freq_num);

... here before, and sadly continues to be bogus now.

> +        xfree(scaling_available_frequencies);
> +        if ( ret )
> +            return ret;
> +    }

While (beyond the nit above) I'm okay with this simple change, I think the
code here would benefit from folding the two allocations into one. There
simply is no reason to pay the price of the allocation overhead twice, when
we need a uint32_t[max(.cpu_num, .freq_num)] array anyway. That way the
churn introduced here would then also be smaller.

> @@ -465,7 +468,8 @@ int do_pm_op(struct xen_sysctl_pm_op *op)
>      switch ( op->cmd & PM_PARA_CATEGORY_MASK )
>      {
>      case CPUFREQ_PARA:
> -        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
> +        if ( !(xen_processor_pmbits & (XEN_PROCESSOR_PM_PX |
> +                                       XEN_PROCESSOR_PM_CPPC)) )
>              return -ENODEV;
>          if ( !pmpt || !(pmpt->init & (XEN_PX_INIT | XEN_CPPC_INIT)) )
>              return -EINVAL;

(This is the hunk I'm referring to further up.)

Jan
Re: [PATCH v3 14/15] xen/xenpm: Adapt cpu frequency monitor in xenpm
Posted by Anthony PERARD 10 months, 2 weeks ago
On Tue, Mar 25, 2025 at 12:26:09PM +0100, Jan Beulich wrote:
> On 06.03.2025 09:39, Penny Zheng wrote:
> > Make `xenpm get-cpureq-para/set-cpufreq-para` available in CPPC mode.
> > --- a/tools/libs/ctrl/xc_pm.c
> > +++ b/tools/libs/ctrl/xc_pm.c
> > @@ -214,13 +214,12 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
> > @@ -301,7 +302,8 @@ unlock_4:
> >      if ( user_para->gov_num )
> >          xc_hypercall_bounce_post(xch, scaling_available_governors);
> >  unlock_3:
> > -    xc_hypercall_bounce_post(xch, scaling_available_frequencies);
> > +    if ( user_para->freq_num )
> > +        xc_hypercall_bounce_post(xch, scaling_available_frequencies);
> >  unlock_2:
> >      xc_hypercall_bounce_post(xch, affected_cpus);
> >  unlock_1:
> 
> I'm also puzzled by the function's inconsistent return value - Anthony,
> can you explain / spot why things are the way they are?

Looks like 73367cf3b4b4 ("libxc: Fix xc_pm API calls to return negative
error and stash error in errno.") made some changes, and fixed some
return value to be like described in "xenctrl.h", but I guess failed to
also change the "ret = -errno".

Cheers,

-- 

Anthony Perard | Vates XCP-ng Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech
Re: [PATCH v3 14/15] xen/xenpm: Adapt cpu frequency monitor in xenpm
Posted by Jason Andryuk 10 months, 3 weeks ago
On 2025-03-25 07:26, Jan Beulich wrote:
> On 06.03.2025 09:39, Penny Zheng wrote:
>> Make `xenpm get-cpureq-para/set-cpufreq-para` available in CPPC mode.
>> --- a/tools/libs/ctrl/xc_pm.c
>> +++ b/tools/libs/ctrl/xc_pm.c
>> @@ -214,13 +214,12 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
>>   			 user_para->gov_num * CPUFREQ_NAME_LEN * sizeof(char), XC_HYPERCALL_BUFFER_BOUNCE_BOTH);
>>   
>>       bool has_num = user_para->cpu_num &&
>> -                     user_para->freq_num &&
>>                        user_para->gov_num;
>>   
>>       if ( has_num )
> 
> Something looks wrong here already before your patch: With how has_num is set
> and with this conditional, ...
> 
>>       {
>>           if ( (!user_para->affected_cpus)                    ||
>> -             (!user_para->scaling_available_frequencies)    ||
>> +             (user_para->freq_num && !user_para->scaling_available_frequencies)    ||
>>                (user_para->gov_num && !user_para->scaling_available_governors) )
> 
> ... this ->gov_num check, ...>>           {
>>               errno = EINVAL;
>> @@ -228,14 +227,16 @@ int xc_get_cpufreq_para(xc_interface *xch, int cpuid,
>>           }
>>           if ( xc_hypercall_bounce_pre(xch, affected_cpus) )
>>               goto unlock_1;
>> -        if ( xc_hypercall_bounce_pre(xch, scaling_available_frequencies) )
>> +        if ( user_para->freq_num &&
>> +             xc_hypercall_bounce_pre(xch, scaling_available_frequencies) )
>>               goto unlock_2;
>>           if ( user_para->gov_num &&
> 
> ... this one, and ...
> 
>>                xc_hypercall_bounce_pre(xch, scaling_available_governors) )
>>               goto unlock_3;
>>   
>>           set_xen_guest_handle(sys_para->affected_cpus, affected_cpus);
>> -        set_xen_guest_handle(sys_para->scaling_available_frequencies, scaling_available_frequencies);
>> +        if ( user_para->freq_num )
>> +            set_xen_guest_handle(sys_para->scaling_available_frequencies, scaling_available_frequencies);
> 
> (Nit: Yet another overly long line. It was too long already before, yes, but
>   that's no excuse to make it even longer.  The more that there is better
>   formatting right in context below.)
> 
>>           if ( user_para->gov_num )
> 
> ... this one are all dead code. Jason? I expect the has_num variable simply
> wants dropping altogether, thus correcting the earlier anomaly and getting
> the intended new behavior at the same time.

Hmmm.  The sysctl is executed twice - first to query the assorted *_num 
values and a second time to retrieve the results with sized arrays.

get_hwp_para() does not populate scaling_available_governors, so the 
intention was to be able to skip allocating the buffer for it.

     pmstat&xenpm: Re-arrage for cpufreq union

     Rearrange code now that xen_sysctl_pm_op's get_para fields has the
     nested union and struct.  In particular, the scaling governor
     information like scaling_available_governors is inside the union, so it
     is not always available.  Move those fields (op->u.get_para.u.s.u.*)
     together as well as the common fields (ones outside the union like
     op->u.get_para.turbo_enabled).

     With that, gov_num may be 0, so bounce buffer handling needs
     to be modified.

     scaling_governor and other fields inside op->u.get_para.u.s.u.* 
won't be
     used for hwp, so this will simplify the change when hwp support is
     introduced and re-indents these lines all together.

I noted that gov_num may be 0.  But that may have been before hwp had 
its own internal governor.  But, yes, the has_num handling looks wrong 
for gov_num == 0.  I don't have a machine with hwp to verify.


>> @@ -926,7 +926,8 @@ static int show_cpufreq_para_by_cpuid(xc_interface *xc_handle, int cpuid)
>>               ret = -ENOMEM;
>>               goto out;
>>           }
>> -        if (!(p_cpufreq->scaling_available_frequencies =
>> +        if (p_cpufreq->freq_num &&
>> +            !(p_cpufreq->scaling_available_frequencies =
>>                 malloc(p_cpufreq->freq_num * sizeof(uint32_t))))
>>           {
>>               fprintf(stderr,
> 
> Can someone explain to me how the pre-existing logic here works? All
> three ->*_num start out as zero. Hence respective allocations (of zero
> size) may conceivably return NULL (the behavior there is implementation
> defined after all). Yet then we'd bail from the loop, and hence from the
> function. IOW adding a ->freq_num check and also a ->cpu_num one (along
> with the ->gov_num one that apparently was added during HWP development)
> would once again look like an independent (latent) bugfix to me.

I guess we rely on glibc providing non-NULL?  But also they are ignored 
for the initial query of *_num values.

Regards,
Jason