[RFC PATCH for-4.22 v2 3/3] xenpm: Add get-intel-temp subcommand

Teddy Astie posted 3 patches 3 months, 1 week ago
There is a newer version of this series
[RFC PATCH for-4.22 v2 3/3] xenpm: Add get-intel-temp subcommand
Posted by Teddy Astie 3 months, 1 week ago
get-intel-temp allows querying the per-core and per-package CPU
temperatures on Intel processors (the usual Dom0 drivers cannot be
used, because Dom0 vCPUs do not line up with the pCPUs).

Signed-off-by: Teddy Astie <teddy.astie@vates.tech>
---
v2: moved from a separate command to xenpm

 tools/misc/xenpm.c | 93 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 92 insertions(+), 1 deletion(-)

diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c
index 682d092479..ef9abee48e 100644
--- a/tools/misc/xenpm.c
+++ b/tools/misc/xenpm.c
@@ -37,6 +37,7 @@
 
 static xc_interface *xc_handle;
 static unsigned int max_cpu_nr;
+static xc_physinfo_t physinfo;
 
 /* help message */
 void show_help(void)
@@ -93,6 +94,7 @@ void show_help(void)
             "                                           units default to \"us\" if unspecified.\n"
             "                                           truncates un-representable values.\n"
             "                                           0 lets the hardware decide.\n"
+            " get-intel-temp        [cpuid]       get Intel CPU temperature of <cpuid> or all\n"
             " start [seconds]                     start collect Cx/Px statistics,\n"
             "                                     output after CTRL-C or SIGINT or several seconds.\n"
             " enable-turbo-mode     [cpuid]       enable Turbo Mode for processors that support it.\n"
@@ -1354,6 +1356,95 @@ void enable_turbo_mode(int argc, char *argv[])
                 errno, strerror(errno));
 }
 
+#define MSR_DTS_THERM_STATUS         0x0000019c /* status MSR read for a single CPU */
+#define MSR_DTS_TEMPERATURE_TARGET   0x000001a2 /* bits 23:16 are used as tjmax */
+#define MSR_DTS_PACKAGE_THERM_STATUS 0x000001b1 /* status MSR read for a package */
+
+/*
+ * Read the temperature of a CPU, or of the package it belongs to.
+ *
+ * @xch:     libxc handle used to issue the resource operation.
+ * @cpu:     physical CPU on which the MSRs are read.
+ * @package: true to read MSR_DTS_PACKAGE_THERM_STATUS, false to read
+ *           MSR_DTS_THERM_STATUS.
+ * @temp:    on success, set to tjmax minus the status readout (bits 23:16 of
+ *           the status MSR); callers print it as degrees Celsius.
+ *
+ * Returns 0 on success. Otherwise returns the non-positive result of
+ * xc_resource_op(), with 0 mapped to -EOPNOTSUPP, and leaves *temp untouched.
+ */
+static int fetch_dts_temp(xc_interface *xch, uint32_t cpu, bool package,
+                          int *temp)
+{
+    /* entries[0]: thermal status; entries[1]: temperature target (tjmax). */
+    xc_resource_entry_t entries[2] = {
+        (xc_resource_entry_t){
+            .idx = package ? MSR_DTS_PACKAGE_THERM_STATUS : MSR_DTS_THERM_STATUS
+        },
+        (xc_resource_entry_t){ .idx = MSR_DTS_TEMPERATURE_TARGET },
+    };
+    struct xc_resource_op ops = {
+        .cpu = cpu,
+        .entries = entries,
+        .nr_entries = 2,
+    };
+    int tjmax;
+    int ret = xc_resource_op(xch, 1, &ops);
+
+    if ( ret <= 0 )
+        /* This CPU isn't online or can't query this MSR */
+        return ret ?: -EOPNOTSUPP;
+
+    /* A result of 2 is taken to mean that both entries were read. */
+    if ( ret == 2 )
+        tjmax = (entries[1].val >> 16) & 0xff;
+    else
+    {
+        /*
+         * The temperature target MSR could not be read; assume a tjmax of
+         * 100, which is correct aside from a few selected Atom CPUs. Check
+         * the coretemp source code for more information.
+         */
+        fprintf(stderr, "[CPU%u] MSR_IA32_TEMPERATURE_TARGET is not supported, assume "
+                "tjmax=100°C, readings may be incorrect\n", cpu);
+        tjmax = 100;
+    }
+
+    *temp = tjmax - ((entries[0].val >> 16) & 0xff);
+    return 0;
+}
+
+
+/*
+ * Handler for "xenpm get-intel-temp [cpuid]".
+ *
+ * When a specific CPU is selected, print that CPU's temperature. Otherwise
+ * print one line per package for which a reading is available, a blank
+ * separator line, then one line per core (the first thread of each core is
+ * probed). "No data" is printed when nothing could be read.
+ */
+void get_intel_temp(int argc, char *argv[])
+{
+    /*
+     * Loop strides, taken from the topology in physinfo. A zero count would
+     * make the loops below never advance, so fall back to a stride of 1.
+     */
+    unsigned int core_step = physinfo.threads_per_core ?: 1;
+    unsigned int pkg_step = (physinfo.cores_per_socket ?: 1) * core_step;
+    unsigned int cpu, socket;
+    int temp, cpuid = -1;
+    bool has_data = false;
+
+    if ( argc > 0 )
+        parse_cpuid(argv[0], &cpuid);
+
+    if ( cpuid != -1 )
+    {
+        if ( !fetch_dts_temp(xc_handle, cpuid, false, &temp) )
+            printf("CPU%d: %d°C\n", cpuid, temp);
+        else
+            printf("No data\n");
+        return;
+    }
+
+    /*
+     * Per socket measurement: query the first CPU of each package-sized
+     * stride. NOTE(review): this assumes the CPUs of a package are numbered
+     * contiguously - confirm against the host topology.
+     */
+    for ( socket = 0, cpu = 0; cpu < max_cpu_nr; socket++, cpu += pkg_step )
+    {
+        if ( !fetch_dts_temp(xc_handle, cpu, true, &temp) )
+        {
+            has_data = true;
+            printf("Package%u: %d°C\n", socket, temp);
+        }
+    }
+
+    if ( has_data )
+        /* Avoid inserting a trailing line if we have nothing */
+        printf("\n");
+
+    /* Per core measurement: query the first thread of each core. */
+    for ( cpu = 0; cpu < max_cpu_nr; cpu += core_step )
+    {
+        if ( fetch_dts_temp(xc_handle, cpu, false, &temp) )
+            continue;
+
+        has_data = true;
+        printf("CPU%u: %d°C\n", cpu, temp);
+    }
+
+    if ( !has_data )
+        printf("No data\n");
+}
+
 void disable_turbo_mode(int argc, char *argv[])
 {
     int cpuid = -1;
@@ -1618,12 +1709,12 @@ struct {
     { "set-max-cstate", set_max_cstate_func},
     { "enable-turbo-mode", enable_turbo_mode },
     { "disable-turbo-mode", disable_turbo_mode },
+    { "get-intel-temp", get_intel_temp },
 };
 
 int main(int argc, char *argv[])
 {
     int i, ret = 0;
-    xc_physinfo_t physinfo;
     int nr_matches = 0;
     int matches_main_options[ARRAY_SIZE(main_options)];
 
-- 
2.51.2



--
Teddy Astie | Vates XCP-ng Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech
Re: [RFC PATCH for-4.22 v2 3/3] xenpm: Add get-intel-temp subcommand
Posted by Jan Beulich 3 months, 1 week ago
On 29.10.2025 16:59, Teddy Astie wrote:
> @@ -1354,6 +1356,95 @@ void enable_turbo_mode(int argc, char *argv[])
>                  errno, strerror(errno));
>  }
>  
> +#define MSR_DTS_THERM_STATUS         0x0000019c
> +#define MSR_DTS_TEMPERATURE_TARGET   0x000001a2
> +#define MSR_DTS_PACKAGE_THERM_STATUS 0x000001b1

DTS infix question again. Actually, can't we use the hypervisor's msr-index.h here?
We already use it from the emulator test harness.

> +static int fetch_dts_temp(xc_interface *xch, uint32_t cpu, bool package, int *temp)
> +{
> +    xc_resource_entry_t entries[2] = {
> +        (xc_resource_entry_t){
> +            .idx = package ? MSR_DTS_PACKAGE_THERM_STATUS : MSR_DTS_THERM_STATUS
> +        },
> +        (xc_resource_entry_t){ .idx = MSR_DTS_TEMPERATURE_TARGET },
> +    };
> +    struct xc_resource_op ops = {
> +        .cpu = cpu,
> +        .entries = entries,
> +        .nr_entries = 2,
> +    };
> +    int tjmax;

Plain int? (Same for the last function parameter.)

> +    int ret = xc_resource_op(xch, 1, &ops);
> +
> +    if ( ret <= 0 )
> +        /* This CPU isn't online or can't query this MSR */
> +        return ret ?: -EOPNOTSUPP;
> +
> +    if ( ret == 2 )
> +        tjmax = (entries[1].val >> 16) & 0xff;
> +    else
> +    {
> +        /*
> +         * The CPU doesn't support MSR_IA32_TEMPERATURE_TARGET, we assume it's 100 which
> +         * is correct aside a few selected Atom CPUs. Check coretemp source code for more
> +         * information.
> +         */
> +        fprintf(stderr, "[CPU%d] MSR_IA32_TEMPERATURE_TARGET is not supported, assume "
> +                "tjmax=100°C, readings may be incorrect\n", cpu);

As per remarks elsewhere, I don't see why there is an IA32 infix here.

> +        tjmax = 100;
> +    }
> +    
> +    *temp = tjmax - ((entries[0].val >> 16) & 0xff);
> +    return 0;
> +}
> +
> +
> +void get_intel_temp(int argc, char *argv[])
> +{
> +    int temp, cpu = -1, socket;

Plain int question again, for temp and socket.

> +    bool has_data = false;
> +
> +    if (argc > 0)

This and ...

> +        parse_cpuid(argv[0], &cpu);
> +
> +    if (cpu != -1)

... this if() don't fit the (hypervisor) style used elsewhere.

Jan

Re: [RFC PATCH for-4.22 v2 3/3] xenpm: Add get-intel-temp subcommand
Posted by Teddy Astie 3 months, 1 week ago
Le 30/10/2025 à 15:05, Jan Beulich a écrit :
> On 29.10.2025 16:59, Teddy Astie wrote:
>> @@ -1354,6 +1356,95 @@ void enable_turbo_mode(int argc, char *argv[])
>>                   errno, strerror(errno));
>>   }
>>   
>> +#define MSR_DTS_THERM_STATUS         0x0000019c
>> +#define MSR_DTS_TEMPERATURE_TARGET   0x000001a2
>> +#define MSR_DTS_PACKAGE_THERM_STATUS 0x000001b1
> 
> DTS infix question again. Actually, can't we use the hypervisor's msr-index.h here?
> We already use it from the emulator test harness.
> 

I wasn't sure whether tools could use msr-index.h or not. If we can, we 
likely also want to make some of the existing tools rely on it instead 
of having them define the MSRs in their own files.

>> +static int fetch_dts_temp(xc_interface *xch, uint32_t cpu, bool package, int *temp)
>> +{
>> +    xc_resource_entry_t entries[2] = {
>> +        (xc_resource_entry_t){
>> +            .idx = package ? MSR_DTS_PACKAGE_THERM_STATUS : MSR_DTS_THERM_STATUS
>> +        },
>> +        (xc_resource_entry_t){ .idx = MSR_DTS_TEMPERATURE_TARGET },
>> +    };
>> +    struct xc_resource_op ops = {
>> +        .cpu = cpu,
>> +        .entries = entries,
>> +        .nr_entries = 2,
>> +    };
>> +    int tjmax;
> 
> Plain int? (Same for the last function parameter.)
> 
>> +    int ret = xc_resource_op(xch, 1, &ops);
>> +
>> +    if ( ret <= 0 )
>> +        /* This CPU isn't online or can't query this MSR */
>> +        return ret ?: -EOPNOTSUPP;
>> +
>> +    if ( ret == 2 )
>> +        tjmax = (entries[1].val >> 16) & 0xff;
>> +    else
>> +    {
>> +        /*
>> +         * The CPU doesn't support MSR_IA32_TEMPERATURE_TARGET, we assume it's 100 which
>> +         * is correct aside a few selected Atom CPUs. Check coretemp source code for more
>> +         * information.
>> +         */
>> +        fprintf(stderr, "[CPU%d] MSR_IA32_TEMPERATURE_TARGET is not supported, assume "
>> +                "tjmax=100°C, readings may be incorrect\n", cpu);
> 
> As per remarks elsewhere, I don't see why there is an IA32 infix here.
> 
>> +        tjmax = 100;
>> +    }
>> +
>> +    *temp = tjmax - ((entries[0].val >> 16) & 0xff);
>> +    return 0;
>> +}
>> +
>> +
>> +void get_intel_temp(int argc, char *argv[])
>> +{
>> +    int temp, cpu = -1, socket;
> 
> Plain int question again, for temp and socket.
> 

socket should be unsigned. But temp (being a CPU temperature) can 
actually be negative (even though that only happens in quite specific 
situations). The use of int here is consistent with what Linux coretemp 
uses to store temperatures.

>> +    bool has_data = false;
>> +
>> +    if (argc > 0)
> 
> This and ...
> 
>> +        parse_cpuid(argv[0], &cpu);
>> +
>> +    if (cpu != -1)
> 
> ... this if() don't fit the (hypervisor) style used elsewhere.
> 

ok

> Jan
> 



--
Teddy Astie | Vates XCP-ng Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech