[PATCH v10] xenpm: Add get-core-temp subcommand

Teddy Astie posted 1 patch 5 days, 6 hours ago
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/22bcde2914c0303b2c594485542af19d2952e782.1775570823.git.teddy.astie@vates.tech
CHANGELOG.md       |   2 +
tools/misc/xenpm.c | 121 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 122 insertions(+), 1 deletion(-)
[PATCH v10] xenpm: Add get-core-temp subcommand
Posted by Teddy Astie 5 days, 6 hours ago
get-core-temp allows querying the per-core CPU temperature and
per-package one on processors that support Digital Temperature Sensors
(most Intel processors; as usual Dom0 drivers cannot work due to
misalignment between Dom0 vCPU and pCPUs).

Signed-off-by: Teddy Astie <teddy.astie@vates.tech>
---
v4: https://lore.kernel.org/xen-devel/cover.1766158766.git.teddy.astie@vates.tech/
v5: Removed trailing whitespace.
v6: Report errors through errno and use strerror() to display them
v7:
 - Rename get-intel-temp with get-dts-temp
 - handle errno properly
 - make process return an error code if no data
v8:
 - update Changelog
 - improve error handling
 - rename core-dts-temp with get-core-temp
v9:
 - exit with EXIT_{SUCCESS,FAILURE} instead of errno
v10:
 - make error handling more uniform
 - removed "No data" message
 - stop querying package temperature on failure

 CHANGELOG.md       |   2 +
 tools/misc/xenpm.c | 121 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 166df43c63..c8cb125fae 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
      mitigate (by rate-limiting) the system wide impact of an HVM guest
      misusing atomic instructions.
    - Support for CPIO microcode in discrete multiboot modules.
+   - Introduce get-core-temp to xenpm to query CPU temperatures on Intel
+     platforms.
 
  - On Arm:
    - Support for guest suspend and resume to/from RAM via vPSCI.
diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c
index e4902d2e82..3371bb5be7 100644
--- a/tools/misc/xenpm.c
+++ b/tools/misc/xenpm.c
@@ -32,11 +32,14 @@
 
 #include <xen-tools/common-macros.h>
 
+#include <xen/asm/msr-index.h>
+
 #define MAX_PKG_RESIDENCIES 12
 #define MAX_CORE_RESIDENCIES 8
 
 static xc_interface *xc_handle;
 static unsigned int max_cpu_nr;
+static xc_physinfo_t physinfo;
 
 /* help message */
 void show_help(void)
@@ -93,6 +96,7 @@ void show_help(void)
             "                                           units default to \"us\" if unspecified.\n"
             "                                           truncates un-representable values.\n"
             "                                           0 lets the hardware decide.\n"
+            " get-core-temp          [cpuid]      get CPU temperature for <cpuid> or all (Intel only)\n"
             " start [seconds]                     start collect Cx/Px statistics,\n"
             "                                     output after CTRL-C or SIGINT or several seconds.\n"
             " enable-turbo-mode     [cpuid]       enable Turbo Mode for processors that support it.\n"
@@ -1354,6 +1358,121 @@ void enable_turbo_mode(int argc, char *argv[])
                 errno, strerror(errno));
 }
 
+/*
+ * Fetch the DTS temperature of @cpu (of its package if @package) into @temp.
+ * Returns 0, or -1 with errno set here or (presumably) by xc_resource_op().
+ */
+static int fetch_dts_temp(xc_interface *xch, uint32_t cpu, bool package, int *temp)
+{
+    static bool has_reported_once;
+    xc_resource_entry_t entries[] = {
+        { .idx = package ? MSR_PACKAGE_THERM_STATUS : MSR_IA32_THERM_STATUS },
+        { .idx = MSR_TEMPERATURE_TARGET },
+    };
+    struct xc_resource_op ops = {
+        .cpu = cpu,
+        .entries = entries,
+        .nr_entries = ARRAY_SIZE(entries),
+    };
+    int tjmax, ret = xc_resource_op(xch, 1, &ops);
+
+    switch ( ret )
+    {
+    case 0:
+        /* This CPU isn't online or can't query this MSR */
+        errno = ENODATA;
+        return -1;
+
+    case 1:
+        /*
+         * The CPU doesn't support MSR_TEMPERATURE_TARGET: assume tjmax is
+         * 100, which is correct aside from a few selected Atom CPUs. Check
+         * the Linux kernel's coretemp.c for more information.
+         */
+        if ( !has_reported_once )
+        {
+            fprintf(stderr, "MSR_TEMPERATURE_TARGET is not supported, assume "
+                            "tjmax = 100, readings may be incorrect.\n");
+            has_reported_once = true;
+        }
+
+        tjmax = 100;
+        break;
+
+    case 2:
+        tjmax = (entries[1].val >> 16) & 0xff;
+        break;
+
+    default:
+        if ( ret > 0 )
+        {
+            fprintf(stderr,
+                    "Got unexpected xc_resource_op return value: %d\n", ret);
+            errno = EINVAL;
+        }
+        return -1;
+    }
+
+    *temp = tjmax - ((entries[0].val >> 16) & 0xff);
+    return 0;
+}
+
+/* Print the temperature of the given CPU, or of every package and core. */
+static void get_core_temp(int argc, char *argv[])
+{
+    int temp, cpu = -1;
+    unsigned int socket;
+    bool has_data = false;
+
+    if ( argc > 0 )
+        parse_cpuid(argv[0], &cpu);
+
+    if ( cpu != -1 )
+    {
+        if ( fetch_dts_temp(xc_handle, cpu, false, &temp) )
+        {
+            fprintf(stderr, "Unable to fetch temperature (%d - %s)\n",
+                    errno, strerror(errno));
+            exit(EXIT_FAILURE);
+        }
+        printf("CPU%d: %d°C\n", cpu, temp);
+        return;
+    }
+
+    /* Per socket measurement: probe one CPU every cores * threads CPU ids. */
+    for ( socket = 0, cpu = 0; cpu < max_cpu_nr;
+          socket++, cpu += physinfo.cores_per_socket * physinfo.threads_per_core )
+    {
+        if ( fetch_dts_temp(xc_handle, cpu, true, &temp) )
+        {
+            fprintf(stderr,
+                    "[Package%u] Unable to fetch temperature (%d - %s)\n",
+                    socket, errno, strerror(errno));
+            /* Per-core DTS may still work without package readings. */
+            break;
+        }
+
+        has_data = true;
+        printf("Package%u: %d°C\n", socket, temp);
+    }
+
+    for ( cpu = 0; cpu < max_cpu_nr; cpu += physinfo.threads_per_core )
+    {
+        if ( fetch_dts_temp(xc_handle, cpu, false, &temp) )
+        {
+            fprintf(stderr, "[CPU%d] Unable to fetch temperature (%d - %s)\n",
+                    cpu, errno, strerror(errno));
+            continue;
+        }
+
+        has_data = true;
+        printf("CPU%d: %d°C\n", cpu, temp);
+    }
+
+    if ( !has_data )
+        exit(EXIT_FAILURE);
+}
+
 void disable_turbo_mode(int argc, char *argv[])
 {
     int cpuid = -1;
@@ -1618,12 +1737,12 @@ struct {
     { "set-max-cstate", set_max_cstate_func},
     { "enable-turbo-mode", enable_turbo_mode },
     { "disable-turbo-mode", disable_turbo_mode },
+    { "get-core-temp", get_core_temp },
 };
 
 int main(int argc, char *argv[])
 {
     int i, ret = 0;
-    xc_physinfo_t physinfo;
     int nr_matches = 0;
     int matches_main_options[ARRAY_SIZE(main_options)];
 
-- 
2.52.0



--
Teddy Astie | Vates XCP-ng Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech
Re: [PATCH v10] xenpm: Add get-core-temp subcommand
Posted by Jan Beulich 4 days, 8 hours ago
On 07.04.2026 16:10, Teddy Astie wrote:
> --- a/CHANGELOG.md
> +++ b/CHANGELOG.md
> @@ -16,6 +16,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
>       mitigate (by rate-limiting) the system wide impact of an HVM guest
>       misusing atomic instructions.
>     - Support for CPIO microcode in discrete multiboot modules.
> +   - Introduce get-core-temp to xenpm to query CPU temperatures on Intel
> +     platforms.

Would you mind inserting "command" or "option" before "to xenpm"?

> @@ -1354,6 +1358,121 @@ void enable_turbo_mode(int argc, char *argv[])
>                  errno, strerror(errno));
>  }
>  
> +static int fetch_dts_temp(xc_interface *xch, uint32_t cpu, bool package, int *temp)
> +{
> +    xc_resource_entry_t entries[] = {
> +        { .idx = package ? MSR_PACKAGE_THERM_STATUS : MSR_IA32_THERM_STATUS },
> +        { .idx = MSR_TEMPERATURE_TARGET },
> +    };
> +    struct xc_resource_op ops = {
> +        .cpu = cpu,
> +        .entries = entries,
> +        .nr_entries = ARRAY_SIZE(entries),
> +    };
> +    int tjmax;
> +
> +    int ret = xc_resource_op(xch, 1, &ops);
> +
> +    switch ( ret )
> +    {
> +    case 0:
> +        /* This CPU isn't online or can't query this MSR */
> +        errno = ENODATA;
> +        return -1;
> +
> +    case 1:
> +    {
> +        /*
> +         * The CPU doesn't support MSR_TEMPERATURE_TARGET, we assume it's 100
> +         * which is correct aside a few selected Atom CPUs. Check Linux
> +         * kernel's coretemp.c for more information.
> +         */
> +        static bool has_reported_once = false;
> +
> +        if ( !has_reported_once )
> +        {
> +            fprintf(stderr, "MSR_TEMPERATURE_TARGET is not supported, assume "
> +                            "tjmax = 100, readings may be incorrect.\n");
> +            has_reported_once = true;
> +        }
> +
> +        tjmax = 100;
> +        break;
> +    }
> +
> +    case 2:
> +        tjmax = (entries[1].val >> 16) & 0xff;
> +        break;
> +
> +    default:
> +        if ( ret > 0 )
> +        {
> +            fprintf(stderr, "Got unexpected xc_resource_op return value: %d", ret);
> +            errno = EINVAL;
> +        }
> +        return -1;
> +    }
> +
> +    *temp = tjmax - ((entries[0].val >> 16) & 0xff);
> +    return 0;
> +}
> +
> +static void get_core_temp(int argc, char *argv[])
> +{
> +    int temp = -1, cpu = -1;

cpu's initializer is needed, but why would temp need one? You rely on ...

> +    unsigned int socket;
> +    bool has_data = false;
> +
> +    if ( argc > 0 )
> +        parse_cpuid(argv[0], &cpu);
> +
> +    if ( cpu != -1 )
> +    {
> +        if ( fetch_dts_temp(xc_handle, cpu, false, &temp) )
> +        {
> +            fprintf(stderr, "Unable to fetch temperature (%d - %s)\n",
> +                    errno, strerror(errno));
> +            exit(EXIT_FAILURE);
> +        }
> +        else
> +            printf("CPU%d: %d°C\n", cpu, temp);
> +        return;
> +    }
> +
> +    /* Per socket measurement */
> +    for ( socket = 0, cpu = 0; cpu < max_cpu_nr;
> +          socket++, cpu += physinfo.cores_per_socket * physinfo.threads_per_core )
> +    {
> +        if ( fetch_dts_temp(xc_handle, cpu, true, &temp) )

... fetch_dts_temp() to always update it in the success case anyway, both here and
in the other loop further down.

Other than this (happy to adjust while committing, provided you agree):
Reviewed-by: Jan Beulich <jbeulich@suse.com>

Jan

Re: [PATCH v10] xenpm: Add get-core-temp subcommand
Posted by Teddy Astie 4 days, 7 hours ago
Le 08/04/2026 à 14:36, Jan Beulich a écrit :
> On 07.04.2026 16:10, Teddy Astie wrote:
>> --- a/CHANGELOG.md
>> +++ b/CHANGELOG.md
>> @@ -16,6 +16,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
>>        mitigate (by rate-limiting) the system wide impact of an HVM guest
>>        misusing atomic instructions.
>>      - Support for CPIO microcode in discrete multiboot modules.
>> +   - Introduce get-core-temp to xenpm to query CPU temperatures on Intel
>> +     platforms.
> 
> Would you mind inserting "command" or "option" before "to xenpm"?
> 

Something like
 > Introduce get-core-temp option to xenpm command ... ?

(or something like that)

I don't have any issue with rewording it.

>> @@ -1354,6 +1358,121 @@ void enable_turbo_mode(int argc, char *argv[])
>>                   errno, strerror(errno));
>>   }
>>   
>> +static int fetch_dts_temp(xc_interface *xch, uint32_t cpu, bool package, int *temp)
>> +{
>> +    xc_resource_entry_t entries[] = {
>> +        { .idx = package ? MSR_PACKAGE_THERM_STATUS : MSR_IA32_THERM_STATUS },
>> +        { .idx = MSR_TEMPERATURE_TARGET },
>> +    };
>> +    struct xc_resource_op ops = {
>> +        .cpu = cpu,
>> +        .entries = entries,
>> +        .nr_entries = ARRAY_SIZE(entries),
>> +    };
>> +    int tjmax;
>> +
>> +    int ret = xc_resource_op(xch, 1, &ops);
>> +
>> +    switch ( ret )
>> +    {
>> +    case 0:
>> +        /* This CPU isn't online or can't query this MSR */
>> +        errno = ENODATA;
>> +        return -1;
>> +
>> +    case 1:
>> +    {
>> +        /*
>> +         * The CPU doesn't support MSR_TEMPERATURE_TARGET, we assume it's 100
>> +         * which is correct aside a few selected Atom CPUs. Check Linux
>> +         * kernel's coretemp.c for more information.
>> +         */
>> +        static bool has_reported_once = false;
>> +
>> +        if ( !has_reported_once )
>> +        {
>> +            fprintf(stderr, "MSR_TEMPERATURE_TARGET is not supported, assume "
>> +                            "tjmax = 100, readings may be incorrect.\n");
>> +            has_reported_once = true;
>> +        }
>> +
>> +        tjmax = 100;
>> +        break;
>> +    }
>> +
>> +    case 2:
>> +        tjmax = (entries[1].val >> 16) & 0xff;
>> +        break;
>> +
>> +    default:
>> +        if ( ret > 0 )
>> +        {
>> +            fprintf(stderr, "Got unexpected xc_resource_op return value: %d", ret);
>> +            errno = EINVAL;
>> +        }
>> +        return -1;
>> +    }
>> +
>> +    *temp = tjmax - ((entries[0].val >> 16) & 0xff);
>> +    return 0;
>> +}
>> +
>> +static void get_core_temp(int argc, char *argv[])
>> +{
>> +    int temp = -1, cpu = -1;
> 
> cpu's initializer is needed, but why would temp need one? You rely on ...
> 
>> +    unsigned int socket;
>> +    bool has_data = false;
>> +
>> +    if ( argc > 0 )
>> +        parse_cpuid(argv[0], &cpu);
>> +
>> +    if ( cpu != -1 )
>> +    {
>> +        if ( fetch_dts_temp(xc_handle, cpu, false, &temp) )
>> +        {
>> +            fprintf(stderr, "Unable to fetch temperature (%d - %s)\n",
>> +                    errno, strerror(errno));
>> +            exit(EXIT_FAILURE);
>> +        }
>> +        else
>> +            printf("CPU%d: %d°C\n", cpu, temp);
>> +        return;
>> +    }
>> +
>> +    /* Per socket measurement */
>> +    for ( socket = 0, cpu = 0; cpu < max_cpu_nr;
>> +          socket++, cpu += physinfo.cores_per_socket * physinfo.threads_per_core )
>> +    {
>> +        if ( fetch_dts_temp(xc_handle, cpu, true, &temp) )
> 
> ... fetch_dts_temp() to always update it in the success case anyway, both here and
> in the other loop further down.
> 

Indeed, that's not really required anymore.

> Other than this (happy to adjust while committing, provided you agree):
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> 

Looks good to me with the changes.

> Jan
> 

Teddy


--
Teddy Astie | Vates XCP-ng Developer

XCP-ng & Xen Orchestra - Vates solutions

web: https://vates.tech