From: Ingo Molnar <mingo@kernel.org>
Convert all uses of 'unsigned int' to 'u32' in <asm/cpuid/api.h>.
This is how a lot of the call sites are doing it, and the two
types are equivalent in the C sense - but 'u32' better expresses
that these are expressions of an immutable hardware ABI.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Ahmed S. Darwish <darwi@linutronix.de>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: John Ogness <john.ogness@linutronix.de>
Cc: x86-cpuid@lists.linux.dev
Link: https://lore.kernel.org/r/20250317164745.4754-3-darwi@linutronix.de
---
arch/x86/include/asm/cpuid/api.h | 40 ++++++++++++++++++++--------------------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/arch/x86/include/asm/cpuid/api.h b/arch/x86/include/asm/cpuid/api.h
index f26926ba5289..356db1894588 100644
--- a/arch/x86/include/asm/cpuid/api.h
+++ b/arch/x86/include/asm/cpuid/api.h
@@ -22,8 +22,8 @@ static inline bool have_cpuid_p(void)
}
#endif
-static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
+static inline void native_cpuid(u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx)
{
/* ecx is often an input as well as an output. */
asm volatile("cpuid"
@@ -36,9 +36,9 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
}
#define NATIVE_CPUID_REG(reg) \
-static inline unsigned int native_cpuid_##reg(unsigned int op) \
+static inline u32 native_cpuid_##reg(u32 op) \
{ \
- unsigned int eax = op, ebx, ecx = 0, edx; \
+ u32 eax = op, ebx, ecx = 0, edx; \
\
native_cpuid(&eax, &ebx, &ecx, &edx); \
\
@@ -65,9 +65,9 @@ NATIVE_CPUID_REG(edx)
* Clear ECX since some CPUs (Cyrix MII) do not set or clear ECX
* resulting in stale register contents being returned.
*/
-static inline void cpuid(unsigned int op,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
+static inline void cpuid(u32 op,
+ u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx)
{
*eax = op;
*ecx = 0;
@@ -75,9 +75,9 @@ static inline void cpuid(unsigned int op,
}
/* Some CPUID calls want 'count' to be placed in ECX */
-static inline void cpuid_count(unsigned int op, int count,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
+static inline void cpuid_count(u32 op, int count,
+ u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx)
{
*eax = op;
*ecx = count;
@@ -88,43 +88,43 @@ static inline void cpuid_count(unsigned int op, int count,
* CPUID functions returning a single datum:
*/
-static inline unsigned int cpuid_eax(unsigned int op)
+static inline u32 cpuid_eax(u32 op)
{
- unsigned int eax, ebx, ecx, edx;
+ u32 eax, ebx, ecx, edx;
cpuid(op, &eax, &ebx, &ecx, &edx);
return eax;
}
-static inline unsigned int cpuid_ebx(unsigned int op)
+static inline u32 cpuid_ebx(u32 op)
{
- unsigned int eax, ebx, ecx, edx;
+ u32 eax, ebx, ecx, edx;
cpuid(op, &eax, &ebx, &ecx, &edx);
return ebx;
}
-static inline unsigned int cpuid_ecx(unsigned int op)
+static inline u32 cpuid_ecx(u32 op)
{
- unsigned int eax, ebx, ecx, edx;
+ u32 eax, ebx, ecx, edx;
cpuid(op, &eax, &ebx, &ecx, &edx);
return ecx;
}
-static inline unsigned int cpuid_edx(unsigned int op)
+static inline u32 cpuid_edx(u32 op)
{
- unsigned int eax, ebx, ecx, edx;
+ u32 eax, ebx, ecx, edx;
cpuid(op, &eax, &ebx, &ecx, &edx);
return edx;
}
-static inline void __cpuid_read(unsigned int leaf, unsigned int subleaf, u32 *regs)
+static inline void __cpuid_read(u32 leaf, u32 subleaf, u32 *regs)
{
regs[CPUID_EAX] = leaf;
regs[CPUID_ECX] = subleaf;
@@ -141,7 +141,7 @@ static inline void __cpuid_read(unsigned int leaf, unsigned int subleaf, u32 *re
__cpuid_read(leaf, 0, (u32 *)(regs)); \
}
-static inline void __cpuid_read_reg(unsigned int leaf, unsigned int subleaf,
+static inline void __cpuid_read_reg(u32 leaf, u32 subleaf,
enum cpuid_regs_idx regidx, u32 *reg)
{
u32 regs[4];
--
2.45.2
On March 17, 2025 3:30:38 PM PDT, mingo@kernel.org wrote:
>From: Ingo Molnar <mingo@kernel.org>
>
>Convert all uses of 'unsigned int' to 'u32' in <asm/cpuid/api.h>.
>
>This is how a lot of the call sites are doing it, and the two
>types are equivalent in the C sense - but 'u32' better expresses
>that these are expressions of an immutable hardware ABI.
>
>Signed-off-by: Ingo Molnar <mingo@kernel.org>
>Cc: Ahmed S. Darwish <darwi@linutronix.de>
>Cc: Andrew Cooper <andrew.cooper3@citrix.com>
>Cc: "H. Peter Anvin" <hpa@zytor.com>
>Cc: John Ogness <john.ogness@linutronix.de>
>Cc: x86-cpuid@lists.linux.dev
>Link: https://lore.kernel.org/r/20250317164745.4754-3-darwi@linutronix.de
>---
> arch/x86/include/asm/cpuid/api.h | 40 ++++++++++++++++++++--------------------
> 1 file changed, 20 insertions(+), 20 deletions(-)
>
>diff --git a/arch/x86/include/asm/cpuid/api.h b/arch/x86/include/asm/cpuid/api.h
>index f26926ba5289..356db1894588 100644
>--- a/arch/x86/include/asm/cpuid/api.h
>+++ b/arch/x86/include/asm/cpuid/api.h
>@@ -22,8 +22,8 @@ static inline bool have_cpuid_p(void)
> }
> #endif
>
>-static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
>- unsigned int *ecx, unsigned int *edx)
>+static inline void native_cpuid(u32 *eax, u32 *ebx,
>+ u32 *ecx, u32 *edx)
> {
> /* ecx is often an input as well as an output. */
> asm volatile("cpuid"
>@@ -36,9 +36,9 @@ static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
> }
>
> #define NATIVE_CPUID_REG(reg) \
>-static inline unsigned int native_cpuid_##reg(unsigned int op) \
>+static inline u32 native_cpuid_##reg(u32 op) \
> { \
>- unsigned int eax = op, ebx, ecx = 0, edx; \
>+ u32 eax = op, ebx, ecx = 0, edx; \
> \
> native_cpuid(&eax, &ebx, &ecx, &edx); \
> \
>@@ -65,9 +65,9 @@ NATIVE_CPUID_REG(edx)
> * Clear ECX since some CPUs (Cyrix MII) do not set or clear ECX
> * resulting in stale register contents being returned.
> */
>-static inline void cpuid(unsigned int op,
>- unsigned int *eax, unsigned int *ebx,
>- unsigned int *ecx, unsigned int *edx)
>+static inline void cpuid(u32 op,
>+ u32 *eax, u32 *ebx,
>+ u32 *ecx, u32 *edx)
> {
> *eax = op;
> *ecx = 0;
>@@ -75,9 +75,9 @@ static inline void cpuid(unsigned int op,
> }
>
> /* Some CPUID calls want 'count' to be placed in ECX */
>-static inline void cpuid_count(unsigned int op, int count,
>- unsigned int *eax, unsigned int *ebx,
>- unsigned int *ecx, unsigned int *edx)
>+static inline void cpuid_count(u32 op, int count,
>+ u32 *eax, u32 *ebx,
>+ u32 *ecx, u32 *edx)
> {
> *eax = op;
> *ecx = count;
>@@ -88,43 +88,43 @@ static inline void cpuid_count(unsigned int op, int count,
> * CPUID functions returning a single datum:
> */
>
>-static inline unsigned int cpuid_eax(unsigned int op)
>+static inline u32 cpuid_eax(u32 op)
> {
>- unsigned int eax, ebx, ecx, edx;
>+ u32 eax, ebx, ecx, edx;
>
> cpuid(op, &eax, &ebx, &ecx, &edx);
>
> return eax;
> }
>
>-static inline unsigned int cpuid_ebx(unsigned int op)
>+static inline u32 cpuid_ebx(u32 op)
> {
>- unsigned int eax, ebx, ecx, edx;
>+ u32 eax, ebx, ecx, edx;
>
> cpuid(op, &eax, &ebx, &ecx, &edx);
>
> return ebx;
> }
>
>-static inline unsigned int cpuid_ecx(unsigned int op)
>+static inline u32 cpuid_ecx(u32 op)
> {
>- unsigned int eax, ebx, ecx, edx;
>+ u32 eax, ebx, ecx, edx;
>
> cpuid(op, &eax, &ebx, &ecx, &edx);
>
> return ecx;
> }
>
>-static inline unsigned int cpuid_edx(unsigned int op)
>+static inline u32 cpuid_edx(u32 op)
> {
>- unsigned int eax, ebx, ecx, edx;
>+ u32 eax, ebx, ecx, edx;
>
> cpuid(op, &eax, &ebx, &ecx, &edx);
>
> return edx;
> }
>
>-static inline void __cpuid_read(unsigned int leaf, unsigned int subleaf, u32 *regs)
>+static inline void __cpuid_read(u32 leaf, u32 subleaf, u32 *regs)
> {
> regs[CPUID_EAX] = leaf;
> regs[CPUID_ECX] = subleaf;
>@@ -141,7 +141,7 @@ static inline void __cpuid_read(unsigned int leaf, unsigned int subleaf, u32 *re
> __cpuid_read(leaf, 0, (u32 *)(regs)); \
> }
>
>-static inline void __cpuid_read_reg(unsigned int leaf, unsigned int subleaf,
>+static inline void __cpuid_read_reg(u32 leaf, u32 subleaf,
> enum cpuid_regs_idx regidx, u32 *reg)
> {
> u32 regs[4];
So in addition to avoid the in/out pointer hack, I would like to point out that cpuid() is now *exactly* the same as cpuid_count with a count of 0 (which is probably a good idea anyway.)
One more thing is that we ought to be able to make cpuid a const function, allowing the compiler to elide multiple calls. (Slight warning for feature-enabling MSRs changing CPUID), but that would require changing the API to returning a structure, since a pure or const structure can't return values by reference.
On 3/18/25 11:48, H. Peter Anvin wrote: > > One more thing is that we ought to be able to make cpuid a const > function, allowing the compiler to elide multiple calls. (Slight warning > for feature-enabling MSRs changing CPUID), but that would require > changing the API to returning a structure, since a pure or const > structure can't return values by reference. > So I experimented (test included) and found that gcc 14.2 does not seem to merge the cpuid calls in this code, whereas clang 19.1 does. -hpa
On 18/03/2025 6:48 pm, H. Peter Anvin wrote: > One more thing is that we ought to be able to make cpuid a const function, allowing the compiler to elide multiple calls. (Slight warning for feature-enabling MSRs changing CPUID), but that would require changing the API to returning a structure, since a pure or const structure can't return values by reference. It's not only the feature-enabling MSRs. It's also OSXSAVE/OSPKE/etc in CR4, and on Intel CPUs, the CPUID instruction still has a side effect for microcode patch revision MSR. There are a few too many side effects to call it const/pure. That said, when experimenting with the same in Xen, there was nothing interesting the compiler could do with const/pure because of how the existing logic is laid out. Removing volatile and the memory clobber however did allow the compiler to make slightly better code. ~Andrew
On March 18, 2025 12:09:59 PM PDT, Andrew Cooper <andrew.cooper3@citrix.com> wrote: >On 18/03/2025 6:48 pm, H. Peter Anvin wrote: >> One more thing is that we ought to be able to make cpuid a const function, allowing the compiler to elide multiple calls. (Slight warning for feature-enabling MSRs changing CPUID), but that would require changing the API to returning a structure, since a pure or const structure can't return values by reference. > >It's not only the feature-enabling MSRs. It's also OSXSAVE/OSPKE/etc in >CR4, and on Intel CPUs, the CPUID instruction still has a side effect >for microcode patch revision MSR. > >There are a few too many side effects to call it const/pure. > >That said, when experimenting with the same in Xen, there was nothing >interesting the compiler could do with const/pure because of how the >existing logic is laid out. Removing volatile and the memory clobber >however did allow the compiler to make slightly better code. > >~Andrew Well, I guess I lump CRs, DRs and MSRs together. There is also CPUID for serialization, which is really a totally different use for the same instruction. tglx has suggested that we should cache or even preload the cpuid data (the latter would have the potential advantage of making the memory data structures a little easier to manage, given the very large potential space.) The biggest issue is that there is no general mechanism for detecting which cpuid leaves have subleaves, and if they do, how many. I *believe* all existing subleaf sets are dense, but one could at least hypothetically see a vendor or VM define a CPUID leaf with a sparse subleaf set.
On Tue, 18 Mar 2025, H. Peter Anvin wrote:
>
> tglx has suggested that we should cache or even preload the cpuid data
> (the latter would have the potential advantage of making the memory
> data structures a little easier to manage, given the very large
> potential space.)
>
This is indeed in the patch queue that I plan to send after this leaf
cleanups one gets merged. We have a data model and CPUIDs are cached on
early boot. The cache is also refreshed during machine state changes
where the CPUIDs can change; e.g. a microcode update, PSN disable, etc.
Call sites then just do:
a = cpudata_cpuid(c, 0x0)->max_std_leaf;
b = cpudata_cpuid(c, 0x80000000)->max_ext_leaf;
struct leaf_0x1_0 *l1 = cpudata_cpuid(c, 0x1);
x = l0->cpu_vendorid_0;
y = l0->cpu_vendorid_1;
z = l0->cpu_vendorid_2;
and all the data is retrieved auto-magically from the cached tables.
The struct leaf_0xM_N C99 bitfield listings are auto generated by
x86-cpuid-db of course, just like in tools/arch/x86/kcpuid/cpuid.csv.
Thanks,
--
Ahmed S. Darwish
Linutronix GmbH
On 18/03/2025 7:25 pm, H. Peter Anvin wrote: > On March 18, 2025 12:09:59 PM PDT, Andrew Cooper <andrew.cooper3@citrix.com> wrote: >> On 18/03/2025 6:48 pm, H. Peter Anvin wrote: >>> One more thing is that we ought to be able to make cpuid a const function, allowing the compiler to elide multiple calls. (Slight warning for feature-enabling MSRs changing CPUID), but that would require changing the API to returning a structure, since a pure or const structure can't return values by reference. >> It's not only the feature-enabling MSRs. It's also OSXSAVE/OSPKE/etc in >> CR4, and on Intel CPUs, the CPUID instruction still has a side effect >> for microcode patch revision MSR. >> >> There are a few too many side effects to call it const/pure. >> >> That said, when experimenting with the same in Xen, there was nothing >> interesting the compiler could do with const/pure because of how the >> existing logic is laid out. Removing volatile and the memory clobber >> however did allow the compiler to make slightly better code. >> >> ~Andrew > Well, I guess I lump CRs, DRs and MSRs together. There is also CPUID for serialization, which is really a totally different use for the same instruction. Andy Luto got rid of all CPUID serialisation ages back. It's about the worst of the available options, even on native. It's IRET-to-self (doesn't exit under any virt), or SERIALISE on bleeding edge CPUs. > tglx has suggested that we should cache or even preload the cpuid data (the latter would have the potential advantage of making the memory data structures a little easier to manage, given the very large potential space.) > > The biggest issue is that there is no general mechanism for detecting which cpuid leaves have subleaves, and if they do, how many. I *believe* all existing subleaf sets are dense, but one could at least hypothetically see a vendor or VM define a CPUID leaf with a sparse subleaf set. XSTATE is sparse in general; AVX-512 being the notable absence on Intel's client line, and AMD has LWP at subleaf 62. But yes, these are all problems trying to maintain an in-memory copy of the CPUID state. Maintenance of this in Xen leaves a lot to be desired. ~Andrew
© 2016 - 2025 Red Hat, Inc.