From: Dapeng Mi <dapeng1.mi@linux.intel.com>
If IA32_PERF_CAPABILITIES.FW_WRITE (bit 13) is set, each general-
purpose counter IA32_PMCi (starting at 0xc1) is accompanied by a
corresponding alias MSR starting at 0x4c1 (IA32_A_PMC0), which is
64 bits wide.
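
For illustration, the two ranges map counter by counter at a fixed
offset:

    IA32_PMC0  (0xc1)      <->  IA32_A_PMC0  (0x4c1)
    IA32_PMC1  (0xc2)      <->  IA32_A_PMC1  (0x4c2)
    ...
    IA32_PMCi  (0xc1 + i)  <->  IA32_A_PMCi  (0x4c1 + i)
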
The legacy IA32_PMCi MSRs are not full-width and their effective width
is determined by CPUID.0AH:EAX[23:16].
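
For reference, a minimal user-space sketch of reading that width field
(not part of this patch; it assumes a GCC-style <cpuid.h>):

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* CPUID leaf 0xA: architectural performance monitoring */
        __cpuid(0xa, eax, ebx, ecx, edx);

        /* EAX[23:16] reports the bit width of the GP counters */
        printf("GP counter width: %u bits\n", (eax >> 16) & 0xff);
        return 0;
    }
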
Since these two sets of MSRs are aliases, when IA32_A_PMCi is supported
it is safe to use it for save/restore instead of the legacy MSRs,
regardless of whether the hypervisor uses the legacy or the 64-bit
counterpart.
Full-width write is a user-visible feature and can be disabled
individually.
Reduce MAX_GP_COUNTERS from 18 to 15 to avoid conflicts between the
full-width MSR range and MSR_MCG_EXT_CTL. Current CPUs support at most
10 general-purpose counters, so 15 is sufficient for now and leaves room
for future expansion.
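
For reference, the new bound can be read off the MSR addresses (per the
Intel SDM and the defines below):

    old: MSR_IA32_PERF_STATUS - MSR_P6_EVNTSEL0 = 0x198 - 0x186 = 18
    new: MSR_MCG_EXT_CTL - MSR_IA32_PMC0        = 0x4d0 - 0x4c1 = 15

so 15 full-width counters occupy 0x4c1..0x4cf and stop just below
MSR_MCG_EXT_CTL (0x4d0).
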
Bump minimum_version_id to reject incoming migration from older QEMU,
which could otherwise overflow the VMState arrays now that
MAX_GP_COUNTERS is smaller. This also requires bumping version_id,
which blocks outgoing migration to older QEMU as well.
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Zide Chen <zide.chen@intel.com>
---
target/i386/cpu.h | 5 ++++-
target/i386/kvm/kvm.c | 19 +++++++++++++++++--
target/i386/machine.c | 4 ++--
3 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 0b480c631ed0..e7cf4a7bd594 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -421,6 +421,7 @@ typedef enum X86Seg {
#define MSR_IA32_PERF_CAPABILITIES 0x345
#define PERF_CAP_LBR_FMT 0x3f
+#define PERF_CAP_FULL_WRITE (1U << 13)
#define MSR_IA32_TSX_CTRL 0x122
#define MSR_IA32_TSCDEADLINE 0x6e0
@@ -448,6 +449,8 @@ typedef enum X86Seg {
#define MSR_IA32_SGXLEPUBKEYHASH3 0x8f
#define MSR_P6_PERFCTR0 0xc1
+/* Alternative perfctr range with full access. */
+#define MSR_IA32_PMC0 0x4c1
#define MSR_IA32_SMBASE 0x9e
#define MSR_SMI_COUNT 0x34
@@ -1740,7 +1743,7 @@ typedef struct {
#endif
#define MAX_FIXED_COUNTERS 3
-#define MAX_GP_COUNTERS (MSR_IA32_PERF_STATUS - MSR_P6_EVNTSEL0)
+#define MAX_GP_COUNTERS 15
#define NB_OPMASK_REGS 8
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index e81fa46ed66c..530f50e4b218 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -4049,6 +4049,12 @@ static int kvm_put_msrs(X86CPU *cpu, KvmPutState level)
}
if (has_architectural_pmu_version > 0) {
+ uint32_t perf_cntr_base = MSR_P6_PERFCTR0;
+
+ if (env->features[FEAT_PERF_CAPABILITIES] & PERF_CAP_FULL_WRITE) {
+ perf_cntr_base = MSR_IA32_PMC0;
+ }
+
if (has_architectural_pmu_version > 1) {
/* Stop the counter. */
kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
@@ -4061,7 +4067,7 @@ static int kvm_put_msrs(X86CPU *cpu, KvmPutState level)
env->msr_fixed_counters[i]);
}
for (i = 0; i < num_architectural_pmu_gp_counters; i++) {
- kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i,
+ kvm_msr_entry_add(cpu, perf_cntr_base + i,
env->msr_gp_counters[i]);
kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i,
env->msr_gp_evtsel[i]);
@@ -4582,6 +4588,12 @@ static int kvm_get_msrs(X86CPU *cpu)
kvm_msr_entry_add(cpu, MSR_KVM_POLL_CONTROL, 1);
}
if (has_architectural_pmu_version > 0) {
+ uint32_t perf_cntr_base = MSR_P6_PERFCTR0;
+
+ if (env->features[FEAT_PERF_CAPABILITIES] & PERF_CAP_FULL_WRITE) {
+ perf_cntr_base = MSR_IA32_PMC0;
+ }
+
if (has_architectural_pmu_version > 1) {
kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0);
@@ -4591,7 +4603,7 @@ static int kvm_get_msrs(X86CPU *cpu)
kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i, 0);
}
for (i = 0; i < num_architectural_pmu_gp_counters; i++) {
- kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i, 0);
+ kvm_msr_entry_add(cpu, perf_cntr_base + i, 0);
kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i, 0);
}
}
@@ -4920,6 +4932,9 @@ static int kvm_get_msrs(X86CPU *cpu)
case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR0 + MAX_GP_COUNTERS - 1:
env->msr_gp_counters[index - MSR_P6_PERFCTR0] = msrs[i].data;
break;
+ case MSR_IA32_PMC0 ... MSR_IA32_PMC0 + MAX_GP_COUNTERS - 1:
+ env->msr_gp_counters[index - MSR_IA32_PMC0] = msrs[i].data;
+ break;
case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL0 + MAX_GP_COUNTERS - 1:
env->msr_gp_evtsel[index - MSR_P6_EVNTSEL0] = msrs[i].data;
break;
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 1125c8a64ec5..7d08a05835fc 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -685,8 +685,8 @@ static bool pmu_enable_needed(void *opaque)
static const VMStateDescription vmstate_msr_architectural_pmu = {
.name = "cpu/msr_architectural_pmu",
- .version_id = 1,
- .minimum_version_id = 1,
+ .version_id = 2,
+ .minimum_version_id = 2,
.needed = pmu_enable_needed,
.fields = (const VMStateField[]) {
VMSTATE_UINT64(env.msr_fixed_ctr_ctrl, X86CPU),
--
2.52.0
On 1/17/2026 9:10 AM, Zide Chen wrote:
> From: Dapeng Mi <dapeng1.mi@linux.intel.com>
>
> If IA32_PERF_CAPABILITIES.FW_WRITE (bit 13) is set, each general-
> purpose counter IA32_PMCi (starting at 0xc1) is accompanied by a
> corresponding alias MSR starting at 0x4c1 (IA32_A_PMC0), which is
> 64 bits wide.
>
> The legacy IA32_PMCi MSRs are not full-width and their effective width
> is determined by CPUID.0AH:EAX[23:16].
>
> Since these two sets of MSRs are aliases, when IA32_A_PMCi is supported
> it is safe to use it for save/restore instead of the legacy MSRs,
> regardless of whether the hypervisor uses the legacy or the 64-bit
> counterpart.
>
> Full-width write is a user-visible feature and can be disabled
> individually.
>
> Reduce MAX_GP_COUNTERS from 18 to 15 to avoid conflicts between the
> full-width MSR range and MSR_MCG_EXT_CTL. Current CPUs support at most
> 10 general-purpose counters, so 15 is sufficient for now and leaves room
> for future expansion.
>
> Bump minimum_version_id to reject incoming migration from older QEMU,
> which could otherwise overflow the VMState arrays now that
> MAX_GP_COUNTERS is smaller. This also requires bumping version_id,
> which blocks outgoing migration to older QEMU as well.
>
> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> Signed-off-by: Zide Chen <zide.chen@intel.com>
> ---
> target/i386/cpu.h | 5 ++++-
> target/i386/kvm/kvm.c | 19 +++++++++++++++++--
> target/i386/machine.c | 4 ++--
> 3 files changed, 23 insertions(+), 5 deletions(-)
>
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 0b480c631ed0..e7cf4a7bd594 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -421,6 +421,7 @@ typedef enum X86Seg {
>
> #define MSR_IA32_PERF_CAPABILITIES 0x345
> #define PERF_CAP_LBR_FMT 0x3f
> +#define PERF_CAP_FULL_WRITE (1U << 13)
>
> #define MSR_IA32_TSX_CTRL 0x122
> #define MSR_IA32_TSCDEADLINE 0x6e0
> @@ -448,6 +449,8 @@ typedef enum X86Seg {
> #define MSR_IA32_SGXLEPUBKEYHASH3 0x8f
>
> #define MSR_P6_PERFCTR0 0xc1
> +/* Alternative perfctr range with full access. */
> +#define MSR_IA32_PMC0 0x4c1
>
> #define MSR_IA32_SMBASE 0x9e
> #define MSR_SMI_COUNT 0x34
> @@ -1740,7 +1743,7 @@ typedef struct {
> #endif
>
> #define MAX_FIXED_COUNTERS 3
> -#define MAX_GP_COUNTERS (MSR_IA32_PERF_STATUS - MSR_P6_EVNTSEL0)
> +#define MAX_GP_COUNTERS 15
I suppose this is good enough for AMD CPUs, but the AMD folks need to
double-check. Thanks.
>
> #define NB_OPMASK_REGS 8
>
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index e81fa46ed66c..530f50e4b218 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -4049,6 +4049,12 @@ static int kvm_put_msrs(X86CPU *cpu, KvmPutState level)
> }
>
> if (has_architectural_pmu_version > 0) {
> + uint32_t perf_cntr_base = MSR_P6_PERFCTR0;
> +
> + if (env->features[FEAT_PERF_CAPABILITIES] & PERF_CAP_FULL_WRITE) {
> + perf_cntr_base = MSR_IA32_PMC0;
> + }
> +
> if (has_architectural_pmu_version > 1) {
> /* Stop the counter. */
> kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
> @@ -4061,7 +4067,7 @@ static int kvm_put_msrs(X86CPU *cpu, KvmPutState level)
> env->msr_fixed_counters[i]);
> }
> for (i = 0; i < num_architectural_pmu_gp_counters; i++) {
> - kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i,
> + kvm_msr_entry_add(cpu, perf_cntr_base + i,
> env->msr_gp_counters[i]);
> kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i,
> env->msr_gp_evtsel[i]);
> @@ -4582,6 +4588,12 @@ static int kvm_get_msrs(X86CPU *cpu)
> kvm_msr_entry_add(cpu, MSR_KVM_POLL_CONTROL, 1);
> }
> if (has_architectural_pmu_version > 0) {
> + uint32_t perf_cntr_base = MSR_P6_PERFCTR0;
> +
> + if (env->features[FEAT_PERF_CAPABILITIES] & PERF_CAP_FULL_WRITE) {
> + perf_cntr_base = MSR_IA32_PMC0;
> + }
> +
> if (has_architectural_pmu_version > 1) {
> kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
> kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0);
> @@ -4591,7 +4603,7 @@ static int kvm_get_msrs(X86CPU *cpu)
> kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i, 0);
> }
> for (i = 0; i < num_architectural_pmu_gp_counters; i++) {
> - kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i, 0);
> + kvm_msr_entry_add(cpu, perf_cntr_base + i, 0);
> kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i, 0);
> }
> }
> @@ -4920,6 +4932,9 @@ static int kvm_get_msrs(X86CPU *cpu)
> case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR0 + MAX_GP_COUNTERS - 1:
> env->msr_gp_counters[index - MSR_P6_PERFCTR0] = msrs[i].data;
> break;
> + case MSR_IA32_PMC0 ... MSR_IA32_PMC0 + MAX_GP_COUNTERS - 1:
> + env->msr_gp_counters[index - MSR_IA32_PMC0] = msrs[i].data;
> + break;
> case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL0 + MAX_GP_COUNTERS - 1:
> env->msr_gp_evtsel[index - MSR_P6_EVNTSEL0] = msrs[i].data;
> break;
> diff --git a/target/i386/machine.c b/target/i386/machine.c
> index 1125c8a64ec5..7d08a05835fc 100644
> --- a/target/i386/machine.c
> +++ b/target/i386/machine.c
> @@ -685,8 +685,8 @@ static bool pmu_enable_needed(void *opaque)
>
> static const VMStateDescription vmstate_msr_architectural_pmu = {
> .name = "cpu/msr_architectural_pmu",
> - .version_id = 1,
> - .minimum_version_id = 1,
> + .version_id = 2,
> + .minimum_version_id = 2,
> .needed = pmu_enable_needed,
> .fields = (const VMStateField[]) {
> VMSTATE_UINT64(env.msr_fixed_ctr_ctrl, X86CPU),