From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Newer AMD platforms enhance x2AVIC feature to support up to 4096 vcpus.
This capatility is detected via CPUID_Fn8000000A_ECX[x2AVIC_EXT].
Modify the SVM driver to check the capability. If detected, extend bitmask
for guest max physical APIC ID to 0xFFF, increase maximum vcpu index to
4095, and increase the size of the Phyical APIC ID table from 4K to 32K in
order to accommodate up to 4096 entries.
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
arch/x86/include/asm/svm.h | 4 ++++
arch/x86/kvm/svm/avic.c | 45 ++++++++++++++++++++++++++------------
2 files changed, 35 insertions(+), 14 deletions(-)
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index e2fac21471f5..4ff5b2f767e1 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -268,6 +268,7 @@ enum avic_ipi_failure_cause {
};
#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0)
+#define AVIC_PHYSICAL_MAX_INDEX_4K_MASK GENMASK_ULL(11, 0)
/*
* For AVIC, the max index allowed for physical APIC ID table is 0xfe (254), as
@@ -277,11 +278,14 @@ enum avic_ipi_failure_cause {
/*
* For x2AVIC, the max index allowed for physical APIC ID table is 0x1ff (511).
+ * For extended x2AVIC, the max index allowed for physical APIC ID table is 0xfff (4095).
*/
#define X2AVIC_MAX_PHYSICAL_ID 0x1FFUL
+#define X2AVIC_MAX_PHYSICAL_ID_4K 0xFFFUL
static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID);
static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
+static_assert((X2AVIC_MAX_PHYSICAL_ID_4K & AVIC_PHYSICAL_MAX_INDEX_4K_MASK) == X2AVIC_MAX_PHYSICAL_ID_4K);
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 65fd245a9953..1fb322d2ac18 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -38,9 +38,9 @@
* size of the GATag is defined by hardware (32 bits), but is an opaque value
* as far as hardware is concerned.
*/
-#define AVIC_VCPU_ID_MASK AVIC_PHYSICAL_MAX_INDEX_MASK
+#define AVIC_VCPU_ID_MASK AVIC_PHYSICAL_MAX_INDEX_4K_MASK
-#define AVIC_VM_ID_SHIFT HWEIGHT32(AVIC_PHYSICAL_MAX_INDEX_MASK)
+#define AVIC_VM_ID_SHIFT HWEIGHT32(AVIC_PHYSICAL_MAX_INDEX_4K_MASK)
#define AVIC_VM_ID_MASK (GENMASK(31, AVIC_VM_ID_SHIFT) >> AVIC_VM_ID_SHIFT)
#define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VM_ID_SHIFT) & AVIC_VM_ID_MASK)
@@ -73,6 +73,9 @@ static u32 next_vm_id = 0;
static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
bool x2avic_enabled;
+static bool x2avic_4k_vcpu_supported;
+static u64 x2avic_max_physical_id;
+static u64 avic_physical_max_index_mask;
/*
* This is a wrapper of struct amd_iommu_ir_data.
@@ -87,7 +90,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
struct vmcb *vmcb = svm->vmcb01.ptr;
vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
- vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
+ vmcb->control.avic_physical_id &= ~avic_physical_max_index_mask;
vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
@@ -100,7 +103,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
*/
if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) {
vmcb->control.int_ctl |= X2APIC_MODE_MASK;
- vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID;
+ vmcb->control.avic_physical_id |= x2avic_max_physical_id;
/* Disabling MSR intercept for x2APIC registers */
svm_set_x2apic_msr_interception(svm, false);
} else {
@@ -122,7 +125,7 @@ static void avic_deactivate_vmcb(struct vcpu_svm *svm)
struct vmcb *vmcb = svm->vmcb01.ptr;
vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
- vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
+ vmcb->control.avic_physical_id &= ~avic_physical_max_index_mask;
/*
* If running nested and the guest uses its own MSR bitmap, there
@@ -182,7 +185,8 @@ void avic_vm_destroy(struct kvm *kvm)
if (kvm_svm->avic_logical_id_table_page)
__free_page(kvm_svm->avic_logical_id_table_page);
if (kvm_svm->avic_physical_id_table_page)
- __free_page(kvm_svm->avic_physical_id_table_page);
+ __free_pages(kvm_svm->avic_physical_id_table_page,
+ get_order(sizeof(u64) * (x2avic_max_physical_id + 1)));
spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
hash_del(&kvm_svm->hnode);
@@ -197,13 +201,15 @@ int avic_vm_init(struct kvm *kvm)
struct kvm_svm *k2;
struct page *p_page;
struct page *l_page;
- u32 vm_id;
+ u32 vm_id, entries;
if (!enable_apicv)
return 0;
- /* Allocating physical APIC ID table (4KB) */
- p_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ /* Allocating physical APIC ID table */
+ entries = x2avic_max_physical_id + 1;
+ p_page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+ get_order(sizeof(u64) * entries));
if (!p_page)
goto free_avic;
@@ -266,7 +272,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
if ((!x2avic_enabled && index > AVIC_MAX_PHYSICAL_ID) ||
- (index > X2AVIC_MAX_PHYSICAL_ID))
+ (index > x2avic_max_physical_id))
return NULL;
avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
@@ -281,7 +287,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) ||
- (id > X2AVIC_MAX_PHYSICAL_ID))
+ (id > x2avic_max_physical_id))
return -EINVAL;
if (!vcpu->arch.apic->regs)
@@ -493,7 +499,7 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
u32 icrl = svm->vmcb->control.exit_info_1;
u32 id = svm->vmcb->control.exit_info_2 >> 32;
- u32 index = svm->vmcb->control.exit_info_2 & 0x1FF;
+ u32 index = svm->vmcb->control.exit_info_2 & avic_physical_max_index_mask;
struct kvm_lapic *apic = vcpu->arch.apic;
trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
@@ -1218,8 +1224,19 @@ bool avic_hardware_setup(void)
/* AVIC is a prerequisite for x2AVIC. */
x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
- if (x2avic_enabled)
- pr_info("x2AVIC enabled\n");
+ if (x2avic_enabled) {
+ x2avic_4k_vcpu_supported = !!(cpuid_ecx(0x8000000a) & 0x40);
+ if (x2avic_4k_vcpu_supported) {
+ x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID_4K;
+ avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_4K_MASK;
+ } else {
+ x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID;
+ avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_MASK;
+ }
+
+ pr_info("x2AVIC enabled%s\n",
+ x2avic_4k_vcpu_supported ? " (w/ 4K-vcpu)" : "");
+ }
amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
--
2.48.1
On Thu, Feb 20, 2025, Naveen N Rao (AMD) wrote:
> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>
> Newer AMD platforms enhance x2AVIC feature to support up to 4096 vcpus.
> This capatility is detected via CPUID_Fn8000000A_ECX[x2AVIC_EXT].
>
> Modify the SVM driver to check the capability. If detected, extend bitmask
> for guest max physical APIC ID to 0xFFF, increase maximum vcpu index to
> 4095, and increase the size of the Phyical APIC ID table from 4K to 32K in
> order to accommodate up to 4096 entries.
Kinda silly, but please split this into (at least) two patches. One to introduce
the variables to replace the macros, and then one to actually implement support
for 4096 entries. That makes it a _lot_ easier to review each change (I'm having
trouble teasing out what's actually changing for 4k support).
The changelog also needs more info. Unless I'm misreading the diff, only the
physical table is being expanded? How does that work? (I might be able to
figure it out if I think hard, but I shouldn't have to think that hard).
> @@ -182,7 +185,8 @@ void avic_vm_destroy(struct kvm *kvm)
> if (kvm_svm->avic_logical_id_table_page)
> __free_page(kvm_svm->avic_logical_id_table_page);
> if (kvm_svm->avic_physical_id_table_page)
> - __free_page(kvm_svm->avic_physical_id_table_page);
> + __free_pages(kvm_svm->avic_physical_id_table_page,
> + get_order(sizeof(u64) * (x2avic_max_physical_id + 1)));
The order should be encapsulated in some way, e.g. through a global or a helper.
> @@ -1218,8 +1224,19 @@ bool avic_hardware_setup(void)
>
> /* AVIC is a prerequisite for x2AVIC. */
> x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
> - if (x2avic_enabled)
> - pr_info("x2AVIC enabled\n");
> + if (x2avic_enabled) {
> + x2avic_4k_vcpu_supported = !!(cpuid_ecx(0x8000000a) & 0x40);
No, add an X86_FEATURE_xxx for this, don't open code the CPUID lookup. I think
I'd even be tempted to use helpers instead of
> + if (x2avic_4k_vcpu_supported) {
> + x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID_4K;
> + avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_4K_MASK;
> + } else {
> + x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID;
> + avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_MASK;
> + }
> +
> + pr_info("x2AVIC enabled%s\n",
> + x2avic_4k_vcpu_supported ? " (w/ 4K-vcpu)" : "");
Maybe print the max number of vCPUs that are supported? That way there is clear
signal when 4k *isn't* supported (and communicating the max number of vCPUs in
the !4k case would be helpful too).
> + }
>
> amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
>
> --
> 2.48.1
>
On Mon, Jun 23, 2025 at 04:17:13PM -0700, Sean Christopherson wrote:
> On Thu, Feb 20, 2025, Naveen N Rao (AMD) wrote:
> > From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> >
> > Newer AMD platforms enhance x2AVIC feature to support up to 4096 vcpus.
> > This capatility is detected via CPUID_Fn8000000A_ECX[x2AVIC_EXT].
> >
> > Modify the SVM driver to check the capability. If detected, extend bitmask
> > for guest max physical APIC ID to 0xFFF, increase maximum vcpu index to
> > 4095, and increase the size of the Phyical APIC ID table from 4K to 32K in
> > order to accommodate up to 4096 entries.
>
> Kinda silly, but please split this into (at least) two patches. One to introduce
> the variables to replace the macros, and then one to actually implement support
> for 4096 entries. That makes it a _lot_ easier to review each change (I'm having
> trouble teasing out what's actually changing for 4k support).
Sure, let me re-work the patches.
>
> The changelog also needs more info. Unless I'm misreading the diff, only the
> physical table is being expanded? How does that work? (I might be able to
> figure it out if I think hard, but I shouldn't have to think that hard).
Right - it is primarily just the physical ID table being expanded to
allow AVIC hardware to lookup APIC IDs for vCPUs beyond 511.
As an aside, updated APM covering 4k vCPU support (and other updates)
has now been published:
https://docs.amd.com/v/u/en-US/24593_3.43
> > @@ -1218,8 +1224,19 @@ bool avic_hardware_setup(void)
> >
> > /* AVIC is a prerequisite for x2AVIC. */
> > x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
> > - if (x2avic_enabled)
> > - pr_info("x2AVIC enabled\n");
> > + if (x2avic_enabled) {
> > + x2avic_4k_vcpu_supported = !!(cpuid_ecx(0x8000000a) & 0x40);
>
> No, add an X86_FEATURE_xxx for this, don't open code the CPUID lookup. I think
> I'd even be tempted to use helpers instead of
Ack.
>
> > + if (x2avic_4k_vcpu_supported) {
> > + x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID_4K;
> > + avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_4K_MASK;
> > + } else {
> > + x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID;
> > + avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_MASK;
> > + }
> > +
> > + pr_info("x2AVIC enabled%s\n",
> > + x2avic_4k_vcpu_supported ? " (w/ 4K-vcpu)" : "");
>
> Maybe print the max number of vCPUs that are supported? That way there is clear
> signal when 4k *isn't* supported (and communicating the max number of vCPUs in
> the !4k case would be helpful too).
I'm tempted to go the opposite way and not print that 4k vCPUs are
supported by x2AVIC. As it is, there are many reasons AVIC may be
inhibited and lack of 4k vCPU support is just one other reason, but only
for large VMs.
Most users shouldn't have to care: where possible, AVIC will be enabled
by default (once that patch series lands). Users who truly care about
AVIC will anyway need to confirm AVIC isn't inhibited since looking at
the kernel log won't be sufficient. Those users can very well use cpuid
to figure out if 4k vCPU support is present.
Thanks,
Naveen
On Fri, Jul 18, 2025, Naveen N Rao wrote:
> On Mon, Jun 23, 2025 at 04:17:13PM -0700, Sean Christopherson wrote:
> > On Thu, Feb 20, 2025, Naveen N Rao (AMD) wrote:
> > > + if (x2avic_4k_vcpu_supported) {
> > > + x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID_4K;
> > > + avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_4K_MASK;
> > > + } else {
> > > + x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID;
> > > + avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_MASK;
> > > + }
> > > +
> > > + pr_info("x2AVIC enabled%s\n",
> > > + x2avic_4k_vcpu_supported ? " (w/ 4K-vcpu)" : "");
> >
> > Maybe print the max number of vCPUs that are supported? That way there is clear
> > signal when 4k *isn't* supported (and communicating the max number of vCPUs in
> > the !4k case would be helpful too).
>
> I'm tempted to go the opposite way and not print that 4k vCPUs are
> supported by x2AVIC. As it is, there are many reasons AVIC may be
> inhibited and lack of 4k vCPU support is just one other reason, but only
> for large VMs.
This isn't just about AVIC being inhibited though, it's about communicating
hardware support to the admin/user. While I usually advocate *against* using
printk to log information, I find SVM's pr_info()s about what is/isn't enabled
during module load to be extremely useful, e.g. as sanity checks. I (re)load
kvm-amd.ko on various hardware configurations on a regular basis, and more than
once the prints have helped me "remember" which platforms do/don't have SEV-ES,
AVIC, etc, and/or detect that I loaded kvm-amd.ko with the wrong overrides.
> Most users shouldn't have to care: where possible, AVIC will be enabled
> by default (once that patch series lands). Users who truly care about
> AVIC will anyway need to confirm AVIC isn't inhibited since looking at
> the kernel log won't be sufficient. Those users can very well use cpuid
> to figure out if 4k vCPU support is present.
If there wasn't already an "x2AVIC enabled" print, I would probably lean toward
doing nothing. But since pr_info("x2AVIC enabled\n") already exists, and has
plently of free space for adding extra information, there's basically zero downside
to printing out the number of supported CPUs. And it's not just a binary yes/no,
e.g. I would wager most people couldn't state the number of vCPUs supported by
the "old" x2AVIC.
On Fri, Jul 18, 2025 at 07:24:15AM -0700, Sean Christopherson wrote:
> On Fri, Jul 18, 2025, Naveen N Rao wrote:
> > On Mon, Jun 23, 2025 at 04:17:13PM -0700, Sean Christopherson wrote:
> > > On Thu, Feb 20, 2025, Naveen N Rao (AMD) wrote:
> > > > + if (x2avic_4k_vcpu_supported) {
> > > > + x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID_4K;
> > > > + avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_4K_MASK;
> > > > + } else {
> > > > + x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID;
> > > > + avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_MASK;
> > > > + }
> > > > +
> > > > + pr_info("x2AVIC enabled%s\n",
> > > > + x2avic_4k_vcpu_supported ? " (w/ 4K-vcpu)" : "");
> > >
> > > Maybe print the max number of vCPUs that are supported? That way there is clear
> > > signal when 4k *isn't* supported (and communicating the max number of vCPUs in
> > > the !4k case would be helpful too).
> >
> > I'm tempted to go the opposite way and not print that 4k vCPUs are
> > supported by x2AVIC. As it is, there are many reasons AVIC may be
> > inhibited and lack of 4k vCPU support is just one other reason, but only
> > for large VMs.
>
> This isn't just about AVIC being inhibited though, it's about communicating
> hardware support to the admin/user. While I usually advocate *against* using
> printk to log information, I find SVM's pr_info()s about what is/isn't enabled
> during module load to be extremely useful, e.g. as sanity checks. I (re)load
> kvm-amd.ko on various hardware configurations on a regular basis, and more than
> once the prints have helped me "remember" which platforms do/don't have SEV-ES,
> AVIC, etc, and/or detect that I loaded kvm-amd.ko with the wrong overrides.
Sure, if you are finding it helpful, that's fine.
>
> > Most users shouldn't have to care: where possible, AVIC will be enabled
> > by default (once that patch series lands). Users who truly care about
> > AVIC will anyway need to confirm AVIC isn't inhibited since looking at
> > the kernel log won't be sufficient. Those users can very well use cpuid
> > to figure out if 4k vCPU support is present.
>
> If there wasn't already an "x2AVIC enabled" print, I would probably lean toward
> doing nothing. But since pr_info("x2AVIC enabled\n") already exists, and has
> plently of free space for adding extra information, there's basically zero downside
> to printing out the number of supported CPUs. And it's not just a binary yes/no,
> e.g. I would wager most people couldn't state the number of vCPUs supported by
> the "old" x2AVIC.
Ok, this is what I have now. Let me know if you prefer different
wording:
/* AVIC is a prerequisite for x2AVIC. */
x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
- if (x2avic_enabled)
- pr_info("x2AVIC enabled\n");
+ if (x2avic_enabled) {
+ if (cpu_feature_enabled(X86_FEATURE_X2AVIC_EXT))
+ x2avic_max_physical_id = X2AVIC_4K_MAX_PHYSICAL_ID;
+ pr_info("x2AVIC enabled (upto %lld vCPUs)\n", x2avic_max_physical_id + 1);
+ }
- Naveen
On Mon, Jul 21, 2025, Naveen N Rao wrote:
> On Fri, Jul 18, 2025 at 07:24:15AM -0700, Sean Christopherson wrote:
> > If there wasn't already an "x2AVIC enabled" print, I would probably lean toward
> > doing nothing. But since pr_info("x2AVIC enabled\n") already exists, and has
> > plently of free space for adding extra information, there's basically zero downside
> > to printing out the number of supported CPUs. And it's not just a binary yes/no,
> > e.g. I would wager most people couldn't state the number of vCPUs supported by
> > the "old" x2AVIC.
>
> Ok, this is what I have now. Let me know if you prefer different
> wording:
>
> /* AVIC is a prerequisite for x2AVIC. */
> x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
> - if (x2avic_enabled)
> - pr_info("x2AVIC enabled\n");
> + if (x2avic_enabled) {
> + if (cpu_feature_enabled(X86_FEATURE_X2AVIC_EXT))
> + x2avic_max_physical_id = X2AVIC_4K_MAX_PHYSICAL_ID;
I actually like the approach you initially posted, where KVM explicitly sets
x2avic_max_physical_id for both paths. KVM could obviously rely on global
initialization to set X2AVIC_MAX_PHYSICAL_ID, but it's not like this code is
performance critical, and have all paths in one place makes it easy to understand
what the "default" value is.
if (cpu_feature_enabled(X86_FEATURE_X2AVIC_EXT))
x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID_4K;
else
x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID;
> + pr_info("x2AVIC enabled (upto %lld vCPUs)\n", x2avic_max_physical_id + 1);
Maybe s/upto/max?
> + }
>
>
> - Naveen
© 2016 - 2025 Red Hat, Inc.