Move the setup to vPMU code, doing the allocation of a vector only when
one is actually going to be needed. With that the handler function also
doesn't need to be split across two places anymore.
Add the freed up vector to the dynamically allocatable range.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
In case down the road we also want to have a build mode with vPMU code
excluded, this may also simplify things a little there.
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -1313,16 +1313,6 @@ static void cf_check error_interrupt(voi
entries[3], entries[2], entries[1], entries[0]);
}
-/*
- * This interrupt handles performance counters interrupt
- */
-
-static void cf_check pmu_interrupt(void)
-{
- ack_APIC_irq();
- vpmu_do_interrupt();
-}
-
void __init apic_intr_init(void)
{
smp_intr_init();
@@ -1333,9 +1323,6 @@ void __init apic_intr_init(void)
/* IPI vectors for APIC spurious and error interrupts */
set_direct_apic_vector(SPURIOUS_APIC_VECTOR, spurious_interrupt);
set_direct_apic_vector(ERROR_APIC_VECTOR, error_interrupt);
-
- /* Performance Counters Interrupt */
- set_direct_apic_vector(PMU_APIC_VECTOR, pmu_interrupt);
}
/*
--- a/xen/arch/x86/cpu/vpmu.c
+++ b/xen/arch/x86/cpu/vpmu.c
@@ -40,6 +40,8 @@ static struct arch_vpmu_ops __initdata v
static DEFINE_SPINLOCK(vpmu_lock);
static unsigned vpmu_count;
+static uint8_t __ro_after_init pmu_apic_vector;
+
static DEFINE_PER_CPU(struct vcpu *, last_vcpu);
static int __init cf_check parse_vpmu_params(const char *s)
@@ -94,7 +96,7 @@ void vpmu_lvtpc_update(uint32_t val)
vpmu = vcpu_vpmu(curr);
- vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);
+ vpmu->hw_lapic_lvtpc = pmu_apic_vector | (val & APIC_LVT_MASKED);
/* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
if ( has_vlapic(curr->domain) || !vpmu->xenpmu_data ||
@@ -160,7 +162,7 @@ static inline struct vcpu *choose_hwdom_
return hardware_domain->vcpu[idx];
}
-void vpmu_do_interrupt(void)
+static void cf_check vpmu_interrupt(void)
{
struct vcpu *sampled = current, *sampling;
struct vpmu_struct *vpmu;
@@ -169,6 +171,8 @@ void vpmu_do_interrupt(void)
uint32_t vlapic_lvtpc;
#endif
+ ack_APIC_irq();
+
/*
* dom0 will handle interrupt for special domains (e.g. idle domain) or,
* in XENPMU_MODE_ALL, for everyone.
@@ -369,7 +373,7 @@ void vpmu_save(struct vcpu *v)
vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
- apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
+ apic_write(APIC_LVTPC, pmu_apic_vector | APIC_LVT_MASKED);
}
int vpmu_load(struct vcpu *v, bool from_guest)
@@ -432,7 +436,7 @@ static int vpmu_arch_initialise(struct v
return ret;
}
- vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
+ vpmu->hw_lapic_lvtpc = pmu_apic_vector | APIC_LVT_MASKED;
vpmu_set(vpmu, VPMU_INITIALIZED);
return 0;
@@ -860,6 +864,8 @@ static int __init cf_check vpmu_init(voi
register_cpu_notifier(&cpu_nfb);
printk(XENLOG_INFO "VPMU: version " __stringify(XENPMU_VER_MAJ) "."
__stringify(XENPMU_VER_MIN) "\n");
+
+ alloc_direct_apic_vector(&pmu_apic_vector, vpmu_interrupt);
}
else
{
--- a/xen/arch/x86/include/asm/irq-vectors.h
+++ b/xen/arch/x86/include/asm/irq-vectors.h
@@ -8,13 +8,12 @@
#define EVENT_CHECK_VECTOR 0xfc
#define CALL_FUNCTION_VECTOR 0xfb
#define LOCAL_TIMER_VECTOR 0xfa
-#define PMU_APIC_VECTOR 0xf9
/*
* High-priority dynamically-allocated vectors. For interrupts that
* must be higher priority than any guest-bound interrupt.
*/
#define FIRST_HIPRIORITY_VECTOR 0xf1
-#define LAST_HIPRIORITY_VECTOR 0xf8
+#define LAST_HIPRIORITY_VECTOR 0xf9
/* IRQ0 (timer) is statically allocated but must be high priority. */
#define IRQ0_VECTOR 0xf0
--- a/xen/arch/x86/include/asm/vpmu.h
+++ b/xen/arch/x86/include/asm/vpmu.h
@@ -99,7 +99,6 @@ static inline bool vpmu_are_all_set(cons
void vpmu_lvtpc_update(uint32_t val);
int vpmu_do_msr(unsigned int msr, uint64_t *msr_content, bool is_write);
-void vpmu_do_interrupt(void);
void vpmu_initialise(struct vcpu *v);
void vpmu_destroy(struct vcpu *v);
void vpmu_save(struct vcpu *v);
On 19/11/2025 10:51 am, Jan Beulich wrote:
> Move the setup to vPMU code, doing the allocation of a vector only when
> one is actually going to be needed. With that the handler function also
> doesn't need to be split across two places anymore.
>
> Add the freed up vector to the dynamically allocatable range.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> ---
> In case down the road we also want to have a build mode with vPMU code
> excluded, this may also simplify things a little there.
>
> --- a/xen/arch/x86/apic.c
> +++ b/xen/arch/x86/apic.c
> @@ -1313,16 +1313,6 @@ static void cf_check error_interrupt(voi
> entries[3], entries[2], entries[1], entries[0]);
> }
>
> -/*
> - * This interrupt handles performance counters interrupt
> - */
> -
> -static void cf_check pmu_interrupt(void)
> -{
> - ack_APIC_irq();
> - vpmu_do_interrupt();
> -}
> -
I know you're only moving this, but it's likely-buggy before and after.
ack_APIC_irq() needs to be last, and Xen's habit for acking early is why
we have reentrancy problems.
I think there wants to be a patch ahead of this one swapping the order
so the ack is at the end, so that this patch can retain that property
when merging the functions.
Or, if you're absolutely certain it doesn't need backporting as a
bugfix, then merging into this patch is probably ok as long as it's
called out clearly in the commit message.
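Concretely, the prereq patch would amount to little more than swapping the
two calls (a sketch of the intent only, not an actual patch):

    /* Sketch: ack the LAPIC last, once all PMU processing is done, so
     * another PMI can't be taken while the handler is still in progress. */
    static void cf_check pmu_interrupt(void)
    {
        vpmu_do_interrupt();
        ack_APIC_irq();
    }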
> --- a/xen/arch/x86/cpu/vpmu.c
> +++ b/xen/arch/x86/cpu/vpmu.c
> @@ -369,7 +373,7 @@ void vpmu_save(struct vcpu *v)
>
> vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
>
> - apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
> + apic_write(APIC_LVTPC, pmu_apic_vector | APIC_LVT_MASKED);
> }
>
> int vpmu_load(struct vcpu *v, bool from_guest)
> @@ -432,7 +436,7 @@ static int vpmu_arch_initialise(struct v
> return ret;
> }
>
> - vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;
> + vpmu->hw_lapic_lvtpc = pmu_apic_vector | APIC_LVT_MASKED;
Taking a step back, I'm confused as to why we have pmu_apic_vector at all.
LVTPC needs programming with NMIs in order to provide coherent information.
I think this might go a long way to explaining some of the complaints
we've had in the past about junk showing up.
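For comparison, programming the LVTPC in NMI delivery mode needs no vector
at all; roughly (this is what Linux's perf code does, shown here purely as
an illustration):

    /* NMI delivery mode: the vector field is ignored, so no fixed vector
     * needs to be allocated for the PMI. */
    apic_write(APIC_LVTPC, APIC_DM_NMI);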
~Andrew
On 20.11.2025 13:24, Andrew Cooper wrote:
> On 19/11/2025 10:51 am, Jan Beulich wrote:
>> --- a/xen/arch/x86/apic.c
>> +++ b/xen/arch/x86/apic.c
>> @@ -1313,16 +1313,6 @@ static void cf_check error_interrupt(voi
>> entries[3], entries[2], entries[1], entries[0]);
>> }
>>
>> -/*
>> - * This interrupt handles performance counters interrupt
>> - */
>> -
>> -static void cf_check pmu_interrupt(void)
>> -{
>> - ack_APIC_irq();
>> - vpmu_do_interrupt();
>> -}
>> -
>
> I know you're only moving this, but it's likely-buggy before and after.
> ack_APIC_irq() needs to be last, and Xen's habit for acking early is why
> we have reentrancy problems.
I was wondering, but was vaguely (and apparently wrongly) remembering that
the PMU interrupt is self-disabling (i.e. requires re-enabling before it
can fire again). I should have checked vpmu_do_interrupt() a little more
closely, where, from the various plain "return"s, it's pretty clear that
isn't the case.
> I think there wants to be a patch ahead of this one swapping the order
> so the ack is at the end, so that this patch can retain that property
> when merging the functions.
>
> Or, if you're absolutely certain it doesn't need backporting as a
> bugfix, then merging into this patch is probably ok as long as it's
> called out clearly in the commit message.
No, I'll make this a separate, prereq patch.
Jan
On 20.11.2025 13:31, Jan Beulich wrote:
> On 20.11.2025 13:24, Andrew Cooper wrote:
>> On 19/11/2025 10:51 am, Jan Beulich wrote:
>>> --- a/xen/arch/x86/apic.c
>>> +++ b/xen/arch/x86/apic.c
>>> @@ -1313,16 +1313,6 @@ static void cf_check error_interrupt(voi
>>> entries[3], entries[2], entries[1], entries[0]);
>>> }
>>>
>>> -/*
>>> - * This interrupt handles performance counters interrupt
>>> - */
>>> -
>>> -static void cf_check pmu_interrupt(void)
>>> -{
>>> - ack_APIC_irq();
>>> - vpmu_do_interrupt();
>>> -}
>>> -
>>
>> I know you're only moving this, but it's likely-buggy before and after.
>> ack_APIC_irq() needs to be last, and Xen's habit for acking early is why
>> we have reentrancy problems.
>
> I was wondering, but was vaguely (and apparently wrongly) remembering that
> the PMU interrupt is self-disabling (i.e. requires re-enabling before it
> can fire again). I should have checked vpmu_do_interrupt() a little more
> closely, where, from the various plain "return"s, it's pretty clear that
> isn't the case.
>
>> I think there wants to be a patch ahead of this one swapping the order
>> so the ack is at the end, so that this patch can retain that property
>> when merging the functions.
>>
>> Or, if you're absolutely certain it doesn't need backporting as a
>> bugfix, then merging into this patch is probably ok as long as it's
>> called out clearly in the commit message.
>
> No, I'll make this a separate, prereq patch.
It won't really need backporting, though: Direct-APIC-vector handlers are
called with IRQs off, and hence where the ack happens is benign as long as
IRQs aren't transiently turned on while handling. Nevertheless it probably
makes sense to switch things around, so I'll add that extra patch anyway.
Jan
On 20/11/2025 12:31 pm, Jan Beulich wrote:
> On 20.11.2025 13:24, Andrew Cooper wrote:
>> On 19/11/2025 10:51 am, Jan Beulich wrote:
>>> --- a/xen/arch/x86/apic.c
>>> +++ b/xen/arch/x86/apic.c
>>> @@ -1313,16 +1313,6 @@ static void cf_check error_interrupt(voi
>>> entries[3], entries[2], entries[1], entries[0]);
>>> }
>>>
>>> -/*
>>> - * This interrupt handles performance counters interrupt
>>> - */
>>> -
>>> -static void cf_check pmu_interrupt(void)
>>> -{
>>> - ack_APIC_irq();
>>> - vpmu_do_interrupt();
>>> -}
>>> -
>> I know you're only moving this, but it's likely-buggy before and after.
>> ack_APIC_irq() needs to be last, and Xen's habit for acking early is why
>> we have reentrancy problems.
> I was wondering, but was vaguely (and apparently wrongly) remembering that
> the PMU interrupt is self-disabling (i.e. requires re-enabling before it
> can fire again). I should have checked vpmu_do_interrupt() a little more
> closely, where, from the various plain "return"s, it's pretty clear that
> isn't the case.
It can be configured to be self-disabling:
IA32_DEBUGCTL.Freeze_PerfMon_On_PMI, and variations on this theme
depending on the arch perfmon revision.
I'm not aware of AMD having a similar capability.
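An illustrative sketch of what that configuration looks like (the bit
position is per the Intel SDM; the macro names here are hypothetical, not
Xen's actual definitions):

    /* Illustration only: freeze all perfmon counters when a PMI is raised,
     * making the interrupt effectively self-disabling until re-armed.
     * IA32_DEBUGCTL is MSR 0x1d9; bit 12 is FREEZE_PERFMON_ON_PMI. */
    #define MSR_IA32_DEBUGCTL              0x000001d9
    #define DEBUGCTL_FREEZE_PERFMON_ON_PMI (1ULL << 12)

    uint64_t dbgctl;

    rdmsrl(MSR_IA32_DEBUGCTL, dbgctl);
    wrmsrl(MSR_IA32_DEBUGCTL, dbgctl | DEBUGCTL_FREEZE_PERFMON_ON_PMI);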
>
>> I think there wants to be a patch ahead of this one swapping the order
>> so the ack is at the end, so that this patch can retain that property
>> when merging the functions.
>>
>> Or, if you're absolutely certain it doesn't need backporting as a
>> bugfix, then merging into this patch is probably ok as long as it's
>> called out clearly in the commit message.
> No, I'll make this a separate, prereq patch.
Ok, and with this rebased on top, Acked-by: Andrew Cooper
<andrew.cooper3@citrix.com>