Hi James,
On 12/5/25 21:58, James Morse wrote:
> While we trap the guest's attempts to read/write the MPAM control
> registers, the hardware continues to use them. Guest-EL0 uses KVM's
> user-space's configuration, as the value is left in the register, and
> guest-EL1 uses either the host kernel's configuration, or in the case of
> VHE, the UNKNOWN reset value of MPAM1_EL1.
>
> On nVHE systems, EL2 continues to use partid-0 for world-switch, even
> when the host may have configured its kernel threads to use a different
> partid. 0 may have been assigned to another task.
>
> We want to force the guest-EL1 to use KVM's user-space's MPAM
> configuration, and EL2s to match the host's EL1 config.
>
> On a nVHE system, copy the EL1 MPAM register to EL2. This ensures
> world-switch uses the same partid as the kernel thread does on the host.
>
> When loading the guest's EL1 registers, copy the VMM's EL0 partid to
> the EL1 register.
>
> For VHE systems, we can skip restoring the EL1 register for the host,
> as it is out-of-context once HCR_EL2.TGE is set.
>
> This is done outside the usual sysreg save/restore as the values can
> change behind KVMs back, so should not be stored in the guest context.
>
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> arch/arm64/include/asm/kvm_host.h | 1 +
> arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 16 ++++++++++++++++
> arch/arm64/kvm/hyp/nvhe/switch.c | 12 ++++++++++++
> arch/arm64/kvm/hyp/vhe/sysreg-sr.c | 1 +
> 4 files changed, 30 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index b763293281c8..baba23b7ce97 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -447,6 +447,7 @@ enum vcpu_sysreg {
> MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
> OSLSR_EL1, /* OS Lock Status Register */
> DISR_EL1, /* Deferred Interrupt Status Register */
> + MPAM1_EL1, /* Memory Partitioning And Monitoring register */
>
> /* Performance Monitors Registers */
> PMCR_EL0, /* Control Register */
> diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> index a17cbe7582de..d8ab0ced0403 100644
> --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> @@ -166,6 +166,9 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
> ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
> }
>
> + if (system_supports_mpam())
> + ctxt_sys_reg(ctxt, MPAM1_EL1) = read_sysreg_el1(SYS_MPAM1);
> +
> ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1);
> ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
> ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
> @@ -261,6 +264,9 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt,
> write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
> }
>
> + if (system_supports_mpam())
> + write_sysreg_el1(ctxt_sys_reg(ctxt, MPAM1_EL1), SYS_MPAM1);
> +
I don't think this MPAM change to
__sysreg_save_el1_state()/__sysreg_restore_el1_state() adds anything.
Assuming MPAM0 and MPAM1 are always set together, and that we continue
to trap accesses from the guest, there is nothing that can change the
value away from that of the task. If MPAM0 and MPAM1 were set
separately, then we would need a way to restore the host value of MPAM1
in the nVHE case, because of the copy from MPAM0 to MPAM1.
> if (!has_vhe() &&
> cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
> ctxt->__hyp_running_vcpu) {
> @@ -374,4 +380,14 @@ static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu)
> write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2);
> }
>
> +/*
> + * The _EL0 value was written by the host's context switch and belongs to the
> + * VMM. Copy this into the guest's _EL1 register.
> + */
> +static inline void __mpam_guest_load(void)
> +{
> + if (system_supports_mpam())
> + write_sysreg_el1(read_sysreg_s(SYS_MPAM0_EL1), SYS_MPAM1);
> +}
> +
> #endif /* __ARM64_KVM_HYP_SYSREG_SR_H__ */
> diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
> index d3b9ec8a7c28..b785977aa61e 100644
> --- a/arch/arm64/kvm/hyp/nvhe/switch.c
> +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
> @@ -238,6 +238,15 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
> return __fixup_guest_exit(vcpu, exit_code, handlers);
> }
>
> +/* Use the host thread's partid and pmg for world switch */
> +static void __mpam_copy_el1_to_el2(void)
> +{
> + if (system_supports_mpam()) {
> + write_sysreg_s(read_sysreg_s(SYS_MPAM1_EL1), SYS_MPAM2_EL2);
> + isb();
> + }
> +}
> +
> /* Switch to the guest for legacy non-VHE systems */
> int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
> {
> @@ -247,6 +256,8 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
> bool pmu_switch_needed;
> u64 exit_code;
>
> + __mpam_copy_el1_to_el2();
> +
What about the other hypercalls, e.g. __pkvm_init_vm()? As MPAM2_EL2 is
only updated here, in __kvm_vcpu_run(), don't we end up running them all
with the MPAM settings of the previous vcpu that ran?
> /*
> * Having IRQs masked via PMR when entering the guest means the GIC
> * will not signal the CPU of interrupts of lower priority, and the
> @@ -306,6 +317,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
> __timer_enable_traps(vcpu);
>
> __debug_switch_to_guest(vcpu);
> + __mpam_guest_load();
As MPAM0 and MPAM1 are kept in sync, this doesn't do anything in the
nVHE case.
>
> do {
> /* Jump in the fire! */
> diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
> index f28c6cf4fe1b..2a84edc90465 100644
> --- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
> +++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
> @@ -222,6 +222,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
> */
> __sysreg32_restore_state(vcpu);
> __sysreg_restore_user_state(guest_ctxt);
> + __mpam_guest_load();
>
> if (unlikely(is_hyp_ctxt(vcpu))) {
> __sysreg_restore_vel2_state(vcpu);
Thanks,
Ben