[PATCH v2 4/6] KVM: arm64: add emulation for CTR_EL0 register

Sebastian Ott posted 6 patches 1 year, 7 months ago
There is a newer version of this series
[PATCH v2 4/6] KVM: arm64: add emulation for CTR_EL0 register
Posted by Sebastian Ott 1 year, 7 months ago
CTR_EL0 is currently handled as an invariant register, thus
guests will be presented with the host value of that register.

Add emulation for CTR_EL0 based on a per VM value. Userspace can
switch off DIC and IDC bits and reduce DminLine and IminLine sizes.

When CTR_EL0 is changed, validate it against CLIDR_EL1 and CCSIDR_EL1
to make sure we present the guest with consistent register values.
Changes that affect the generated cache topology values are allowed if
they don't clash with previous register writes.

Signed-off-by: Sebastian Ott <sebott@redhat.com>
---
 arch/arm64/kvm/sys_regs.c | 123 +++++++++++++++++++++++++++++++++-----
 1 file changed, 107 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1488b93050d4..2fe3492ba3c4 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -215,13 +215,8 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
 /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */
 #define CSSELR_MAX 14
 
-/*
- * Returns the minimum line size for the selected cache, expressed as
- * Log2(bytes).
- */
-static u8 get_min_cache_line_size(struct kvm *kvm, bool icache)
+static u8 __get_min_cache_line_size(u64 ctr, bool icache)
 {
-	u64 ctr = kvm->arch.ctr_el0;
 	u8 field;
 
 	if (icache)
@@ -240,6 +235,15 @@ static u8 get_min_cache_line_size(struct kvm *kvm, bool icache)
 	return field + 2;
 }
 
+/*
+ * Returns the minimum line size for the selected cache, expressed as
+ * Log2(bytes).
+ */
+static u8 get_min_cache_line_size(struct kvm *kvm, bool icache)
+{
+	return __get_min_cache_line_size(kvm->arch.ctr_el0, icache);
+}
+
 /* Which cache CCSIDR represents depends on CSSELR value. */
 static u32 get_ccsidr(struct kvm_vcpu *vcpu, u32 csselr)
 {
@@ -1856,6 +1860,45 @@ static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 	return 0;
 }
 
+static const struct sys_reg_desc *get_sys_reg_desc(u32 encoding);
+
+static int validate_clidr_el1(u64 clidr_el1, u64 ctr_el0)
+{
+	u64 idc = !CLIDR_LOC(clidr_el1) ||
+		  (!CLIDR_LOUIS(clidr_el1) && !CLIDR_LOUU(clidr_el1));
+
+	if ((clidr_el1 & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int validate_cache_top(struct kvm_vcpu *vcpu, u64 ctr_el0)
+{
+	const struct sys_reg_desc *clidr_el1;
+	unsigned int i;
+	int ret;
+
+	clidr_el1 = get_sys_reg_desc(SYS_CLIDR_EL1);
+	if (!clidr_el1)
+		return -ENOENT;
+
+	ret = validate_clidr_el1(__vcpu_sys_reg(vcpu, clidr_el1->reg), ctr_el0);
+	if (ret)
+		return ret;
+
+	if (!vcpu->arch.ccsidr)
+		return 0;
+
+	for (i = 0; i < CSSELR_MAX; i++) {
+		if ((FIELD_GET(CCSIDR_EL1_LineSize, get_ccsidr(vcpu, i)) + 4)
+		    < __get_min_cache_line_size(ctr_el0, i & CSSELR_EL1_InD))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		       const struct sys_reg_desc *r)
 {
@@ -1866,6 +1909,48 @@ static bool access_ctr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	return true;
 }
 
+static u64 reset_ctr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd)
+{
+	vcpu->kvm->arch.ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
+	return vcpu->kvm->arch.ctr_el0;
+}
+
+static int get_ctr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+		   u64 *val)
+{
+	*val = vcpu->kvm->arch.ctr_el0;
+	return 0;
+}
+
+static int set_ctr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+		   u64 val)
+{
+	int ret;
+
+	if (val == vcpu->kvm->arch.ctr_el0)
+		return 0;
+
+	if (kvm_vm_has_ran_once(vcpu->kvm))
+		return -EBUSY;
+
+	mutex_lock(&vcpu->kvm->arch.config_lock);
+	ret = arm64_check_features(vcpu, rd, val);
+	if (ret) {
+		mutex_unlock(&vcpu->kvm->arch.config_lock);
+		return ret;
+	}
+	ret = validate_cache_top(vcpu, val);
+	if (ret) {
+		mutex_unlock(&vcpu->kvm->arch.config_lock);
+		return ret;
+	}
+
+	vcpu->kvm->arch.ctr_el0 = val;
+	mutex_unlock(&vcpu->kvm->arch.config_lock);
+
+	return 0;
+}
+
 static bool access_clidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 			 const struct sys_reg_desc *r)
 {
@@ -1935,10 +2020,9 @@ static u64 reset_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 static int set_clidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 		      u64 val)
 {
-	u64 idc = !CLIDR_LOC(val) || (!CLIDR_LOUIS(val) && !CLIDR_LOUU(val));
 	u64 ctr_el0 = vcpu->kvm->arch.ctr_el0;
 
-	if ((val & CLIDR_EL1_RES0) || (!(ctr_el0 & CTR_EL0_IDC) && idc))
+	if (validate_clidr_el1(val, ctr_el0))
 		return -EINVAL;
 
 	__vcpu_sys_reg(vcpu, rd->reg) = val;
@@ -2452,7 +2536,11 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_CCSIDR2_EL1), undef_access },
 	{ SYS_DESC(SYS_SMIDR_EL1), undef_access },
 	{ SYS_DESC(SYS_CSSELR_EL1), access_csselr, reset_unknown, CSSELR_EL1 },
-	{ SYS_DESC(SYS_CTR_EL0), access_ctr },
+	{ SYS_DESC(SYS_CTR_EL0), access_ctr, .reset = reset_ctr,
+	  .get_user = get_ctr, .set_user = set_ctr, .val = (CTR_EL0_DIC_MASK |
+							    CTR_EL0_IDC_MASK |
+							    CTR_EL0_DminLine_MASK |
+							    CTR_EL0_IminLine_MASK)},
 	{ SYS_DESC(SYS_SVCR), undef_access },
 
 	{ PMU_SYS_REG(PMCR_EL0), .access = access_pmcr, .reset = reset_pmcr,
@@ -3616,6 +3704,13 @@ static bool index_to_params(u64 id, struct sys_reg_params *params)
 	}
 }
 
+static const struct sys_reg_desc *get_sys_reg_desc(u32 encoding)
+{
+	struct sys_reg_params params = encoding_to_params(encoding);
+
+	return find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
+}
+
 const struct sys_reg_desc *get_reg_by_id(u64 id,
 					 const struct sys_reg_desc table[],
 					 unsigned int num)
@@ -3669,18 +3764,11 @@ FUNCTION_INVARIANT(midr_el1)
 FUNCTION_INVARIANT(revidr_el1)
 FUNCTION_INVARIANT(aidr_el1)
 
-static u64 get_ctr_el0(struct kvm_vcpu *v, const struct sys_reg_desc *r)
-{
-	((struct sys_reg_desc *)r)->val = read_sanitised_ftr_reg(SYS_CTR_EL0);
-	return ((struct sys_reg_desc *)r)->val;
-}
-
 /* ->val is filled in by kvm_sys_reg_table_init() */
 static struct sys_reg_desc invariant_sys_regs[] __ro_after_init = {
 	{ SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 },
 	{ SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 },
 	{ SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 },
-	{ SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 },
 };
 
 static int get_invariant_sys_reg(u64 id, u64 __user *uaddr)
@@ -4066,6 +4154,9 @@ static void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 	 */
 	if (!kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
 		vcpu->arch.hcr_el2 |= HCR_TTLBOS;
+
+	if (kvm->arch.ctr_el0 != read_sanitised_ftr_reg(SYS_CTR_EL0))
+		vcpu->arch.hcr_el2 |= HCR_TID2;
 }
 
 void kvm_setup_traps(struct kvm_vcpu *vcpu)
-- 
2.42.0
Re: [PATCH v2 4/6] KVM: arm64: add emulation for CTR_EL0 register
Posted by Oliver Upton 1 year, 7 months ago
On Fri, Apr 26, 2024 at 12:49:48PM +0200, Sebastian Ott wrote:
> CTR_EL0 is currently handled as an invariant register, thus
> guests will be presented with the host value of that register.
> 
> Add emulation for CTR_EL0 based on a per VM value. Userspace can
> switch off DIC and IDC bits and reduce DminLine and IminLine sizes.
> 
> When CTR_EL0 is changed validate that against CLIDR_EL1 and CCSIDR_EL1
> to make sure we present the guest with consistent register values.
> Changes that affect the generated cache topology values are allowed if
> they don't clash with previous register writes.

Sorry I didn't speak up earlier, but I'm not sold on the need to
cross-validate userspace values for the cache type registers.

KVM should only be concerned about whether or not the selected feature
set matches what hardware is capable of and what KVM can virtualize. So
in the context of the CTR and the cache topology, I feel that they
should be _separately_ evaluated against the host's CTR_EL0.

Inconsistencies between fields in userspace values should be out of
scope; userspace shares the responsibility of presenting something
architectural, especially if it starts modifying ID registers. Otherwise
I'm quite worried about the amount of glue required to plumb exhaustive
consistency checks for registers, especially considering the lack of
ordering.

Marc, I know this goes against what you had suggested earlier, is there
something in particular that you think warrants the consistency checks?

> +static u64 reset_ctr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd)
> +{
> +	vcpu->kvm->arch.ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
> +	return vcpu->kvm->arch.ctr_el0;
> +}
> +

We definitely do not want this value to change across a vCPU reset, it
should be handled like the other ID registers where they only get reset
once for the VM lifetime.

-- 
Thanks,
Oliver
Re: [PATCH v2 4/6] KVM: arm64: add emulation for CTR_EL0 register
Posted by Sebastian Ott 1 year, 7 months ago
Hej Oliver,

On Wed, 1 May 2024, Oliver Upton wrote:
>> +static u64 reset_ctr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd)
>> +{
>> +	vcpu->kvm->arch.ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
>> +	return vcpu->kvm->arch.ctr_el0;
>> +}
>> +
>
> We definitely do not want this value to change across a vCPU reset, it
> should be handled like the other ID registers where they only get reset
> once for the VM lifetime.

Hm, maybe I'm misreading the code here but I don't think this is true
for existing regs e.g. CLIDR_EL1 or the stuff defined via ID_WRITABLE().

Sebastian
Re: [PATCH v2 4/6] KVM: arm64: add emulation for CTR_EL0 register
Posted by Oliver Upton 1 year, 7 months ago
On Wed, May 08, 2024 at 05:17:25PM +0200, Sebastian Ott wrote:
> Hej Oliver,
> 
> On Wed, 1 May 2024, Oliver Upton wrote:
> > > +static u64 reset_ctr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd)
> > > +{
> > > +	vcpu->kvm->arch.ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
> > > +	return vcpu->kvm->arch.ctr_el0;
> > > +}
> > > +
> > 
> > We definitely do not want this value to change across a vCPU reset, it
> > should be handled like the other ID registers where they only get reset
> > once for the VM lifetime.
> 
> Hm, maybe I'm misreading the code here but I don't think this is true
> for existing regs e.g. CLIDR_EL1 or the stuff defined via ID_WRITABLE().

This works for the feature ID registers we maintain per-VM, but not for
the feature ID registers local to a vCPU. Sent some fixes out for this
but forgot to Cc you on it, apologies.

https://lore.kernel.org/kvmarm/20240502233529.1958459-1-oliver.upton@linux.dev/

-- 
Thanks,
Oliver
Re: [PATCH v2 4/6] KVM: arm64: add emulation for CTR_EL0 register
Posted by Marc Zyngier 1 year, 7 months ago
On Wed, 01 May 2024 09:15:09 +0100,
Oliver Upton <oliver.upton@linux.dev> wrote:
> 
> On Fri, Apr 26, 2024 at 12:49:48PM +0200, Sebastian Ott wrote:
> > CTR_EL0 is currently handled as an invariant register, thus
> > guests will be presented with the host value of that register.
> > 
> > Add emulation for CTR_EL0 based on a per VM value. Userspace can
> > switch off DIC and IDC bits and reduce DminLine and IminLine sizes.
> > 
> > When CTR_EL0 is changed validate that against CLIDR_EL1 and CCSIDR_EL1
> > to make sure we present the guest with consistent register values.
> > Changes that affect the generated cache topology values are allowed if
> > they don't clash with previous register writes.
> 
> Sorry I didn't speak up earlier, but I'm not sold on the need to
> cross-validate userspace values for the cache type registers.
> 
> KVM should only be concerned about whether or not the selected feature
> set matches what hardware is capable of and what KVM can virtualize. So
> in the context of the CTR and the cache topology, I feel that they
> should be _separately_ evaluated against the host's CTR_EL0.
> 
> Inconsistencies between fields in userspace values should be out of
> scope; userspace shares the responsibility of presenting something
> architectural, especially if it starts modifying ID registers. Otherwise
> I'm quite worried about the amount of glue required to plumb exhaustive
> consistency checks for registers, especially considering the lack of
> ordering.
> 
> Marc, I know this goes against what you had suggested earlier, is there
> something in particular that you think warrants the consistency
> checks?

The problem is that we have a dependency chain: individual cache
levels are validated against CLIDR/CCSIDR, which are themselves
validated against CTR_EL0.

Change one, and everything becomes inconsistent. I absolutely don't
trust userspace to do a good job on that, and not validating this will
result in extremely hard to debug issues in the guest. Which is why
CTR_EL0 was an invariant in the first place, and everything derived from
it.

Take for example CLIDR_EL1.Lo{UU,UIS,C}. Their values depend on
CTR_EL0.{IDC,DIC}. SW is free to check one or the other. If you don't
have this dependency, you're in for some serious trouble.

The alternative is to *regenerate* the whole cache hierarchy when
CTR_EL0 is written, and too bad if it changes behind the guest's
back. Yes, the latter is a problem on its own...

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.
Re: [PATCH v2 4/6] KVM: arm64: add emulation for CTR_EL0 register
Posted by Oliver Upton 1 year, 7 months ago
On Fri, May 03, 2024 at 04:50:02PM +0100, Marc Zyngier wrote:
> > Marc, I know this goes against what you had suggested earlier, is there
> > something in particular that you think warrants the consistency
> > checks?
> 
> The problem is that we have a dependency chain: individual cache
> levels are validated against CLIDR/CCSIDR, which are themselves
> validated against CTR_EL0.
> 
> Change one, and everything becomes inconsistent. I absolutely don't
> trust userspace to do a good job on that

Violent agreement on this point, heh. 

> and not validating this will result in extremely hard to debug issues
> in the guest. Which is why CTR_EL0 was an invariant in the first place,
> and everything derived from it.

Sure, but userspace can completely hose the guest in tons of spectacular
ways, I don't see why feature ID registers require thorough
cross-checking of relationships between CPU features.

We already fail at this. Just looking at ID_AA64ISAR0_EL1, we do not
enforce any of the "FEAT_X implies FEAT_Y" relationships between all of
the crypto extensions. Userspace can also set up ID_AA64MMFR0_EL1 to
advertise that no translation granule is supported by the MMU.

I agree that KVM needs to sanitize feature ID registers against the
capabilities of hardware + KVM itself. Beyond that cross-checking
userspace against itself is difficult to get right, and I'm worried
about what the tangled mess will look like when we finish up the
plumbing for the whole feature ID space.

> Take for example CLIDR_EL1.Lo{UU,UIS,C}. Their values depend on
> CTR_EL0.{IDC,DIC}. SW is free to check one or the other. If you don't
> have this dependency, you're in for some serious trouble.

Right, we absolutely need to sanitize these against *hardware*, and
using CTR_EL0 is definitely the way to go. Userspace cannot promise a
stricter cache coherency model than what's offered in hardware.

Making sure userspace's values for CLIDR_EL1 and CTR_EL0 agree with each
other shouldn't matter if we've determined hardware coherency is at least
as strict as the model described through these registers.

Without the cross-check, it would be possible for userspace to set up the
vCPU as:

 - CTR_EL0.{IDC,DIC} = {1, 1}
 - CLIDR_EL1.Lo{UU,UIS,C} = {1, 1, 1}

But we would only allow this if hardware was {IDC,DIC} = {1,1}. So while
the values presented to the guest aren't consistent with one another, it
seems in the worst case the guest will do I$ maintenance where it isn't
actually necessary.

-- 
Thanks,
Oliver