With the introduction of stage-2 huge mappings in the pKVM hypervisor,
CMOs for guest pages are needed at PMD_SIZE granularity. The fixmap
only supports PAGE_SIZE mappings, and iterating over a huge page is
time-consuming (mostly due to the TLBI in hyp_fixmap_unmap()), which is
a problem for EL2 latency.
Introduce a shared PMD_SIZE fixmap (hyp_fixblock_map/hyp_fixblock_unmap)
to improve guest page CMOs when stage-2 huge mappings are installed.
On a Pixel 6, the iterative solution resulted in a latency of ~700us,
while the PMD_SIZE fixmap reduces it to ~100us.
Because of the horrendous private range allocation that would be
necessary, this is disabled on 64KiB-page systems.
Suggested-by: Quentin Perret <qperret@google.com>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 1b43bcd2a679..2888b5d03757 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -59,6 +59,11 @@ typedef u64 kvm_pte_t;
#define KVM_PHYS_INVALID (-1ULL)
+#define KVM_PTE_TYPE BIT(1)
+#define KVM_PTE_TYPE_BLOCK 0
+#define KVM_PTE_TYPE_PAGE 1
+#define KVM_PTE_TYPE_TABLE 1
+
#define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
#define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index 230e4f2527de..b0c72bc2d5ba 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -13,9 +13,11 @@
extern struct kvm_pgtable pkvm_pgtable;
extern hyp_spinlock_t pkvm_pgd_lock;
-int hyp_create_pcpu_fixmap(void);
+int hyp_create_fixmap(void);
void *hyp_fixmap_map(phys_addr_t phys);
void hyp_fixmap_unmap(void);
+void *hyp_fixblock_map(phys_addr_t phys);
+void hyp_fixblock_unmap(void);
int hyp_create_idmap(u32 hyp_va_bits);
int hyp_map_vectors(void);
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 97e0fea9db4e..9f3ffa4e0690 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -220,16 +220,52 @@ static void guest_s2_put_page(void *addr)
hyp_put_page(&current_vm->pool, addr);
}
+static void *__fixmap_guest_page(void *va, size_t *size)
+{
+ if (IS_ALIGNED(*size, PMD_SIZE)) {
+ void *addr = hyp_fixblock_map(__hyp_pa(va));
+
+ if (addr)
+ return addr;
+
+ *size = PAGE_SIZE;
+ }
+
+ if (IS_ALIGNED(*size, PAGE_SIZE))
+ return hyp_fixmap_map(__hyp_pa(va));
+
+ WARN_ON(1);
+
+ return NULL;
+}
+
+static void __fixunmap_guest_page(size_t size)
+{
+ switch (size) {
+ case PAGE_SIZE:
+ hyp_fixmap_unmap();
+ break;
+ case PMD_SIZE:
+ hyp_fixblock_unmap();
+ break;
+ default:
+ WARN_ON(1);
+ }
+}
+
static void clean_dcache_guest_page(void *va, size_t size)
{
WARN_ON(!PAGE_ALIGNED(size));
while (size) {
- __clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)),
- PAGE_SIZE);
- hyp_fixmap_unmap();
- va += PAGE_SIZE;
- size -= PAGE_SIZE;
+ size_t fixmap_size = size == PMD_SIZE ? size : PAGE_SIZE;
+ void *addr = __fixmap_guest_page(va, &fixmap_size);
+
+ __clean_dcache_guest_page(addr, fixmap_size);
+ __fixunmap_guest_page(fixmap_size);
+
+ size -= fixmap_size;
+ va += fixmap_size;
}
}
@@ -238,11 +274,14 @@ static void invalidate_icache_guest_page(void *va, size_t size)
WARN_ON(!PAGE_ALIGNED(size));
while (size) {
- __invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)),
- PAGE_SIZE);
- hyp_fixmap_unmap();
- va += PAGE_SIZE;
- size -= PAGE_SIZE;
+ size_t fixmap_size = size == PMD_SIZE ? size : PAGE_SIZE;
+ void *addr = __fixmap_guest_page(va, &fixmap_size);
+
+ __invalidate_icache_guest_page(addr, fixmap_size);
+ __fixunmap_guest_page(fixmap_size);
+
+ size -= fixmap_size;
+ va += fixmap_size;
}
}
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index f41c7440b34b..e3b1bece8504 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -229,9 +229,8 @@ int hyp_map_vectors(void)
return 0;
}
-void *hyp_fixmap_map(phys_addr_t phys)
+static void *fixmap_map_slot(struct hyp_fixmap_slot *slot, phys_addr_t phys)
{
- struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots);
kvm_pte_t pte, *ptep = slot->ptep;
pte = *ptep;
@@ -243,10 +242,21 @@ void *hyp_fixmap_map(phys_addr_t phys)
return (void *)slot->addr;
}
+void *hyp_fixmap_map(phys_addr_t phys)
+{
+ return fixmap_map_slot(this_cpu_ptr(&fixmap_slots), phys);
+}
+
static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
{
kvm_pte_t *ptep = slot->ptep;
u64 addr = slot->addr;
+ u32 level;
+
+ if (FIELD_GET(KVM_PTE_TYPE, *ptep) == KVM_PTE_TYPE_PAGE)
+ level = KVM_PGTABLE_LAST_LEVEL;
+ else
+ level = KVM_PGTABLE_LAST_LEVEL - 1; /* create_fixblock() guarantees PMD level */
WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID);
@@ -260,7 +270,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
* https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
*/
dsb(ishst);
- __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), KVM_PGTABLE_LAST_LEVEL);
+ __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
dsb(ish);
isb();
}
@@ -273,9 +283,9 @@ void hyp_fixmap_unmap(void)
static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
{
- struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg);
+ struct hyp_fixmap_slot *slot = (struct hyp_fixmap_slot *)ctx->arg;
- if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_LAST_LEVEL)
+ if (!kvm_pte_valid(ctx->old) || (ctx->end - ctx->start) != kvm_granule_size(ctx->level))
return -EINVAL;
slot->addr = ctx->addr;
@@ -296,13 +306,73 @@ static int create_fixmap_slot(u64 addr, u64 cpu)
struct kvm_pgtable_walker walker = {
.cb = __create_fixmap_slot_cb,
.flags = KVM_PGTABLE_WALK_LEAF,
- .arg = (void *)cpu,
+ .arg = (void *)per_cpu_ptr(&fixmap_slots, cpu),
};
return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker);
}
-int hyp_create_pcpu_fixmap(void)
+#ifndef CONFIG_ARM64_64K_PAGES
+static struct hyp_fixmap_slot hyp_fixblock_slot;
+static DEFINE_HYP_SPINLOCK(hyp_fixblock_lock);
+
+void *hyp_fixblock_map(phys_addr_t phys)
+{
+ hyp_spin_lock(&hyp_fixblock_lock);
+ return fixmap_map_slot(&hyp_fixblock_slot, phys);
+}
+
+void hyp_fixblock_unmap(void)
+{
+ fixmap_clear_slot(&hyp_fixblock_slot);
+ hyp_spin_unlock(&hyp_fixblock_lock);
+}
+
+static int create_fixblock(void)
+{
+ struct kvm_pgtable_walker walker = {
+ .cb = __create_fixmap_slot_cb,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ .arg = (void *)&hyp_fixblock_slot,
+ };
+ unsigned long addr;
+ phys_addr_t phys;
+ int ret, i;
+
+ /* Find a RAM phys address, PMD aligned */
+ for (i = 0; i < hyp_memblock_nr; i++) {
+ phys = ALIGN(hyp_memory[i].base, PMD_SIZE);
+ if (phys + PMD_SIZE < (hyp_memory[i].base + hyp_memory[i].size))
+ break;
+ }
+
+ if (i >= hyp_memblock_nr)
+ return -EINVAL;
+
+ hyp_spin_lock(&pkvm_pgd_lock);
+ addr = ALIGN(__io_map_base, PMD_SIZE);
+ ret = __pkvm_alloc_private_va_range(addr, PMD_SIZE);
+ if (ret)
+ goto unlock;
+
+ ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PMD_SIZE, phys, PAGE_HYP);
+ if (ret)
+ goto unlock;
+
+ ret = kvm_pgtable_walk(&pkvm_pgtable, addr, PMD_SIZE, &walker);
+
+unlock:
+ hyp_spin_unlock(&pkvm_pgd_lock);
+
+ return ret;
+}
+#else
+void hyp_fixblock_unmap(void) { WARN_ON(1); }
+void *hyp_fixblock_map(phys_addr_t phys) { return NULL; }
+static int create_fixblock(void) { return 0; }
+#endif
+
+int hyp_create_fixmap(void)
{
unsigned long addr, i;
int ret;
@@ -322,7 +392,7 @@ int hyp_create_pcpu_fixmap(void)
return ret;
}
- return 0;
+ return create_fixblock();
}
int hyp_create_idmap(u32 hyp_va_bits)
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index c19860fc8183..a48d3f5a5afb 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -312,7 +312,7 @@ void __noreturn __pkvm_init_finalise(void)
if (ret)
goto out;
- ret = hyp_create_pcpu_fixmap();
+ ret = hyp_create_fixmap();
if (ret)
goto out;
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index df5cc74a7dd0..c351b4abd5db 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -11,12 +11,6 @@
#include <asm/kvm_pgtable.h>
#include <asm/stage2_pgtable.h>
-
-#define KVM_PTE_TYPE BIT(1)
-#define KVM_PTE_TYPE_BLOCK 0
-#define KVM_PTE_TYPE_PAGE 1
-#define KVM_PTE_TYPE_TABLE 1
-
struct kvm_pgtable_walk_data {
struct kvm_pgtable_walker *walker;
--
2.49.0.1015.ga840276032-goog
On Fri, 09 May 2025 14:17:06 +0100,
Vincent Donnefort <vdonnefort@google.com> wrote:
>
> With the introduction of stage-2 huge mappings in the pKVM hypervisor,
> CMOs for guest pages are needed at PMD_SIZE granularity. The fixmap
> only supports PAGE_SIZE mappings, and iterating over a huge page is
> time-consuming (mostly due to the TLBI in hyp_fixmap_unmap()), which is
> a problem for EL2 latency.
>
> Introduce a shared PMD_SIZE fixmap (hyp_fixblock_map/hyp_fixblock_unmap)
> to improve guest page CMOs when stage-2 huge mappings are installed.
>
> On a Pixel 6, the iterative solution resulted in a latency of ~700us,
> while the PMD_SIZE fixmap reduces it to ~100us.
>
> Because of the horrendous private range allocation that would be
> necessary, this is disabled on 64KiB-page systems.
>
> Suggested-by: Quentin Perret <qperret@google.com>
> Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
> Signed-off-by: Quentin Perret <qperret@google.com>
>
> diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
> index 1b43bcd2a679..2888b5d03757 100644
> --- a/arch/arm64/include/asm/kvm_pgtable.h
> +++ b/arch/arm64/include/asm/kvm_pgtable.h
> @@ -59,6 +59,11 @@ typedef u64 kvm_pte_t;
>
> #define KVM_PHYS_INVALID (-1ULL)
>
> +#define KVM_PTE_TYPE BIT(1)
> +#define KVM_PTE_TYPE_BLOCK 0
> +#define KVM_PTE_TYPE_PAGE 1
> +#define KVM_PTE_TYPE_TABLE 1
> +
> #define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2)
>
> #define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2)
> diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
> index 230e4f2527de..b0c72bc2d5ba 100644
> --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
> +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
> @@ -13,9 +13,11 @@
> extern struct kvm_pgtable pkvm_pgtable;
> extern hyp_spinlock_t pkvm_pgd_lock;
>
> -int hyp_create_pcpu_fixmap(void);
> +int hyp_create_fixmap(void);
> void *hyp_fixmap_map(phys_addr_t phys);
> void hyp_fixmap_unmap(void);
> +void *hyp_fixblock_map(phys_addr_t phys);
> +void hyp_fixblock_unmap(void);
>
> int hyp_create_idmap(u32 hyp_va_bits);
> int hyp_map_vectors(void);
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index 97e0fea9db4e..9f3ffa4e0690 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -220,16 +220,52 @@ static void guest_s2_put_page(void *addr)
> hyp_put_page(&current_vm->pool, addr);
> }
>
> +static void *__fixmap_guest_page(void *va, size_t *size)
> +{
> + if (IS_ALIGNED(*size, PMD_SIZE)) {
> + void *addr = hyp_fixblock_map(__hyp_pa(va));
> +
> + if (addr)
> + return addr;
> +
> + *size = PAGE_SIZE;
> + }
> +
> + if (IS_ALIGNED(*size, PAGE_SIZE))
> + return hyp_fixmap_map(__hyp_pa(va));
> +
> + WARN_ON(1);
> +
> + return NULL;
> +}
> +
> +static void __fixunmap_guest_page(size_t size)
> +{
> + switch (size) {
> + case PAGE_SIZE:
> + hyp_fixmap_unmap();
> + break;
> + case PMD_SIZE:
> + hyp_fixblock_unmap();
> + break;
> + default:
> + WARN_ON(1);
> + }
This is pretty ugly. How can we end up there in the first place? I'd
rather you make sure we can't reach this default path at all. See also
towards the end of this patch (tl;dr: hyp_fixblock_unmap() should
never explode).
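Something like the below (an untested sketch, keeping the names from
this patch; only the shape is illustrative) would pair the two sizes by
construction, so no default case is needed at all:

	static void __fixunmap_guest_page(size_t size)
	{
		/* __fixmap_guest_page() only ever hands back PMD_SIZE or PAGE_SIZE */
		if (size == PMD_SIZE)
			hyp_fixblock_unmap();
		else
			hyp_fixmap_unmap();
	}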
> +}
> +
> static void clean_dcache_guest_page(void *va, size_t size)
> {
> WARN_ON(!PAGE_ALIGNED(size));
>
> while (size) {
> - __clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)),
> - PAGE_SIZE);
> - hyp_fixmap_unmap();
> - va += PAGE_SIZE;
> - size -= PAGE_SIZE;
> + size_t fixmap_size = size == PMD_SIZE ? size : PAGE_SIZE;
> + void *addr = __fixmap_guest_page(va, &fixmap_size);
> +
> + __clean_dcache_guest_page(addr, fixmap_size);
> + __fixunmap_guest_page(fixmap_size);
> +
> + size -= fixmap_size;
> + va += fixmap_size;
Can this ever be called with a *multiple* of PMD_SIZE? In that case
you'd still end up doing PAGE_SIZE-sized CMOs until there is only
PMD_SIZE left, ruining the optimisation.
I think this needs fixing.
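For the record, a sketch of the generalisation being asked for
(untested; it picks the largest granule that fits at each step instead
of special-casing an exact PMD_SIZE input):

	while (size) {
		size_t fixmap_size = PAGE_SIZE;
		void *addr;

		/* Use the block slot whenever a whole, aligned PMD remains */
		if (size >= PMD_SIZE && IS_ALIGNED((unsigned long)va, PMD_SIZE))
			fixmap_size = PMD_SIZE;

		addr = __fixmap_guest_page(va, &fixmap_size);
		__clean_dcache_guest_page(addr, fixmap_size);
		__fixunmap_guest_page(fixmap_size);

		size -= fixmap_size;
		va += fixmap_size;
	}

With that, a multiple of PMD_SIZE gets block-sized CMOs all the way
down, not just for the final PMD_SIZE chunk.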
> }
> }
>
> @@ -238,11 +274,14 @@ static void invalidate_icache_guest_page(void *va, size_t size)
> WARN_ON(!PAGE_ALIGNED(size));
>
> while (size) {
> - __invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)),
> - PAGE_SIZE);
> - hyp_fixmap_unmap();
> - va += PAGE_SIZE;
> - size -= PAGE_SIZE;
> + size_t fixmap_size = size == PMD_SIZE ? size : PAGE_SIZE;
> + void *addr = __fixmap_guest_page(va, &fixmap_size);
> +
> + __invalidate_icache_guest_page(addr, fixmap_size);
> + __fixunmap_guest_page(fixmap_size);
> +
> + size -= fixmap_size;
> + va += fixmap_size;
> }
> }
>
> diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
> index f41c7440b34b..e3b1bece8504 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mm.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mm.c
> @@ -229,9 +229,8 @@ int hyp_map_vectors(void)
> return 0;
> }
>
> -void *hyp_fixmap_map(phys_addr_t phys)
> +static void *fixmap_map_slot(struct hyp_fixmap_slot *slot, phys_addr_t phys)
> {
> - struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots);
> kvm_pte_t pte, *ptep = slot->ptep;
>
> pte = *ptep;
> @@ -243,10 +242,21 @@ void *hyp_fixmap_map(phys_addr_t phys)
> return (void *)slot->addr;
> }
>
> +void *hyp_fixmap_map(phys_addr_t phys)
> +{
> + return fixmap_map_slot(this_cpu_ptr(&fixmap_slots), phys);
> +}
> +
> static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
> {
> kvm_pte_t *ptep = slot->ptep;
> u64 addr = slot->addr;
> + u32 level;
> +
> + if (FIELD_GET(KVM_PTE_TYPE, *ptep) == KVM_PTE_TYPE_PAGE)
> + level = KVM_PGTABLE_LAST_LEVEL;
> + else
> + level = KVM_PGTABLE_LAST_LEVEL - 1; /* create_fixblock() guarantees PMD level */
Seeing this, (KVM_PGTABLE_LAST_LEVEL - 1) looks nicer than the "2" I
suggested in one of the previous patches.
>
> WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID);
>
> @@ -260,7 +270,7 @@ static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
> * https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
> */
> dsb(ishst);
> - __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), KVM_PGTABLE_LAST_LEVEL);
> + __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
> dsb(ish);
> isb();
> }
> @@ -273,9 +283,9 @@ void hyp_fixmap_unmap(void)
> static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx,
> enum kvm_pgtable_walk_flags visit)
> {
> - struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg);
> + struct hyp_fixmap_slot *slot = (struct hyp_fixmap_slot *)ctx->arg;
>
> - if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_LAST_LEVEL)
> + if (!kvm_pte_valid(ctx->old) || (ctx->end - ctx->start) != kvm_granule_size(ctx->level))
> return -EINVAL;
>
> slot->addr = ctx->addr;
> @@ -296,13 +306,73 @@ static int create_fixmap_slot(u64 addr, u64 cpu)
> struct kvm_pgtable_walker walker = {
> .cb = __create_fixmap_slot_cb,
> .flags = KVM_PGTABLE_WALK_LEAF,
> - .arg = (void *)cpu,
> + .arg = (void *)per_cpu_ptr(&fixmap_slots, cpu),
Do you really need this cast?
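Presumably plain

	.arg = per_cpu_ptr(&fixmap_slots, cpu),

would do, since .arg is a void * and the pointer converts implicitly.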
> };
>
> return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker);
> }
>
> -int hyp_create_pcpu_fixmap(void)
> +#ifndef CONFIG_ARM64_64K_PAGES
I don't have much faith in this symbol. We have changed the config
stuff so often over the years that I wouldn't trust it long term.
Using something like PAGE_SIZE or PAGE_SHIFT is likely to be more
robust.
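For instance, a guard keyed on the granule itself (illustrative only;
the threshold merely needs to single out 64KiB pages, i.e.
PAGE_SHIFT == 16):

	#if PAGE_SHIFT < 16	/* 4KiB and 16KiB granules: PMD_SIZE stays modest */
	/* ... fixblock implementation ... */
	#else
	/* ... fallback ... */
	#endif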
> +static struct hyp_fixmap_slot hyp_fixblock_slot;
> +static DEFINE_HYP_SPINLOCK(hyp_fixblock_lock);
> +
> +void *hyp_fixblock_map(phys_addr_t phys)
> +{
> + hyp_spin_lock(&hyp_fixblock_lock);
> + return fixmap_map_slot(&hyp_fixblock_slot, phys);
> +}
> +
> +void hyp_fixblock_unmap(void)
> +{
> + fixmap_clear_slot(&hyp_fixblock_slot);
> + hyp_spin_unlock(&hyp_fixblock_lock);
> +}
> +
> +static int create_fixblock(void)
> +{
> + struct kvm_pgtable_walker walker = {
> + .cb = __create_fixmap_slot_cb,
> + .flags = KVM_PGTABLE_WALK_LEAF,
> + .arg = (void *)&hyp_fixblock_slot,
> + };
> + unsigned long addr;
> + phys_addr_t phys;
> + int ret, i;
> +
> + /* Find a RAM phys address, PMD aligned */
> + for (i = 0; i < hyp_memblock_nr; i++) {
> + phys = ALIGN(hyp_memory[i].base, PMD_SIZE);
> + if (phys + PMD_SIZE < (hyp_memory[i].base + hyp_memory[i].size))
> + break;
> + }
> +
> + if (i >= hyp_memblock_nr)
> + return -EINVAL;
> +
> + hyp_spin_lock(&pkvm_pgd_lock);
> + addr = ALIGN(__io_map_base, PMD_SIZE);
> + ret = __pkvm_alloc_private_va_range(addr, PMD_SIZE);
> + if (ret)
> + goto unlock;
> +
> + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PMD_SIZE, phys, PAGE_HYP);
> + if (ret)
> + goto unlock;
> +
> + ret = kvm_pgtable_walk(&pkvm_pgtable, addr, PMD_SIZE, &walker);
> +
> +unlock:
> + hyp_spin_unlock(&pkvm_pgd_lock);
> +
> + return ret;
> +}
> +#else
> +void hyp_fixblock_unmap(void) { WARN_ON(1); }
> +void *hyp_fixblock_map(phys_addr_t phys) { return NULL; }
> +static int create_fixblock(void) { return 0; }
> +#endif
I can't say I like this. Can't you have a fallback that does the
iteration rather than these placeholders that are only there to make
things catch fire?
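For example, stubs that simply steer callers back onto the existing
PAGE_SIZE path would avoid the WARN entirely (a sketch, not necessarily
what should be merged):

	/*
	 * 64KiB pages: no block slot. hyp_fixblock_map() returning NULL makes
	 * __fixmap_guest_page() fall back to PAGE_SIZE iteration, so
	 * hyp_fixblock_unmap() is never reached and can be a plain no-op.
	 */
	void *hyp_fixblock_map(phys_addr_t phys) { return NULL; }
	void hyp_fixblock_unmap(void) { }
	static int create_fixblock(void) { return 0; }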
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
> > +}
> > +
> > static void clean_dcache_guest_page(void *va, size_t size)
> > {
> > WARN_ON(!PAGE_ALIGNED(size));
> >
> > while (size) {
> > - __clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)),
> > - PAGE_SIZE);
> > - hyp_fixmap_unmap();
> > - va += PAGE_SIZE;
> > - size -= PAGE_SIZE;
> > + size_t fixmap_size = size == PMD_SIZE ? size : PAGE_SIZE;
> > + void *addr = __fixmap_guest_page(va, &fixmap_size);
> > +
> > + __clean_dcache_guest_page(addr, fixmap_size);
> > + __fixunmap_guest_page(fixmap_size);
> > +
> > + size -= fixmap_size;
> > + va += fixmap_size;
>
> Can this ever be called with a *multiple* of PMD_SIZE? In that case
> you'd still end up doing PAGE_SIZE-sized CMOs until there is only
> PMD_SIZE left, ruining the optimisation.
>
> I think this needs fixing.
So this can only be called with a size equal to either PAGE_SIZE or
PMD_SIZE; I wasn't sure it was worth making it more generic than it
needs to be.
But as with the first patch, I can make it more future-proof by
handling size > PMD_SIZE.
>
> > }
> > }
> >
[...]
On Fri, 16 May 2025 19:03:14 +0100,
Vincent Donnefort <vdonnefort@google.com> wrote:
>
> > > +}
> > > +
> > > static void clean_dcache_guest_page(void *va, size_t size)
> > > {
> > > WARN_ON(!PAGE_ALIGNED(size));
> > >
> > > while (size) {
> > > - __clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)),
> > > - PAGE_SIZE);
> > > - hyp_fixmap_unmap();
> > > - va += PAGE_SIZE;
> > > - size -= PAGE_SIZE;
> > > + size_t fixmap_size = size == PMD_SIZE ? size : PAGE_SIZE;
> > > + void *addr = __fixmap_guest_page(va, &fixmap_size);
> > > +
> > > + __clean_dcache_guest_page(addr, fixmap_size);
> > > + __fixunmap_guest_page(fixmap_size);
> > > +
> > > + size -= fixmap_size;
> > > + va += fixmap_size;
> >
> > Can this ever be called with a *multiple* of PMD_SIZE? In that case
> > you'd still end up doing PAGE_SIZE-sized CMOs until there is only
> > PMD_SIZE left, ruining the optimisation.
> >
> > I think this needs fixing.
>
> So this can only be called with a size equal to either PAGE_SIZE or
> PMD_SIZE; I wasn't sure it was worth making it more generic than it
> needs to be.
>
> But as with the first patch, I can make it more future-proof by
> handling size > PMD_SIZE.
Yup. These things are hard enough to debug that we should try and make
it fool-proof, even if that's not immediately used (and fixing that is
pretty easy).
Thanks,
M.
--
Without deviation from the norm, progress is not possible.