In preparation for supporting stage-2 huge mappings for np-guests, add a
nr_pages argument to the __pkvm_host_share_guest hypercall. This argument
supports only two values: 1 or PMD_SIZE / PAGE_SIZE (that is, 512 on a
system with 4K pages).
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
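For illustration only (not part of the diff below), the host-side call takes
either a single page or, once stage-2 huge mappings are wired up, a PMD's
worth of pages:

    /* Single page -- the only value the host passes in this patch: */
    ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, 1, prot);

    /* Hypothetical PMD-sized block (512 pages with 4K pages): */
    ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn,
                            PMD_SIZE / PAGE_SIZE, prot);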
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 978f38c386ee..1abbab5e2ff8 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -39,7 +39,7 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
-int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
+int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
enum kvm_pgtable_prot prot);
int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm);
int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot);
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 2c37680d954c..e71601746935 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -249,7 +249,8 @@ static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, pfn, host_ctxt, 1);
DECLARE_REG(u64, gfn, host_ctxt, 2);
- DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 3);
+ DECLARE_REG(u64, nr_pages, host_ctxt, 3);
+ DECLARE_REG(enum kvm_pgtable_prot, prot, host_ctxt, 4);
struct pkvm_hyp_vcpu *hyp_vcpu;
int ret = -EINVAL;
@@ -264,7 +265,7 @@ static void handle___pkvm_host_share_guest(struct kvm_cpu_context *host_ctxt)
if (ret)
goto out;
- ret = __pkvm_host_share_guest(pfn, gfn, hyp_vcpu, prot);
+ ret = __pkvm_host_share_guest(pfn, gfn, nr_pages, hyp_vcpu, prot);
out:
cpu_reg(host_ctxt, 1) = ret;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 63968c7740c3..7e3a249149a0 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -60,6 +60,9 @@ static void hyp_unlock_component(void)
hyp_spin_unlock(&pkvm_pgd_lock);
}
+#define for_each_hyp_page(start, size, page) \
+ for (page = hyp_phys_to_page(start); page < hyp_phys_to_page((start) + (size)); page++)
+
static void *host_s2_zalloc_pages_exact(size_t size)
{
void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size));
@@ -509,10 +512,25 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
{
- phys_addr_t end = addr + size;
+ struct hyp_page *page;
- for (; addr < end; addr += PAGE_SIZE)
- hyp_phys_to_page(addr)->host_state = state;
+ for_each_hyp_page(addr, size, page)
+ page->host_state = state;
+}
+
+static void __host_update_share_guest_count(u64 phys, u64 size, bool inc)
+{
+ struct hyp_page *page;
+
+ for_each_hyp_page(phys, size, page) {
+ if (inc) {
+ WARN_ON(page->host_share_guest_count++ == U32_MAX);
+ } else {
+ WARN_ON(!page->host_share_guest_count--);
+ if (!page->host_share_guest_count)
+ page->host_state = PKVM_PAGE_OWNED;
+ }
+ }
}
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
@@ -627,16 +645,16 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
static int __host_check_page_state_range(u64 addr, u64 size,
enum pkvm_page_state state)
{
- u64 end = addr + size;
+ struct hyp_page *page;
int ret;
- ret = check_range_allowed_memory(addr, end);
+ ret = check_range_allowed_memory(addr, addr + size);
if (ret)
return ret;
hyp_assert_lock_held(&host_mmu.lock);
- for (; addr < end; addr += PAGE_SIZE) {
- if (hyp_phys_to_page(addr)->host_state != state)
+ for_each_hyp_page(addr, size, page) {
+ if (page->host_state != state)
return -EPERM;
}
@@ -686,10 +704,9 @@ static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte, u64 addr)
return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
}
-static int __guest_check_page_state_range(struct pkvm_hyp_vcpu *vcpu, u64 addr,
+static int __guest_check_page_state_range(struct pkvm_hyp_vm *vm, u64 addr,
u64 size, enum pkvm_page_state state)
{
- struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
struct check_walk_data d = {
.desired = state,
.get_page_state = guest_get_page_state,
@@ -896,49 +913,83 @@ int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages)
return ret;
}
-int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
+static int __guest_check_transition_size(u64 phys, u64 ipa, u64 nr_pages, u64 *size)
+{
+ if (nr_pages == 1) {
+ *size = PAGE_SIZE;
+ return 0;
+ }
+
+ /* We solely support PMD_SIZE huge-pages */
+ if (nr_pages != (1 << (PMD_SHIFT - PAGE_SHIFT)))
+ return -EINVAL;
+
+ if (!IS_ALIGNED(phys | ipa, PMD_SIZE))
+ return -EINVAL;
+
+ *size = PMD_SIZE;
+ return 0;
+}
+
+int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
enum kvm_pgtable_prot prot)
{
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
u64 phys = hyp_pfn_to_phys(pfn);
u64 ipa = hyp_pfn_to_phys(gfn);
+ enum pkvm_page_state state;
struct hyp_page *page;
+ u64 size;
int ret;
if (prot & ~KVM_PGTABLE_PROT_RWX)
return -EINVAL;
- ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
+ ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);
+ if (ret)
+ return ret;
+
+ ret = check_range_allowed_memory(phys, phys + size);
if (ret)
return ret;
host_lock_component();
guest_lock_component(vm);
- ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE);
+ ret = __guest_check_page_state_range(vm, ipa, size, PKVM_NOPAGE);
if (ret)
goto unlock;
- page = hyp_phys_to_page(phys);
- switch (page->host_state) {
+ state = hyp_phys_to_page(phys)->host_state;
+ for_each_hyp_page(phys, size, page) {
+ if (page->host_state != state) {
+ ret = -EPERM;
+ goto unlock;
+ }
+ }
+
+ switch (state) {
case PKVM_PAGE_OWNED:
- WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED));
+ WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));
break;
case PKVM_PAGE_SHARED_OWNED:
- if (page->host_share_guest_count)
- break;
- /* Only host to np-guest multi-sharing is tolerated */
- WARN_ON(1);
- fallthrough;
+ for_each_hyp_page(phys, size, page) {
+ /* Only host to np-guest multi-sharing is tolerated */
+ if (WARN_ON(!page->host_share_guest_count)) {
+ ret = -EPERM;
+ goto unlock;
+ }
+ }
+ break;
default:
ret = -EPERM;
goto unlock;
}
- WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
+ WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys,
pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
&vcpu->vcpu.arch.pkvm_memcache, 0));
- page->host_share_guest_count++;
+ __host_update_share_guest_count(phys, size, true);
unlock:
guest_unlock_component(vm);
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 930b677eb9b0..00fd9a524bf7 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -361,7 +361,7 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
return -EINVAL;
lockdep_assert_held_write(&kvm->mmu_lock);
- ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
+ ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, 1, prot);
if (ret) {
/* Is the gfn already mapped due to a racing vCPU? */
if (ret == -EPERM)
--
2.48.1.711.g2feabab25a-goog
On Thursday 06 Mar 2025 at 11:00:31 (+0000), Vincent Donnefort wrote:
> +int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
> enum kvm_pgtable_prot prot)
> {
> struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
> u64 phys = hyp_pfn_to_phys(pfn);
> u64 ipa = hyp_pfn_to_phys(gfn);
> + enum pkvm_page_state state;
> struct hyp_page *page;
> + u64 size;
> int ret;
>
> if (prot & ~KVM_PGTABLE_PROT_RWX)
> return -EINVAL;
>
> - ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
> + ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);
> + if (ret)
> + return ret;
> +
> + ret = check_range_allowed_memory(phys, phys + size);
> if (ret)
> return ret;
>
> host_lock_component();
> guest_lock_component(vm);
>
> - ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE);
> + ret = __guest_check_page_state_range(vm, ipa, size, PKVM_NOPAGE);
> if (ret)
> goto unlock;
>
> - page = hyp_phys_to_page(phys);
> - switch (page->host_state) {
> + state = hyp_phys_to_page(phys)->host_state;
> + for_each_hyp_page(phys, size, page) {
> + if (page->host_state != state) {
> + ret = -EPERM;
> + goto unlock;
> + }
> + }
> +
> + switch (state) {
> case PKVM_PAGE_OWNED:
> - WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED));
> + WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));
> break;
> case PKVM_PAGE_SHARED_OWNED:
> - if (page->host_share_guest_count)
> - break;
> - /* Only host to np-guest multi-sharing is tolerated */
> - WARN_ON(1);
> - fallthrough;
> + for_each_hyp_page(phys, size, page) {
> + /* Only host to np-guest multi-sharing is tolerated */
> + if (WARN_ON(!page->host_share_guest_count)) {
> + ret = -EPERM;
> + goto unlock;
> + }
> + }
> + break;
> default:
> ret = -EPERM;
> goto unlock;
> }
>
> - WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
> + WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys,
> pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
> &vcpu->vcpu.arch.pkvm_memcache, 0));
> - page->host_share_guest_count++;
> + __host_update_share_guest_count(phys, size, true);
So we're walking the entire phys range 3 times:
1. to check the host_state is consistent with that of the first
page;
2. to set the state to SHARED_OWNED or to check the
host_share_guest_count;
3. and then again here to update the host share guest count
I feel like we could probably remove at least one loop with a pattern
like so:
    for_each_hyp_page(phys, size, page) {
        switch (page->host_state) {
        case PKVM_PAGE_OWNED:
            continue;
        case PKVM_PAGE_SHARED_OWNED:
            if (page->host_share_guest_count)
                continue;
            fallthrough;
        default:
            ret = -EPERM;
            goto unlock;
        }
    }

    for_each_hyp_page(phys, size, page) {
        page->host_state = PKVM_PAGE_SHARED_OWNED;
        page->host_share_guest_count++;
    }
That would also tolerate a mix of OWNED and SHARED_OWNED pages in the
range, which I'm not sure is needed but it doesn't cost us anything to
support so ... :-)
Wdyt?
> unlock:
> guest_unlock_component(vm);
> diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
> index 930b677eb9b0..00fd9a524bf7 100644
> --- a/arch/arm64/kvm/pkvm.c
> +++ b/arch/arm64/kvm/pkvm.c
> @@ -361,7 +361,7 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
> return -EINVAL;
>
> lockdep_assert_held_write(&kvm->mmu_lock);
> - ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
> + ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, 1, prot);
> if (ret) {
> /* Is the gfn already mapped due to a racing vCPU? */
> if (ret == -EPERM)
> --
> 2.48.1.711.g2feabab25a-goog
>
On Thu, Apr 03, 2025 at 03:27:15PM +0000, Quentin Perret wrote:
> On Thursday 06 Mar 2025 at 11:00:31 (+0000), Vincent Donnefort wrote:
> > +int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu *vcpu,
> > enum kvm_pgtable_prot prot)
> > {
> > struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
> > u64 phys = hyp_pfn_to_phys(pfn);
> > u64 ipa = hyp_pfn_to_phys(gfn);
> > + enum pkvm_page_state state;
> > struct hyp_page *page;
> > + u64 size;
> > int ret;
> >
> > if (prot & ~KVM_PGTABLE_PROT_RWX)
> > return -EINVAL;
> >
> > - ret = check_range_allowed_memory(phys, phys + PAGE_SIZE);
> > + ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);
> > + if (ret)
> > + return ret;
> > +
> > + ret = check_range_allowed_memory(phys, phys + size);
> > if (ret)
> > return ret;
> >
> > host_lock_component();
> > guest_lock_component(vm);
> >
> > - ret = __guest_check_page_state_range(vcpu, ipa, PAGE_SIZE, PKVM_NOPAGE);
> > + ret = __guest_check_page_state_range(vm, ipa, size, PKVM_NOPAGE);
> > if (ret)
> > goto unlock;
> >
> > - page = hyp_phys_to_page(phys);
> > - switch (page->host_state) {
> > + state = hyp_phys_to_page(phys)->host_state;
> > + for_each_hyp_page(phys, size, page) {
> > + if (page->host_state != state) {
> > + ret = -EPERM;
> > + goto unlock;
> > + }
> > + }
> > +
> > + switch (state) {
> > case PKVM_PAGE_OWNED:
> > - WARN_ON(__host_set_page_state_range(phys, PAGE_SIZE, PKVM_PAGE_SHARED_OWNED));
> > + WARN_ON(__host_set_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED));
> > break;
> > case PKVM_PAGE_SHARED_OWNED:
> > - if (page->host_share_guest_count)
> > - break;
> > - /* Only host to np-guest multi-sharing is tolerated */
> > - WARN_ON(1);
> > - fallthrough;
> > + for_each_hyp_page(phys, size, page) {
> > + /* Only host to np-guest multi-sharing is tolerated */
> > + if (WARN_ON(!page->host_share_guest_count)) {
> > + ret = -EPERM;
> > + goto unlock;
> > + }
> > + }
> > + break;
> > default:
> > ret = -EPERM;
> > goto unlock;
> > }
> >
> > - WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, PAGE_SIZE, phys,
> > + WARN_ON(kvm_pgtable_stage2_map(&vm->pgt, ipa, size, phys,
> > pkvm_mkstate(prot, PKVM_PAGE_SHARED_BORROWED),
> > &vcpu->vcpu.arch.pkvm_memcache, 0));
> > - page->host_share_guest_count++;
> > + __host_update_share_guest_count(phys, size, true);
>
> So we're walking the entire phys range 3 times:
>
> 1. to check the host_state is consistent with that of the first
> page;
>
> 2. to set the state to SHARED_OWNED or to check the
> host_share_guest_count;
>
> 3. and then again here to update the host share guest count
>
> I feel like we could probably remove at least one loop with a pattern
> like so:
>
>     for_each_hyp_page(phys, size, page) {
>         switch (page->host_state) {
>         case PKVM_PAGE_OWNED:
>             continue;
>         case PKVM_PAGE_SHARED_OWNED:
>             if (page->host_share_guest_count)
>                 continue;
>             fallthrough;
>         default:
>             ret = -EPERM;
>             goto unlock;
>         }
>     }
>
>     for_each_hyp_page(phys, size, page) {
>         page->host_state = PKVM_PAGE_SHARED_OWNED;
>         page->host_share_guest_count++;
>     }
>
> That would also tolerate a mix of OWNED and SHARED_OWNED pages in the
> range, which I'm not sure is needed but it doesn't cost us anything to
> support so ... :-)
>
> Wdyt?
That sounds good. I'll drop __host_update_share_guest_count at the same
time and fold its logic directly into the share/unshare functions.
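For instance, the unshare path could then decrement the count inline,
roughly along these lines (just a sketch reusing the helpers from this
patch, not the final code):

    /* In the unshare path, once __host_update_share_guest_count is gone: */
    for_each_hyp_page(phys, size, page) {
        WARN_ON(!page->host_share_guest_count--);
        if (!page->host_share_guest_count)
            page->host_state = PKVM_PAGE_OWNED;
    }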
>
> > unlock:
> > guest_unlock_component(vm);
> > diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
> > index 930b677eb9b0..00fd9a524bf7 100644
> > --- a/arch/arm64/kvm/pkvm.c
> > +++ b/arch/arm64/kvm/pkvm.c
> > @@ -361,7 +361,7 @@ int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
> > return -EINVAL;
> >
> > lockdep_assert_held_write(&kvm->mmu_lock);
> > - ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
> > + ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, 1, prot);
> > if (ret) {
> > /* Is the gfn already mapped due to a racing vCPU? */
> > if (ret == -EPERM)
> > --
> > 2.48.1.711.g2feabab25a-goog
> >