[PATCH 03/14] KVM: arm64: Support host MMIO trap handlers for unmapped devices

Sebastian Ene posted 14 patches 4 weeks, 1 day ago
[PATCH 03/14] KVM: arm64: Support host MMIO trap handlers for unmapped devices
Posted by Sebastian Ene 4 weeks, 1 day ago
Introduce a mechanism to register callbacks for MMIO accesses to regions
unmapped from the host Stage-2 page tables.

This infrastructure allows the hypervisor to intercept host accesses to
protected or emulated devices. When a Stage-2 fault occurs on a
registered device region, the hypervisor will invoke the associated
callback to emulate the access.

Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
 arch/arm64/include/asm/kvm_arm.h      |  3 ++
 arch/arm64/include/asm/kvm_pkvm.h     |  6 ++++
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 41 +++++++++++++++++++++++++++
 arch/arm64/kvm/hyp/nvhe/setup.c       |  3 ++
 4 files changed, 53 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 3f9233b5a130..8fe1e80ab3f4 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -304,6 +304,9 @@
 
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
 #define HPFAR_MASK	(~UL(0xf))
+
+#define FAR_MASK	GENMASK_ULL(11, 0)
+
 /*
  * We have
  *	PAR	[PA_Shift - 1	: 12] = PA	[PA_Shift - 1 : 12]
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 48ec7d519399..5321ced2f50a 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -19,9 +19,15 @@
 
 #define PKVM_PROTECTED_REGS_NUM	8
 
+struct pkvm_protected_reg;
+
+typedef void (pkvm_emulate_handler)(struct pkvm_protected_reg *region, u64 offset, bool write,
+				    u64 *reg, u8 reg_size);
+
 struct pkvm_protected_reg {
 	u64 start_pfn;
 	size_t num_pages;
+	pkvm_emulate_handler *cb;
 };
 
 extern struct pkvm_protected_reg kvm_nvhe_sym(pkvm_protected_regs)[];
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 7c125836b533..f405d2fbd88f 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -13,6 +13,7 @@
 #include <asm/stage2_pgtable.h>
 
 #include <hyp/fault.h>
+#include <hyp/adjust_pc.h>
 
 #include <nvhe/gfp.h>
 #include <nvhe/memory.h>
@@ -608,6 +609,41 @@ static int host_stage2_idmap(u64 addr)
 	return ret;
 }
 
+static bool handle_host_mmio_trap(struct kvm_cpu_context *host_ctxt, u64 esr, u64 addr)
+{
+	u64 offset, reg_value = 0, start, end;
+	u8 reg_size, reg_index;
+	bool write;
+	int i;
+
+	for (i = 0; i < num_protected_reg; i++) {
+		start = pkvm_protected_regs[i].start_pfn << PAGE_SHIFT;
+		end = start + (pkvm_protected_regs[i].num_pages << PAGE_SHIFT);
+
+		if (start > addr || addr > end)
+			continue;
+
+		reg_size = BIT((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
+		reg_index = (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
+		write = (esr & ESR_ELx_WNR) == ESR_ELx_WNR;
+		offset = addr - start;
+
+		if (write)
+			reg_value = host_ctxt->regs.regs[reg_index];
+
+		pkvm_protected_regs[i].cb(&pkvm_protected_regs[i], offset, write,
+					  &reg_value, reg_size);
+
+		if (!write)
+			host_ctxt->regs.regs[reg_index] = reg_value;
+
+		kvm_skip_host_instr();
+		return true;
+	}
+
+	return false;
+}
+
 void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
 {
 	struct kvm_vcpu_fault_info fault;
@@ -630,6 +666,11 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
 	 */
 	BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
 	addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;
+	addr |= fault.far_el2 & FAR_MASK;
+
+	if (ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_LOW && !addr_is_memory(addr) &&
+	    handle_host_mmio_trap(host_ctxt, esr, addr))
+		return;
 
 	ret = host_stage2_idmap(addr);
 	BUG_ON(ret && ret != -EAGAIN);
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index ad5b96085e1b..f91dfebe9980 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -296,6 +296,9 @@ static int unmap_protected_regions(void)
 			if (ret)
 				goto err_setup;
 		}
+
+		if (reg->cb)
+			reg->cb = kern_hyp_va(reg->cb);
 	}
 
 	return 0;
-- 
2.53.0.473.g4a7958ca14-goog
Re: [PATCH 03/14] KVM: arm64: Support host MMIO trap handlers for unmapped devices
Posted by Vincent Donnefort 2 weeks, 1 day ago
On Tue, Mar 10, 2026 at 12:49:22PM +0000, Sebastian Ene wrote:
> Introduce a mechanism to register callbacks for MMIO accesses to regions
> unmapped from the host Stage-2 page tables.
> 
> This infrastructure allows the hypervisor to intercept host accesses to
> protected or emulated devices. When a Stage-2 fault occurs on a
> registered device region, the hypervisor will invoke the associated
> callback to emulate the access.
> 
> Signed-off-by: Sebastian Ene <sebastianene@google.com>
> ---
>  arch/arm64/include/asm/kvm_arm.h      |  3 ++
>  arch/arm64/include/asm/kvm_pkvm.h     |  6 ++++
>  arch/arm64/kvm/hyp/nvhe/mem_protect.c | 41 +++++++++++++++++++++++++++
>  arch/arm64/kvm/hyp/nvhe/setup.c       |  3 ++
>  4 files changed, 53 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index 3f9233b5a130..8fe1e80ab3f4 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -304,6 +304,9 @@
>  
>  /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
>  #define HPFAR_MASK	(~UL(0xf))
> +
> +#define FAR_MASK	GENMASK_ULL(11, 0)
> +
>  /*
>   * We have
>   *	PAR	[PA_Shift - 1	: 12] = PA	[PA_Shift - 1 : 12]
> diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
> index 48ec7d519399..5321ced2f50a 100644
> --- a/arch/arm64/include/asm/kvm_pkvm.h
> +++ b/arch/arm64/include/asm/kvm_pkvm.h
> @@ -19,9 +19,15 @@
>  
>  #define PKVM_PROTECTED_REGS_NUM	8
>  
> +struct pkvm_protected_reg;
> +
> +typedef void (pkvm_emulate_handler)(struct pkvm_protected_reg *region, u64 offset, bool write,
> +				    u64 *reg, u8 reg_size);
> +
>  struct pkvm_protected_reg {
>  	u64 start_pfn;
>  	size_t num_pages;
> +	pkvm_emulate_handler *cb;
>  };
>  
>  extern struct pkvm_protected_reg kvm_nvhe_sym(pkvm_protected_regs)[];
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index 7c125836b533..f405d2fbd88f 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -13,6 +13,7 @@
>  #include <asm/stage2_pgtable.h>
>  
>  #include <hyp/fault.h>
> +#include <hyp/adjust_pc.h>
>  
>  #include <nvhe/gfp.h>
>  #include <nvhe/memory.h>
> @@ -608,6 +609,41 @@ static int host_stage2_idmap(u64 addr)
>  	return ret;
>  }
>  
> +static bool handle_host_mmio_trap(struct kvm_cpu_context *host_ctxt, u64 esr, u64 addr)
> +{
> +	u64 offset, reg_value = 0, start, end;
> +	u8 reg_size, reg_index;
> +	bool write;
> +	int i;
> +
> +	for (i = 0; i < num_protected_reg; i++) {

This is potentially slow for a fast path. As this is an array, we could sort it
and do a binary search, just like find_mem_range?

> +		start = pkvm_protected_regs[i].start_pfn << PAGE_SHIFT;
> +		end = start + (pkvm_protected_regs[i].num_pages << PAGE_SHIFT);
> +
> +		if (start > addr || addr > end)
> +			continue;
> +
> +		reg_size = BIT((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
> +		reg_index = (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
> +		write = (esr & ESR_ELx_WNR) == ESR_ELx_WNR;
> +		offset = addr - start;
> +
> +		if (write)
> +			reg_value = host_ctxt->regs.regs[reg_index];
> +
> +		pkvm_protected_regs[i].cb(&pkvm_protected_regs[i], offset, write,
> +					  &reg_value, reg_size);
> +
> +		if (!write)
> +			host_ctxt->regs.regs[reg_index] = reg_value;
> +
> +		kvm_skip_host_instr();
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
>  void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
>  {
>  	struct kvm_vcpu_fault_info fault;
> @@ -630,6 +666,11 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
>  	 */
>  	BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
>  	addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;
> +	addr |= fault.far_el2 & FAR_MASK;
> +
> +	if (ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_LOW && !addr_is_memory(addr) &&
> +	    handle_host_mmio_trap(host_ctxt, esr, addr))
> +		return;
>  
>  	ret = host_stage2_idmap(addr);
>  	BUG_ON(ret && ret != -EAGAIN);
> diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
> index ad5b96085e1b..f91dfebe9980 100644
> --- a/arch/arm64/kvm/hyp/nvhe/setup.c
> +++ b/arch/arm64/kvm/hyp/nvhe/setup.c
> @@ -296,6 +296,9 @@ static int unmap_protected_regions(void)
>  			if (ret)
>  				goto err_setup;
>  		}
> +
> +		if (reg->cb)
> +			reg->cb = kern_hyp_va(reg->cb);
>  	}
>  
>  	return 0;
> -- 
> 2.53.0.473.g4a7958ca14-goog
>
Re: [PATCH 03/14] KVM: arm64: Support host MMIO trap handlers for unmapped devices
Posted by Fuad Tabba 3 weeks, 5 days ago
Hi Sebastian,

On Tue, 10 Mar 2026 at 12:49, Sebastian Ene <sebastianene@google.com> wrote:
>
> Introduce a mechanism to register callbacks for MMIO accesses to regions
> unmapped from the host Stage-2 page tables.
>
> This infrastructure allows the hypervisor to intercept host accesses to
> protected or emulated devices. When a Stage-2 fault occurs on a
> registered device region, the hypervisor will invoke the associated
> callback to emulate the access.
>
> Signed-off-by: Sebastian Ene <sebastianene@google.com>
> ---
>  arch/arm64/include/asm/kvm_arm.h      |  3 ++
>  arch/arm64/include/asm/kvm_pkvm.h     |  6 ++++
>  arch/arm64/kvm/hyp/nvhe/mem_protect.c | 41 +++++++++++++++++++++++++++
>  arch/arm64/kvm/hyp/nvhe/setup.c       |  3 ++
>  4 files changed, 53 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index 3f9233b5a130..8fe1e80ab3f4 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -304,6 +304,9 @@
>
>  /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
>  #define HPFAR_MASK     (~UL(0xf))
> +
> +#define FAR_MASK       GENMASK_ULL(11, 0)
> +
>  /*
>   * We have
>   *     PAR     [PA_Shift - 1   : 12] = PA      [PA_Shift - 1 : 12]
> diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
> index 48ec7d519399..5321ced2f50a 100644
> --- a/arch/arm64/include/asm/kvm_pkvm.h
> +++ b/arch/arm64/include/asm/kvm_pkvm.h
> @@ -19,9 +19,15 @@
>
>  #define PKVM_PROTECTED_REGS_NUM        8
>
> +struct pkvm_protected_reg;
> +
> +typedef void (pkvm_emulate_handler)(struct pkvm_protected_reg *region, u64 offset, bool write,
> +                                   u64 *reg, u8 reg_size);
> +
>  struct pkvm_protected_reg {
>         u64 start_pfn;
>         size_t num_pages;
> +       pkvm_emulate_handler *cb;
>  };
>
>  extern struct pkvm_protected_reg kvm_nvhe_sym(pkvm_protected_regs)[];
> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index 7c125836b533..f405d2fbd88f 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -13,6 +13,7 @@
>  #include <asm/stage2_pgtable.h>
>
>  #include <hyp/fault.h>
> +#include <hyp/adjust_pc.h>

Please sort includes alphabetically.

>
>  #include <nvhe/gfp.h>
>  #include <nvhe/memory.h>
> @@ -608,6 +609,41 @@ static int host_stage2_idmap(u64 addr)
>         return ret;
>  }
>
> +static bool handle_host_mmio_trap(struct kvm_cpu_context *host_ctxt, u64 esr, u64 addr)
> +{
> +       u64 offset, reg_value = 0, start, end;
> +       u8 reg_size, reg_index;
> +       bool write;
> +       int i;

What do you plan to do if there is no valid syndrome, i.e.,
ESR_EL2.ISV == 0? I am still reviewing, so maybe this is solved in a
future patch, or maybe you know that, in practice, all instructions
would have a valid syndrome. Regardless of which it is, you should
definitely add the following check to _this_ patch (or reconsider the
approach if it is possible to get legit accesses with ESR_EL2.ISV ==
0):

+      if (!(esr & ESR_ELx_ISV))
+              return false;

> +
> +       for (i = 0; i < num_protected_reg; i++) {
> +               start = pkvm_protected_regs[i].start_pfn << PAGE_SHIFT;
> +               end = start + (pkvm_protected_regs[i].num_pages << PAGE_SHIFT);
> +
> +               if (start > addr || addr > end)

Because end is calculated by adding the size, it represents the first
byte after the region, so this should be:
+               if (start > addr || addr >= end)
> +                       continue;

You also need to make sure that the entire access fits within the
protected region, to avoid a malicious or misaligned cross-boundary
access, i.e.:

+                if (addr + reg_size > end)
+                        return false;


> +               reg_size = BIT((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT);
> +               reg_index = (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT;
> +               write = (esr & ESR_ELx_WNR) == ESR_ELx_WNR;
> +               offset = addr - start;
> +
> +               if (write)
> +                       reg_value = host_ctxt->regs.regs[reg_index];

You need to handle the zero register (index 31) for writes, e.g.:
+                       reg_value = (reg_index == 31) ? 0 :
+                                   host_ctxt->regs.regs[reg_index];

> +
> +               pkvm_protected_regs[i].cb(&pkvm_protected_regs[i], offset, write,
> +                                         &reg_value, reg_size);
> +
> +               if (!write)
> +                       host_ctxt->regs.regs[reg_index] = reg_value;

and for reads:
+               if (!write && reg_index != 31)

Cheers,
/fuad

> +
> +               kvm_skip_host_instr();
> +               return true;
> +       }
> +
> +       return false;
> +}
> +
>  void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
>  {
>         struct kvm_vcpu_fault_info fault;
> @@ -630,6 +666,11 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
>          */
>         BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
>         addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;
> +       addr |= fault.far_el2 & FAR_MASK;
> +
> +       if (ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_LOW && !addr_is_memory(addr) &&
> +           handle_host_mmio_trap(host_ctxt, esr, addr))
> +               return;
>
>         ret = host_stage2_idmap(addr);
>         BUG_ON(ret && ret != -EAGAIN);
> diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
> index ad5b96085e1b..f91dfebe9980 100644
> --- a/arch/arm64/kvm/hyp/nvhe/setup.c
> +++ b/arch/arm64/kvm/hyp/nvhe/setup.c
> @@ -296,6 +296,9 @@ static int unmap_protected_regions(void)
>                         if (ret)
>                                 goto err_setup;
>                 }
> +
> +               if (reg->cb)
> +                       reg->cb = kern_hyp_va(reg->cb);
>         }
>
>         return 0;
> --
> 2.53.0.473.g4a7958ca14-goog
>