[v7] KVM: arm64: Map GPU device memory as cacheable

[PATCH v7 2/5] KVM: arm64: Block cacheable PFNMAP mapping

Posted by ankita@nvidia.com 7 months, 3 weeks ago

From: Ankit Agrawal <ankita@nvidia.com>

Fix a security bug caused by mismatched memory attributes between the
S1 and S2 mappings.

Currently, it is possible for a region to be cacheable in the userspace
VMA, but mapped non-cacheable in S2. This creates a potential issue where
the VMM may sanitize cacheable memory across VMs using cacheable stores,
ensuring it is zeroed. However, those zeroes may still reside only in the
cache. If KVM subsequently assigns this memory to a VM as uncached, the
VM's accesses bypass the cache and could end up reading stale, non-zeroed
data from a previous VM, leading to unintended data exposure. This is a
security risk.

Block such mismatched-attribute cases by returning -EINVAL when userspace
tries to map a cacheable PFNMAP region. Only NORMAL_NC and DEVICE_* are
allowed.

CC: Oliver Upton <oliver.upton@linux.dev>
CC: Sean Christopherson <seanjc@google.com>
CC: Catalin Marinas <catalin.marinas@arm.com>
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Ankit Agrawal <ankita@nvidia.com>
---
 arch/arm64/kvm/mmu.c | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 3d77a278fc4f..d6e0d5f46b45 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1470,6 +1470,22 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
 	return vma->vm_flags & VM_MTE_ALLOWED;
 }
 
+/*
+ * Determine the memory region cacheability from VMA's pgprot. This
+ * is used to set the stage 2 PTEs.
+ */
+static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
+{
+	switch (FIELD_GET(PTE_ATTRINDX_MASK, pgprot_val(vma->vm_page_prot))) {
+	case MT_NORMAL_NC:
+	case MT_DEVICE_nGnRnE:
+	case MT_DEVICE_nGnRE:
+		return false;
+	default:
+		return true;
+	}
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_s2_trans *nested,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
@@ -1477,7 +1493,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 {
 	int ret = 0;
 	bool write_fault, writable, force_pte = false;
-	bool exec_fault, mte_allowed;
+	bool exec_fault, mte_allowed, is_vma_cacheable = false;
 	bool disable_cmo = false, vfio_allow_any_uc = false;
 	unsigned long mmu_seq;
 	phys_addr_t ipa = fault_ipa;
@@ -1619,6 +1635,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 
 	vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
 
+	is_vma_cacheable = kvm_vma_is_cacheable(vma);
+
 	/* Don't use the VMA after the unlock -- it may have vanished */
 	vma = NULL;
 
@@ -1643,6 +1661,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		return -EFAULT;
 
 	if (!kvm_can_use_cmo_pfn(pfn)) {
+		if (is_vma_cacheable)
+			return -EINVAL;
+
 		/*
 		 * If the page was identified as device early by looking at
 		 * the VMA flags, vma_pagesize is already representing the
@@ -1726,6 +1747,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		prot |= KVM_PGTABLE_PROT_X;
 
 	if (disable_cmo) {
+		if (is_vma_cacheable) {
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
 		if (vfio_allow_any_uc)
 			prot |= KVM_PGTABLE_PROT_NORMAL_NC;
 		else
@@ -2221,6 +2247,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				ret = -EINVAL;
 				break;
 			}
+
+			/* Cacheable PFNMAP is not allowed */
+			if (kvm_vma_is_cacheable(vma)) {
+				ret = -EINVAL;
+				break;
+			}
 		}
 		hva = min(reg_end, vma->vm_end);
 	} while (hva < reg_end);
-- 
2.34.1

Re: [PATCH v7 2/5] KVM: arm64: Block cacheable PFNMAP mapping

Posted by Catalin Marinas 7 months, 3 weeks ago

On Wed, Jun 18, 2025 at 06:55:38AM +0000, ankita@nvidia.com wrote:
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 3d77a278fc4f..d6e0d5f46b45 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -1470,6 +1470,22 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
>  	return vma->vm_flags & VM_MTE_ALLOWED;
>  }
>  
> +/*
> + * Determine the memory region cacheability from VMA's pgprot. This
> + * is used to set the stage 2 PTEs.
> + */
> +static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
> +{
> +	switch (FIELD_GET(PTE_ATTRINDX_MASK, pgprot_val(vma->vm_page_prot))) {
> +	case MT_NORMAL_NC:
> +	case MT_DEVICE_nGnRnE:
> +	case MT_DEVICE_nGnRE:
> +		return false;
> +	default:
> +		return true;
> +	}
> +}
> +
>  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  			  struct kvm_s2_trans *nested,
>  			  struct kvm_memory_slot *memslot, unsigned long hva,
> @@ -1477,7 +1493,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  {
>  	int ret = 0;
>  	bool write_fault, writable, force_pte = false;
> -	bool exec_fault, mte_allowed;
> +	bool exec_fault, mte_allowed, is_vma_cacheable = false;

Nit: do we need to initialise is_vma_cacheable here? It does not seem to
be used until the kvm_vma_is_cacheable() call. Anyway, it's harmless.

>  	bool disable_cmo = false, vfio_allow_any_uc = false;
>  	unsigned long mmu_seq;
>  	phys_addr_t ipa = fault_ipa;
> @@ -1619,6 +1635,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  
>  	vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
>  
> +	is_vma_cacheable = kvm_vma_is_cacheable(vma);
> +
>  	/* Don't use the VMA after the unlock -- it may have vanished */
>  	vma = NULL;
>  
> @@ -1643,6 +1661,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  		return -EFAULT;
>  
>  	if (!kvm_can_use_cmo_pfn(pfn)) {
> +		if (is_vma_cacheable)
> +			return -EINVAL;
> +
>  		/*
>  		 * If the page was identified as device early by looking at
>  		 * the VMA flags, vma_pagesize is already representing the

This block also sets 'disable_cmo' (originally 'device') to true.

> @@ -1726,6 +1747,11 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  		prot |= KVM_PGTABLE_PROT_X;
>  
>  	if (disable_cmo) {
> +		if (is_vma_cacheable) {
> +			ret = -EINVAL;
> +			goto out_unlock;
> +		}

so, is there anything else changing 'disable_cmo' up to this point? If
not, I'd drop the second is_vma_cacheable check.

-- 
Catalin

[PATCH v7 1/5] KVM: arm64: Rename symbols to reflect whether CMO may be used
[PATCH v7 2/5] KVM: arm64: Block cacheable PFNMAP mapping
[PATCH v7 3/5] KVM: arm64: New function to determine hardware cache management support
[PATCH v7 4/5] KVM: arm64: Allow cacheable stage 2 mapping using VMA flags
[PATCH v7 5/5] KVM: arm64: Expose new KVM cap for cacheable PFNMAP