[PATCH v4 16/28] iommu/arm-smmu-v3-kvm: Create array for hyp SMMUv3

Mostafa Saleh posted 28 patches 1 month, 2 weeks ago
[PATCH v4 16/28] iommu/arm-smmu-v3-kvm: Create array for hyp SMMUv3
Posted by Mostafa Saleh 1 month, 2 weeks ago
As the hypervisor has no access to firmware tables, device discovery is
done by the kernel, which parses the firmware tables and passes a list of
devices to the hypervisor, which later takes over.

At the moment only the device tree is supported.

Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c   | 93 ++++++++++++++++++-
 .../iommu/arm/arm-smmu-v3/pkvm/arm_smmu_v3.h  | 13 +++
 2 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
index ac4eac6d567f..27ea39c0fb1f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-kvm.c
@@ -7,6 +7,7 @@
 #include <asm/kvm_mmu.h>
 #include <asm/kvm_pkvm.h>
 
+#include <linux/of_address.h>
 #include <linux/of_platform.h>
 
 #include "arm-smmu-v3.h"
@@ -14,6 +15,75 @@
 
 extern struct kvm_iommu_ops kvm_nvhe_sym(smmu_ops);
 
+static size_t				kvm_arm_smmu_count;	/* entries in the array below */
+static struct hyp_arm_smmu_v3_device	*kvm_arm_smmu_array;	/* from __get_free_pages() */
+
+static void kvm_arm_smmu_array_free(void)
+{
+	size_t bytes = kvm_arm_smmu_count * sizeof(*kvm_arm_smmu_array);
+
+	/* Release with the same page order that the allocation computed. */
+	free_pages((unsigned long)kvm_arm_smmu_array, get_order(bytes));
+}
+
+/*
+ * Build the array of SMMUv3 instances handed to the hypervisor, which cannot
+ * parse firmware tables itself. Only the device tree is walked here.
+ * This has to be done before the SMMUv3 probes and does anything meaningful
+ * with the hardware, otherwise it becomes harder to reason about the SMMU
+ * state and we'd need to hand off live state to the hypervisor, which is
+ * complicated and dangerous. The hypervisor only needs a small part of the
+ * probe path, so use separate logic for it.
+ * Returns 0, -ENODEV if no SMMUv3 node exists, -ENOMEM, or a parsing error.
+ */
+static int kvm_arm_smmu_array_alloc(void)
+{
+	int smmu_order;
+	struct device_node *np;
+	int ret;
+	int i = 0;
+
+	kvm_arm_smmu_count = 0;
+	for_each_compatible_node(np, NULL, "arm,smmu-v3")
+		kvm_arm_smmu_count++;
+
+	if (!kvm_arm_smmu_count)
+		return -ENODEV;
+
+	smmu_order = get_order(kvm_arm_smmu_count * sizeof(*kvm_arm_smmu_array));
+	kvm_arm_smmu_array = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, smmu_order);
+	if (!kvm_arm_smmu_array)
+		return -ENOMEM;
+
+	/* Basic device tree parsing. */
+	for_each_compatible_node(np, NULL, "arm,smmu-v3") {
+		struct resource res;
+
+		ret = of_address_to_resource(np, 0, &res);
+		if (ret)
+			goto out_err;
+		kvm_arm_smmu_array[i].mmio_addr = res.start;
+		kvm_arm_smmu_array[i].mmio_size = resource_size(&res);
+		if (kvm_arm_smmu_array[i].mmio_size < SZ_128K) {
+			pr_err("%pOF: MMIO region too small (%pr)\n", np, &res);
+			ret = -EINVAL;
+			goto out_err;
+		}
+
+		if (of_dma_is_coherent(np))
+			kvm_arm_smmu_array[i].features |= ARM_SMMU_FEAT_COHERENCY;
+
+		i++;
+	}
+
+	return 0;
+
+out_err:
+	of_node_put(np);	/* the iterator's reference is kept on early exit */
+	kvm_arm_smmu_array_free();
+	return ret;
+}
+
 size_t smmu_hyp_pgt_pages(void)
 {
 	/*
@@ -27,10 +97,31 @@ size_t smmu_hyp_pgt_pages(void)
 
 static int kvm_arm_smmu_v3_register(void)
 {
+	int ret;
+
 	if (!is_protected_kvm_enabled())
 		return 0;
 
-	return kvm_iommu_register_driver(kern_hyp_va(lm_alias(&kvm_nvhe_sym(smmu_ops))));
+	ret = kvm_arm_smmu_array_alloc();
+	if (ret)
+		return ret;
+
+	ret = kvm_iommu_register_driver(kern_hyp_va(lm_alias(&kvm_nvhe_sym(smmu_ops))));
+	if (ret)
+		goto out_err;
+
+	/*
+	 * These variables are stored in the nVHE image, and won't be accessible
+	 * after KVM initialization. Ownership of kvm_arm_smmu_array will be
+	 * transferred to the hypervisor as well.
+	 */
+	kvm_hyp_arm_smmu_v3_smmus = kvm_arm_smmu_array;
+	kvm_hyp_arm_smmu_v3_count = kvm_arm_smmu_count;
+	return ret;
+
+out_err:
+	kvm_arm_smmu_array_free();
+	return ret;
 };
 
 core_initcall(kvm_arm_smmu_v3_register);
diff --git a/drivers/iommu/arm/arm-smmu-v3/pkvm/arm_smmu_v3.h b/drivers/iommu/arm/arm-smmu-v3/pkvm/arm_smmu_v3.h
index f6ad91d3fb85..744ee2b7f0b4 100644
--- a/drivers/iommu/arm/arm-smmu-v3/pkvm/arm_smmu_v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/pkvm/arm_smmu_v3.h
@@ -4,7 +4,20 @@
 
 #include <asm/kvm_asm.h>
 
+/*
+ * Per-SMMU state of the hypervisor driver. Parameters from the trusted host:
+ * @mmio_addr		Physical base address of the SMMU registers
+ * @mmio_size		Size in bytes of the SMMU registers resource
+ * @features		SMMUv3 features (ARM_SMMU_FEAT_*), subset of the main driver
+ *
+ * Other members are filled and used at runtime by the SMMU driver.
+ * @base		Virtual address of SMMU registers
+ */
 struct hyp_arm_smmu_v3_device {
+	phys_addr_t		mmio_addr;
+	size_t			mmio_size;
+	void __iomem		*base;
+	u32			features;
 };
 
 extern size_t kvm_nvhe_sym(kvm_hyp_arm_smmu_v3_count);
-- 
2.51.0.rc1.167.g924127e9c0-goog
Re: [PATCH v4 16/28] iommu/arm-smmu-v3-kvm: Create array for hyp SMMUv3
Posted by Daniel Mentz 3 weeks, 3 days ago
On Tue, Aug 19, 2025 at 2:55 PM Mostafa Saleh <smostafa@google.com> wrote:
>
> +               if (kvm_arm_smmu_array[i].mmio_size < SZ_128K) {
> +                       pr_err("SMMUv3(%s) has unsupported size(0x%lx)\n", np->name,
> +                              kvm_arm_smmu_array[i].mmio_size);

Use format specifier %pOF to print device tree node.
If mmio_size is a size_t type, use format specifier %zx.
Align language of error message with kernel driver which prints "MMIO
region too small (%pr)\n".
I'm wondering if we should use kvm_err instead of pr_err.
Re: [PATCH v4 16/28] iommu/arm-smmu-v3-kvm: Create array for hyp SMMUv3
Posted by Mostafa Saleh 2 weeks, 3 days ago
On Tue, Sep 09, 2025 at 11:30:48AM -0700, Daniel Mentz wrote:
> On Tue, Aug 19, 2025 at 2:55 PM Mostafa Saleh <smostafa@google.com> wrote:
> >
> > +               if (kvm_arm_smmu_array[i].mmio_size < SZ_128K) {
> > +                       pr_err("SMMUv3(%s) has unsupported size(0x%lx)\n", np->name,
> > +                              kvm_arm_smmu_array[i].mmio_size);
> 
> Use format specifier %pOF to print device tree node.
> If mmio_size is a size_t type, use format specifier %zx.
> Align language of error message with kernel driver which prints "MMIO
> region too small (%pr)\n".

Thanks for catching that, I will fix it in v5.

> I'm wondering if we should use kvm_err instead of pr_err.

I am not sure, kvm_err seems to be used from core arch code only, but
I don't see why not.

Thanks,
Mostafa