[RFC PATCH 06/16] hw/arm/tegra241-cmdqv: Map VINTF Page0 into guest

Posted by Shameer Kolothum 2 months ago
From: Nicolin Chen <nicolinc@nvidia.com>

Tegra241 CMDQV assigns each VINTF a 128 KB MMIO region split into two
64 KB pages:
 - Page0: guest-accessible control/status registers for all VCMDQs
 - Page1: configuration registers (queue GPA/size) that must be trapped
          by the VMM and translated before programming the HW queue.

This patch implements the Page0 handling in QEMU. Using the VINTF offset
returned by IOMMUFD during VIOMMU allocation, QEMU maps Page0 into
guest physical address space and exposes it via two guest MMIO windows:
 - 0x10000: VCMDQ registers
 - 0x30000: VINTF registers

The mapping is lazily initialized on first read/write.
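
For reference, a rough sketch of the resulting CMDQV region layout
(offsets relative to the CMDQV MMIO base, following the kernel driver's
definition):

  0x00000 +---------------------------------------+
          | Global CMDQV registers (trap)         |
  0x10000 +---------------------------------------+
          | Global VCMDQ registers Page0 (mmap)   | <- mmio_vcmdq_page
  0x20000 +---------------------------------------+
          | Global VCMDQ registers Page1 (trap)   |
  0x30000 +---------------------------------------+
          | VINTF0 VCMDQ registers Page0 (mmap)   | <- mmio_vintf_page
  0x40000 +---------------------------------------+
          | VINTF0 VCMDQ registers Page1 (trap)   |
  0x50000 +---------------------------------------+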

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
---
 hw/arm/tegra241-cmdqv.c | 60 +++++++++++++++++++++++++++++++++++++++++
 hw/arm/tegra241-cmdqv.h |  5 ++++
 2 files changed, 65 insertions(+)

diff --git a/hw/arm/tegra241-cmdqv.c b/hw/arm/tegra241-cmdqv.c
index 899325877e..d8858322dc 100644
--- a/hw/arm/tegra241-cmdqv.c
+++ b/hw/arm/tegra241-cmdqv.c
@@ -13,14 +13,74 @@
 #include "smmuv3-accel.h"
 #include "tegra241-cmdqv.h"
 
+static bool tegra241_cmdqv_init_vcmdq_page0(Tegra241CMDQV *cmdqv, Error **errp)
+{
+    SMMUv3State *smmu = cmdqv->smmu;
+    SMMUv3AccelState *s_accel = smmu->s_accel;
+    IOMMUFDViommu *viommu;
+    char *name;
+
+    if (!s_accel) {
+        return true;
+    }
+
+    viommu = &s_accel->viommu;
+    if (!iommufd_backend_viommu_mmap(viommu->iommufd, viommu->viommu_id,
+                                     VCMDQ_REG_PAGE_SIZE,
+                                     cmdqv->cmdqv_data.out_vintf_mmap_offset,
+                                     &cmdqv->vcmdq_page0, errp)) {
+        cmdqv->vcmdq_page0 = NULL;
+        return false;
+    }
+
+    name = g_strdup_printf("%s vcmdq", memory_region_name(&cmdqv->mmio_cmdqv));
+    memory_region_init_ram_device_ptr(&cmdqv->mmio_vcmdq_page,
+                                      memory_region_owner(&cmdqv->mmio_cmdqv),
+                                      name, 0x10000, cmdqv->vcmdq_page0);
+    memory_region_add_subregion_overlap(&cmdqv->mmio_cmdqv, 0x10000,
+                                        &cmdqv->mmio_vcmdq_page, 1);
+    g_free(name);
+
+    name = g_strdup_printf("%s vintf", memory_region_name(&cmdqv->mmio_cmdqv));
+    memory_region_init_ram_device_ptr(&cmdqv->mmio_vintf_page,
+                                      memory_region_owner(&cmdqv->mmio_cmdqv),
+                                      name, 0x10000, cmdqv->vcmdq_page0);
+    memory_region_add_subregion_overlap(&cmdqv->mmio_cmdqv, 0x30000,
+                                        &cmdqv->mmio_vintf_page, 1);
+    g_free(name);
+
+    return true;
+}
+
 static uint64_t tegra241_cmdqv_read(void *opaque, hwaddr offset, unsigned size)
 {
+    Tegra241CMDQV *cmdqv = (Tegra241CMDQV *)opaque;
+    Error *local_err = NULL;
+
+    if (!cmdqv->vcmdq_page0) {
+        tegra241_cmdqv_init_vcmdq_page0(cmdqv, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            local_err = NULL;
+        }
+    }
+
     return 0;
 }
 
 static void tegra241_cmdqv_write(void *opaque, hwaddr offset, uint64_t value,
                                  unsigned size)
 {
+    Tegra241CMDQV *cmdqv = (Tegra241CMDQV *)opaque;
+    Error *local_err = NULL;
+
+    if (!cmdqv->vcmdq_page0) {
+        tegra241_cmdqv_init_vcmdq_page0(cmdqv, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            local_err = NULL;
+        }
+    }
 }
 
 static const MemoryRegionOps mmio_cmdqv_ops = {
diff --git a/hw/arm/tegra241-cmdqv.h b/hw/arm/tegra241-cmdqv.h
index 9bc72b24d9..ccdf0651be 100644
--- a/hw/arm/tegra241-cmdqv.h
+++ b/hw/arm/tegra241-cmdqv.h
@@ -19,8 +19,13 @@ typedef struct Tegra241CMDQV {
     SMMUv3State *smmu;
     MemoryRegion mmio_cmdqv;
     qemu_irq irq;
+    MemoryRegion mmio_vcmdq_page;
+    MemoryRegion mmio_vintf_page;
+    void *vcmdq_page0;
 } Tegra241CMDQV;
 
+#define VCMDQ_REG_PAGE_SIZE 0x10000
+
 #ifdef CONFIG_TEGRA241_CMDQV
 bool tegra241_cmdqv_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *idev,
                                  uint32_t *out_viommu_id, Error **errp);
-- 
2.43.0
Re: [RFC PATCH 06/16] hw/arm/tegra241-cmdqv: Map VINTF Page0 into guest
Posted by Eric Auger 2 weeks ago

On 12/10/25 2:37 PM, Shameer Kolothum wrote:
> From: Nicolin Chen <nicolinc@nvidia.com>
>
> Tegra241 CMDQV assigns each VINTF a 128KB MMIO region split into two
> 64 KB pages:
>  - Page0: guest accessible control/status registers for all VCMDQs
>  - Page1: configuration registers (queue GPA/size) that must be trapped
>           by the VMM and translated before programming the HW queue.
>
> This patch implements the Page0 handling in QEMU. Using the vintf offset
> returned by IOMMUFD during VIOMMU allocation, QEMU maps Page0 into
> guest physical address space and exposes it via two guest MMIO windows:
>  - 0x10000 :VCMDQ register
>  - 0x30000 :VINTF register

I would recommend adding a drawing showing the MR container, the
subregions, their offsets, and the content/nature of each region.

Eric
>
> The mapping is lazily initialized on first read/write.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> ---
>  hw/arm/tegra241-cmdqv.c | 60 +++++++++++++++++++++++++++++++++++++++++
>  hw/arm/tegra241-cmdqv.h |  5 ++++
>  2 files changed, 65 insertions(+)
>
> diff --git a/hw/arm/tegra241-cmdqv.c b/hw/arm/tegra241-cmdqv.c
> index 899325877e..d8858322dc 100644
> --- a/hw/arm/tegra241-cmdqv.c
> +++ b/hw/arm/tegra241-cmdqv.c
> @@ -13,14 +13,74 @@
>  #include "smmuv3-accel.h"
>  #include "tegra241-cmdqv.h"
>  
> +static bool tegra241_cmdqv_init_vcmdq_page0(Tegra241CMDQV *cmdqv, Error **errp)
> +{
> +    SMMUv3State *smmu = cmdqv->smmu;
> +    SMMUv3AccelState *s_accel = smmu->s_accel;
> +    IOMMUFDViommu *viommu;
> +    char *name;
> +
> +    if (!s_accel) {
> +        return true;
> +    }
> +
> +    viommu = &s_accel->viommu;
> +    if (!iommufd_backend_viommu_mmap(viommu->iommufd, viommu->viommu_id,
> +                                     VCMDQ_REG_PAGE_SIZE,
> +                                     cmdqv->cmdqv_data.out_vintf_mmap_offset,
> +                                     &cmdqv->vcmdq_page0, errp)) {
> +        cmdqv->vcmdq_page0 = NULL;
> +        return false;
> +    }
> +
> +    name = g_strdup_printf("%s vcmdq", memory_region_name(&cmdqv->mmio_cmdqv));
> +    memory_region_init_ram_device_ptr(&cmdqv->mmio_vcmdq_page,
> +                                      memory_region_owner(&cmdqv->mmio_cmdqv),
> +                                      name, 0x10000, cmdqv->vcmdq_page0);
> +    memory_region_add_subregion_overlap(&cmdqv->mmio_cmdqv, 0x10000,
> +                                        &cmdqv->mmio_vcmdq_page, 1);
> +    g_free(name);
> +
> +    name = g_strdup_printf("%s vintf", memory_region_name(&cmdqv->mmio_cmdqv));
> +    memory_region_init_ram_device_ptr(&cmdqv->mmio_vintf_page,
> +                                      memory_region_owner(&cmdqv->mmio_cmdqv),
> +                                      name, 0x10000, cmdqv->vcmdq_page0);
> +    memory_region_add_subregion_overlap(&cmdqv->mmio_cmdqv, 0x30000,
> +                                        &cmdqv->mmio_vintf_page, 1);
> +    g_free(name);
> +
> +    return true;
> +}
> +
>  static uint64_t tegra241_cmdqv_read(void *opaque, hwaddr offset, unsigned size)
>  {
> +    Tegra241CMDQV *cmdqv = (Tegra241CMDQV *)opaque;
> +    Error *local_err = NULL;
> +
> +    if (!cmdqv->vcmdq_page0) {
> +        tegra241_cmdqv_init_vcmdq_page0(cmdqv, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            local_err = NULL;
> +        }
> +    }
> +
>      return 0;
>  }
>  
>  static void tegra241_cmdqv_write(void *opaque, hwaddr offset, uint64_t value,
>                                   unsigned size)
>  {
> +    Tegra241CMDQV *cmdqv = (Tegra241CMDQV *)opaque;
> +    Error *local_err = NULL;
> +
> +    if (!cmdqv->vcmdq_page0) {
> +        tegra241_cmdqv_init_vcmdq_page0(cmdqv, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            local_err = NULL;
> +        }
> +    }
>  }
>  
>  static const MemoryRegionOps mmio_cmdqv_ops = {
> diff --git a/hw/arm/tegra241-cmdqv.h b/hw/arm/tegra241-cmdqv.h
> index 9bc72b24d9..ccdf0651be 100644
> --- a/hw/arm/tegra241-cmdqv.h
> +++ b/hw/arm/tegra241-cmdqv.h
> @@ -19,8 +19,13 @@ typedef struct Tegra241CMDQV {
>      SMMUv3State *smmu;
>      MemoryRegion mmio_cmdqv;
>      qemu_irq irq;
> +    MemoryRegion mmio_vcmdq_page;
> +    MemoryRegion mmio_vintf_page;
> +    void *vcmdq_page0;
>  } Tegra241CMDQV;
>  
> +#define VCMDQ_REG_PAGE_SIZE 0x10000
> +
>  #ifdef CONFIG_TEGRA241_CMDQV
>  bool tegra241_cmdqv_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *idev,
>                                   uint32_t *out_viommu_id, Error **errp);
Re: [RFC PATCH 06/16] hw/arm/tegra241-cmdqv: Map VINTF Page0 into guest
Posted by Eric Auger 2 weeks ago
Hi Shameer,

On 12/10/25 2:37 PM, Shameer Kolothum wrote:
> From: Nicolin Chen <nicolinc@nvidia.com>
>
> Tegra241 CMDQV assigns each VINTF a 128KB MMIO region split into two
> 64 KB pages:
>  - Page0: guest accessible control/status registers for all VCMDQs
>  - Page1: configuration registers (queue GPA/size) that must be trapped
>           by the VMM and translated before programming the HW queue.
>
> This patch implements the Page0 handling in QEMU. Using the vintf offset
> returned by IOMMUFD during VIOMMU allocation, QEMU maps Page0 into
> guest physical address space and exposes it via two guest MMIO windows:
>  - 0x10000 :VCMDQ register
>  - 0x30000 :VINTF register
>
> The mapping is lazily initialized on first read/write.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> ---
>  hw/arm/tegra241-cmdqv.c | 60 +++++++++++++++++++++++++++++++++++++++++
>  hw/arm/tegra241-cmdqv.h |  5 ++++
>  2 files changed, 65 insertions(+)
>
> diff --git a/hw/arm/tegra241-cmdqv.c b/hw/arm/tegra241-cmdqv.c
> index 899325877e..d8858322dc 100644
> --- a/hw/arm/tegra241-cmdqv.c
> +++ b/hw/arm/tegra241-cmdqv.c
> @@ -13,14 +13,74 @@
>  #include "smmuv3-accel.h"
>  #include "tegra241-cmdqv.h"
>  
> +static bool tegra241_cmdqv_init_vcmdq_page0(Tegra241CMDQV *cmdqv, Error **errp)
> +{
> +    SMMUv3State *smmu = cmdqv->smmu;
> +    SMMUv3AccelState *s_accel = smmu->s_accel;
> +    IOMMUFDViommu *viommu;
> +    char *name;
> +
> +    if (!s_accel) {
> +        return true;
> +    }
> +
> +    viommu = &s_accel->viommu;
> +    if (!iommufd_backend_viommu_mmap(viommu->iommufd, viommu->viommu_id,
> +                                     VCMDQ_REG_PAGE_SIZE,
> +                                     cmdqv->cmdqv_data.out_vintf_mmap_offset,
> +                                     &cmdqv->vcmdq_page0, errp)) {
> +        cmdqv->vcmdq_page0 = NULL;
> +        return false;
> +    }
> +
> +    name = g_strdup_printf("%s vcmdq", memory_region_name(&cmdqv->mmio_cmdqv));
> +    memory_region_init_ram_device_ptr(&cmdqv->mmio_vcmdq_page,
> +                                      memory_region_owner(&cmdqv->mmio_cmdqv),
> +                                      name, 0x10000, cmdqv->vcmdq_page0);
> +    memory_region_add_subregion_overlap(&cmdqv->mmio_cmdqv, 0x10000,
> +                                        &cmdqv->mmio_vcmdq_page, 1);
> +    g_free(name);
> +
> +    name = g_strdup_printf("%s vintf", memory_region_name(&cmdqv->mmio_cmdqv));
> +    memory_region_init_ram_device_ptr(&cmdqv->mmio_vintf_page,
> +                                      memory_region_owner(&cmdqv->mmio_cmdqv),
> +                                      name, 0x10000, cmdqv->vcmdq_page0);
I don't get why we need/have two RAM devices pointing to the same @ptr
= cmdqv->vcmdq_page0. Is 0x10000 the same as VCMDQ_REG_PAGE_SIZE?

The names of the MRs are quite confusing. If my understanding is correct,
we have:

cmdqv->mmio_cmdqv (0x50000 sized), which acts as the container for the two
subregions. Within that one we have two subregions, one at offset 0x10000
(mmio_vcmdq_page) and one at offset 0x30000 (cmdqv->mmio_vintf_page).

I have difficulty linking that with the commit message:

"Tegra241 CMDQV assigns each VINTF a 128KB MMIO region split into two
64 KB pages:
 - Page0: guest accessible control/status registers for all VCMDQs
 - Page1:configuration registers  ../.."
Are those two pages part of mmio_vcmdq_page?

Then you talk about "0x30000: VINTF register"; I guess this is the second
subregion, cmdqv->mmio_vintf_page.

Well, I am confused at this stage of the reading.

Also, without any spec this is difficult to understand. Is there a public doc?



> +    memory_region_add_subregion_overlap(&cmdqv->mmio_cmdqv, 0x30000,
> +                                        &cmdqv->mmio_vintf_page, 1);
> +    g_free(name);
> +
> +    return true;
> +}
> +
>  static uint64_t tegra241_cmdqv_read(void *opaque, hwaddr offset, unsigned size)
>  {
> +    Tegra241CMDQV *cmdqv = (Tegra241CMDQV *)opaque;
> +    Error *local_err = NULL;
> +
> +    if (!cmdqv->vcmdq_page0) {
> +        tegra241_cmdqv_init_vcmdq_page0(cmdqv, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            local_err = NULL;
> +        }
> +    }
> +
>      return 0;
>  }
>  
>  static void tegra241_cmdqv_write(void *opaque, hwaddr offset, uint64_t value,
>                                   unsigned size)
>  {
> +    Tegra241CMDQV *cmdqv = (Tegra241CMDQV *)opaque;
> +    Error *local_err = NULL;
> +
> +    if (!cmdqv->vcmdq_page0) {
> +        tegra241_cmdqv_init_vcmdq_page0(cmdqv, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            local_err = NULL;
> +        }
> +    }
>  }
>  
>  static const MemoryRegionOps mmio_cmdqv_ops = {
> diff --git a/hw/arm/tegra241-cmdqv.h b/hw/arm/tegra241-cmdqv.h
> index 9bc72b24d9..ccdf0651be 100644
> --- a/hw/arm/tegra241-cmdqv.h
> +++ b/hw/arm/tegra241-cmdqv.h
> @@ -19,8 +19,13 @@ typedef struct Tegra241CMDQV {
>      SMMUv3State *smmu;
>      MemoryRegion mmio_cmdqv;
>      qemu_irq irq;
> +    MemoryRegion mmio_vcmdq_page;
Please also use the _mr suffix, as in the rest of the code.
> +    MemoryRegion mmio_vintf_page;
> +    void *vcmdq_page0;
>  } Tegra241CMDQV;
>  
> +#define VCMDQ_REG_PAGE_SIZE 0x10000
> +
>  #ifdef CONFIG_TEGRA241_CMDQV
>  bool tegra241_cmdqv_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *idev,
>                                   uint32_t *out_viommu_id, Error **errp);
Thanks

Eric


Re: [RFC PATCH 06/16] hw/arm/tegra241-cmdqv: Map VINTF Page0 into guest
Posted by Nicolin Chen via qemu development 1 week, 6 days ago
On Mon, Jan 26, 2026 at 02:48:55PM +0100, Eric Auger wrote:

> > +    name = g_strdup_printf("%s vcmdq", memory_region_name(&cmdqv->mmio_cmdqv));
> > +    memory_region_init_ram_device_ptr(&cmdqv->mmio_vcmdq_page,
> > +                                      memory_region_owner(&cmdqv->mmio_cmdqv),
> > +                                      name, 0x10000, cmdqv->vcmdq_page0);
> > +    memory_region_add_subregion_overlap(&cmdqv->mmio_cmdqv, 0x10000,
> > +                                        &cmdqv->mmio_vcmdq_page, 1);
> > +    g_free(name);
> > +
> > +    name = g_strdup_printf("%s vintf", memory_region_name(&cmdqv->mmio_cmdqv));
> > +    memory_region_init_ram_device_ptr(&cmdqv->mmio_vintf_page,
> > +                                      memory_region_owner(&cmdqv->mmio_cmdqv),
> > +                                      name, 0x10000, cmdqv->vcmdq_page0);

> I don't get why we need/have 2 RAM devices pointing to the same @ptr
> = cmdqv->vcmdq_page0. Is 0x10000 ~ VCMDQ_REG_PAGE_SIZE?

The first one is for "vcmdq" and the second one is for "vintf".
Explaining below...

> The names of the MRs are quite confusing: If my understanding is correct
> we have;
> 
> cmdqv->mmio_cmdqv (0x50000 sized) which as the container for the 2 subregions.
> Then within that one we have 2 subregions, one at offset 0x10000 (mmio_vcmdq_page
> ), one at offset 0x30000 (cmdqv->mmio_vintf_page).

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c?h=v6.19-rc7#n19
It's defined in the kernel driver (yeah, we should clarify this in the
QEMU code as well).

So, the MMIO regions look like this:
 1st 64 KB page -- Global CMDQV registers
 2nd 64 KB page -- Global VCMDQ registers Page0 (mmap)
 3rd 64 KB page -- Global VCMDQ registers Page1 (trap)
 4th 64 KB page -- VINTF0 Logical VCMDQ registers Page0 (mmap)
 5th 64 KB page -- VINTF0 Logical VCMDQ registers Page1 (trap)

In real hardware, there will be a 6th 64 KB page and beyond, for VINTF1
and others. But we're omitting those in QEMU, as only VINTF0 will be
supported -- the kernel only exposes one VINTF per VM as well.
(Yes, we should clarify this too.)
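
E.g. a few offset macros in the QEMU code would make the layout explicit
(names here are illustrative, not taken from the kernel driver):

    /* 64 KB pages within the emulated CMDQV MMIO window */
    #define CMDQV_GLOBAL_PAGE_OFF  0x00000  /* Global CMDQV registers (trap) */
    #define VCMDQ_PAGE0_OFF        0x10000  /* Global VCMDQ Page0 (mmap) */
    #define VCMDQ_PAGE1_OFF        0x20000  /* Global VCMDQ Page1 (trap) */
    #define VINTF0_PAGE0_OFF       0x30000  /* VINTF0 logical VCMDQ Page0 (mmap) */
    #define VINTF0_PAGE1_OFF       0x40000  /* VINTF0 logical VCMDQ Page1 (trap) */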

> I have difficulties to link that with the commit message
> 
> "Tegra241 CMDQV assigns each VINTF a 128KB MMIO region split into two
> 64 KB pages:
>  - Page0: guest accessible control/status registers for all VCMDQs
>  - Page1:configuration registers  ../.."
> Those 2 pages, are they part of  mmio_vcmdq_page?

Not exactly. Both the global VCMDQ region and VINTF region have
their own pages (at 0x10000 and 0x30000).

Here, it duplicates the mapping to 0x10000 (Global VCMDQ page0)
and 0x30000 (VINTF0 page0) for simplicity, because we only
support VINTF0 in this case.

There is a little catch in this implementation. The real physical
mapping between a global VCMDQ and a logical VCMDQ happens when
QEMU calls the HW_QUEUE ioctl, so until then the mmap'd page0 has
no real VCMDQ backing the emulated VCMDQ. Perhaps QEMU should trap
page0 and delay the memory_region_init_ram_device_ptr() until the
HW_QUEUE ioctl is done?
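
A minimal sketch of that deferral (hw_queue_mapped is a hypothetical
flag that QEMU would set once the HW_QUEUE ioctl succeeds):

    static uint64_t tegra241_cmdqv_read(void *opaque, hwaddr offset,
                                        unsigned size)
    {
        Tegra241CMDQV *cmdqv = opaque;
        Error *local_err = NULL;

        /* Keep trapping page0 until a real VCMDQ backs the mapping */
        if (cmdqv->hw_queue_mapped && !cmdqv->vcmdq_page0) {
            if (!tegra241_cmdqv_init_vcmdq_page0(cmdqv, &local_err)) {
                error_report_err(local_err);
            }
        }
        return 0;
    }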

There might also be a corner case: when the kernel exposes two
physical VCMDQs but the guest OS only uses one, i.e. QEMU only
allocates one HW_QUEUE for VCMDQ0 and doesn't allocate VCMDQ1.
In such a case, the VINTF0 page0 should be able to control the
logical VCMDQ0 only, while the global page0 should control both.

We get away with this corner case for any guest OS running a Linux
kernel, because it only accesses the VINTF pages. But we likely
should do something about it.

> Then you talk about 0x30000 :VINTF register I guess this is the second cmdqv->mmio_vintf_page
> 
> Well I am confused at this stage of the reading.
> 
> Also without any spec, this is difficult to understand. Is there any public doc?

It seems that Red Hat can get the doc under NDA.

Thanks
Nicolin
Re: [RFC PATCH 06/16] hw/arm/tegra241-cmdqv: Map VINTF Page0 into guest
Posted by Eric Auger 1 week, 6 days ago
Hi Nicolin,

On 1/27/26 1:04 AM, Nicolin Chen wrote:
> On Mon, Jan 26, 2026 at 02:48:55PM +0100, Eric Auger wrote:
>
>>> +    name = g_strdup_printf("%s vcmdq", memory_region_name(&cmdqv->mmio_cmdqv));
>>> +    memory_region_init_ram_device_ptr(&cmdqv->mmio_vcmdq_page,
>>> +                                      memory_region_owner(&cmdqv->mmio_cmdqv),
>>> +                                      name, 0x10000, cmdqv->vcmdq_page0);
>>> +    memory_region_add_subregion_overlap(&cmdqv->mmio_cmdqv, 0x10000,
>>> +                                        &cmdqv->mmio_vcmdq_page, 1);
>>> +    g_free(name);
>>> +
>>> +    name = g_strdup_printf("%s vintf", memory_region_name(&cmdqv->mmio_cmdqv));
>>> +    memory_region_init_ram_device_ptr(&cmdqv->mmio_vintf_page,
>>> +                                      memory_region_owner(&cmdqv->mmio_cmdqv),
>>> +                                      name, 0x10000, cmdqv->vcmdq_page0);
>> I don't get why we need/have 2 RAM devices pointing to the same @ptr
>> = cmdqv->vcmdq_page0. Is 0x10000 ~ VCMDQ_REG_PAGE_SIZE?
> The first one is for "vcmdq" and the second one is for "vintf".
> Explaining below...
>
>> The names of the MRs are quite confusing: If my understanding is correct
>> we have;
>>
>> cmdqv->mmio_cmdqv (0x50000 sized) which as the container for the 2 subregions.
>> Then within that one we have 2 subregions, one at offset 0x10000 (mmio_vcmdq_page
>> ), one at offset 0x30000 (cmdqv->mmio_vintf_page).
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c?h=v6.19-rc7#n19
> It's defined in the kernel driver (yea, we should clarify in the
> QEMU code as well).
>
> So, the MMIO regions look like this:
>  1st 64 KB page -- Global CMDQV registers
>  2nd 64 KB page -- Global VCMDQ registers Page0 (mmap)
>  3rd 64 KB page -- Global VCMDQ registers Page1 (trap)
>  4th 64 KB page -- VINTF0 Logical VCMDQ registers Page0 (mmap)
>  5th 64 KB page -- VINTF0 Logical VCMDQ registers Page1 (trap)

This kind of layout representation is really helpful
>
> In real hardware, there will be 6th 64KB and beyond, for VINTF1
> and others. But, we're omitting here in QEMU as only VINTF0 will
> be supported -- kernel only exposes one VINTF per VM as well.
> (Yes, we should clarify this too.)
OK
>
>> I have difficulties to link that with the commit message
>>
>> "Tegra241 CMDQV assigns each VINTF a 128KB MMIO region split into two
>> 64 KB pages:
>>  - Page0: guest accessible control/status registers for all VCMDQs
>>  - Page1:configuration registers  ../.."
>> Those 2 pages, are they part of  mmio_vcmdq_page?
> Not exactly. Both the global VCMDQ region and VINTF region have
> their own pages (at 0x10000 and 0x30000).
>
> Here, it duplicates the mapping to 0x10000 (Global VCMDQ page0)
> and 0x30000 (VINTF0 page0) for simplification, because we only
> support VINTF0 in this case.
so Global VCMDQ registers Page0 and VINTF0 Logical VCMDQ registers Page0
are basically the same?

I would recommend using cmdqv->mmio_vcmdq_page0 and
cmdqv->mmio_vintf_page0 to avoid any misunderstanding.
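
I.e. (also picking up the earlier remark about the _mr suffix) something
like:

    MemoryRegion vcmdq_page0_mr;
    MemoryRegion vintf_page0_mr;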
>
> There is a little catch in this implementation. The real physical
> mapping between a global VCMDQ and a logical VCMDQ happens when
> QEMU calls HW_QUEUE ioctl. So, the mmap'd page0 doesn't have any
> real VCMDQ backing up the emulated VCMDQ. So, perhaps QEMU should
> trap the page0 and delay the memory_region_init_ram_device_ptr()
> until the HW_QUEUE ioctl is done?
might be safer indeed.
>
> There might be also a corner case: when the kernel exposes two
> physical VCMDQs, but the guest OS only uses one, i.e. QEMU only
> allocates one HW_QUEUE for VCMDQ0 but doesn't allocate VCMDQ1.
> > In such a case, the VINTF0 page0 should be able to control the
> logical VCMDQ0 only, while the global page0 should control both.

you lost me. Need to look at the kernel or spec ;-)
>
> We are getting away this corner case with any guest OS running
> Linux kernel because it only accesses VTINF pages. But likely we
> should do something about it..
>
>> Then you talk about 0x30000 :VINTF register I guess this is the second cmdqv->mmio_vintf_page
>>
>> Well I am confused at this stage of the reading.
>>
>> Also without any spec, this is difficult to understand. Is there any public doc?
> It seems that Red Hat can get the doc under NDA..
OK thanks

Eric
>
> Thanks
> Nicolin
>


Re: [RFC PATCH 06/16] hw/arm/tegra241-cmdqv: Map VINTF Page0 into guest
Posted by Nicolin Chen via qemu development 1 week, 5 days ago
On Tue, Jan 27, 2026 at 02:23:33PM +0100, Eric Auger wrote:
> On 1/27/26 1:04 AM, Nicolin Chen wrote:
> > On Mon, Jan 26, 2026 at 02:48:55PM +0100, Eric Auger wrote:
> > So, the MMIO regions look like this:
> >  1st 64 KB page -- Global CMDQV registers
> >  2nd 64 KB page -- Global VCMDQ registers Page0 (mmap)
> >  3rd 64 KB page -- Global VCMDQ registers Page1 (trap)
> >  4th 64 KB page -- VINTF0 Logical VCMDQ registers Page0 (mmap)
> >  5th 64 KB page -- VINTF0 Logical VCMDQ registers Page1 (trap)
[...]
> >> I have difficulties to link that with the commit message
> >>
> >> "Tegra241 CMDQV assigns each VINTF a 128KB MMIO region split into two
> >> 64 KB pages:
> >>  - Page0: guest accessible control/status registers for all VCMDQs
> >>  - Page1:configuration registers  ../.."
> >> Those 2 pages, are they part of  mmio_vcmdq_page?
> > Not exactly. Both the global VCMDQ region and VINTF region have
> > their own pages (at 0x10000 and 0x30000).
> >
> > Here, it duplicates the mapping to 0x10000 (Global VCMDQ page0)
> > and 0x30000 (VINTF0 page0) for simplification, because we only
> > support VINTF0 in this case.

> so Global VCMDQ registers Page0 and VINTF0 Logical VCMDQ registers Page0
> are basically the same?

Not exactly the same.

The global page0 is programmable at any time so long as CMDQV_EN
is enabled.

The logical page0 is programmable only when SW allocates and maps
global vcmdq(s) to a VINTF. "logical" also means "local" to that
VINTF.

> I would recommend to use cmdqv->mmio_vcmdq_page0 and
> cmdqv->mmio_vintf_page0 to avoid any misunderstanding

Yea, that makes sense to me.

> > There is a little catch in this implementation. The real physical
> > mapping between a global VCMDQ and a logical VCMDQ happens when
> > QEMU calls HW_QUEUE ioctl. So, the mmap'd page0 doesn't have any
> > real VCMDQ backing up the emulated VCMDQ. So, perhaps QEMU should
> > trap the page0 and delay the memory_region_init_ram_device_ptr()
> > until the HW_QUEUE ioctl is done?
> might be safer indeed.
> >
> > There might be also a corner case: when the kernel exposes two
> > physical VCMDQs, but the guest OS only uses one, i.e. QEMU only
> > allocates one HW_QUEUE for VCMDQ0 but doesn't allocate VCMDQ1.
> > In such a case, the VINTF0 page0 should be able to control the
> > logical VCMDQ0 only, while the global page0 should control both.
> 
> you lost me. Need to look at the kernel or spec ;-)

That was about supporting a partial local vcmdq mapping in QEMU,
as the guest OS is allowed to do so from a HW simulation perspective.

E.g.

A VINTF exposed by the kernel supports a maximum of 2 VCMDQs. IOW, the
VM owns 2 global vcmdqs, and mmio_vcmdq_page0 is supposed to have
access to both vcmdqs.

The kernel only exposes the physical VINTF's page0 via mmap, for
security reasons. It starts with 0 logical vcmdqs, i.e. no access to
any global vcmdq via mmio_vcmdq_page0 or mmio_vintf_page0.

The guest SW only allocates and maps one global vcmdq to the VINTF,
which means only one HW_QUEUE ioctl is invoked, so the kernel only
maps one global vcmdq accordingly. Then, both page0 windows only have
access to one global vcmdq via their logical mapping.

In such a case, memory_region_init_ram_device_ptr() to 0x10000
for mmio_vcmdq_page0 is basically wrong, since it's supposed to
have access to both global vcmdqs.

So, this makes things tricky to support.

What we could likely do:
  - only mmap mmio_vintf_page0 to 0x30000 (logical page0)
  - drop mmio_vcmdq_page0 and trap 0x10000 (global page0)
    a) if the vcmdq index hits a mapped global vcmdq, forward the
       read/write to mmio_vintf_page0.
    b) if the vcmdq index hits an unmapped global vcmdq, read/write
       the value in the register array cached by QEMU.

This would likely hurt performance if the guest OS uses an LVCMDQ
via the global page0, but functionally it should be okay.
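
A rough sketch of that trap path for reads (vcmdq_mapped[] and
vcmdq_shadow[] are hypothetical QEMU-side state, and the per-VCMDQ
register stride is illustrative):

    static uint64_t cmdqv_vcmdq_page0_read(Tegra241CMDQV *cmdqv,
                                           hwaddr offset)
    {
        unsigned qidx = offset / 0x80;  /* illustrative per-VCMDQ stride */

        if (cmdqv->vcmdq_mapped[qidx]) {
            /* a) mapped global vcmdq: forward to the mmap'd logical page0 */
            return ldl_le_p((char *)cmdqv->vcmdq_page0 + offset);
        }
        /* b) unmapped global vcmdq: serve from QEMU's cached registers */
        return cmdqv->vcmdq_shadow[offset / sizeof(uint32_t)];
    }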

Nicolin
RE: [RFC PATCH 06/16] hw/arm/tegra241-cmdqv: Map VINTF Page0 into guest
Posted by Shameer Kolothum 1 week, 6 days ago
Hi Eric,

> -----Original Message-----
> From: Eric Auger <eric.auger@redhat.com>
> Sent: 26 January 2026 13:49
> To: Shameer Kolothum <skolothumtho@nvidia.com>; qemu-
> arm@nongnu.org; qemu-devel@nongnu.org
> Cc: peter.maydell@linaro.org; Nicolin Chen <nicolinc@nvidia.com>; Nathan
> Chen <nathanc@nvidia.com>; Matt Ochs <mochs@nvidia.com>; Jason
> Gunthorpe <jgg@nvidia.com>; jonathan.cameron@huawei.com;
> zhangfei.gao@linaro.org; zhenzhong.duan@intel.com; Krishnakant Jaju
> <kjaju@nvidia.com>
> Subject: Re: [RFC PATCH 06/16] hw/arm/tegra241-cmdqv: Map VINTF Page0
> into guest
> 
> Hi Shameer,
> 
> On 12/10/25 2:37 PM, Shameer Kolothum wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> >
> > Tegra241 CMDQV assigns each VINTF a 128KB MMIO region split into two
> > 64 KB pages:
> >  - Page0: guest accessible control/status registers for all VCMDQs
> >  - Page1: configuration registers (queue GPA/size) that must be trapped
> >           by the VMM and translated before programming the HW queue.
> >
> > This patch implements the Page0 handling in QEMU. Using the vintf offset
> > returned by IOMMUFD during VIOMMU allocation, QEMU maps Page0 into
> > guest physical address space and exposes it via two guest MMIO windows:
> >  - 0x10000 :VCMDQ register
> >  - 0x30000 :VINTF register
> >
> > The mapping is lazily initialized on first read/write.
> >
> > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> > Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
> > ---
> >  hw/arm/tegra241-cmdqv.c | 60 +++++++++++++++++++++++++++++++++++++++++
> >  hw/arm/tegra241-cmdqv.h |  5 ++++
> >  2 files changed, 65 insertions(+)
> >
> > diff --git a/hw/arm/tegra241-cmdqv.c b/hw/arm/tegra241-cmdqv.c
> > index 899325877e..d8858322dc 100644
> > --- a/hw/arm/tegra241-cmdqv.c
> > +++ b/hw/arm/tegra241-cmdqv.c
> > @@ -13,14 +13,74 @@
> >  #include "smmuv3-accel.h"
> >  #include "tegra241-cmdqv.h"
> >
> > +static bool tegra241_cmdqv_init_vcmdq_page0(Tegra241CMDQV *cmdqv, Error **errp)
> > +{
> > +    SMMUv3State *smmu = cmdqv->smmu;
> > +    SMMUv3AccelState *s_accel = smmu->s_accel;
> > +    IOMMUFDViommu *viommu;
> > +    char *name;
> > +
> > +    if (!s_accel) {
> > +        return true;
> > +    }
> > +
> > +    viommu = &s_accel->viommu;
> > +    if (!iommufd_backend_viommu_mmap(viommu->iommufd, viommu-
> >viommu_id,
> > +                                     VCMDQ_REG_PAGE_SIZE,
> > +                                     cmdqv->cmdqv_data.out_vintf_mmap_offset,
> > +                                     &cmdqv->vcmdq_page0, errp)) {
> > +        cmdqv->vcmdq_page0 = NULL;
> > +        return false;
> > +    }
> > +
> > +    name = g_strdup_printf("%s vcmdq", memory_region_name(&cmdqv-
> >mmio_cmdqv));
> > +    memory_region_init_ram_device_ptr(&cmdqv->mmio_vcmdq_page,
> > +                                      memory_region_owner(&cmdqv->mmio_cmdqv),
> > +                                      name, 0x10000, cmdqv->vcmdq_page0);
> > +    memory_region_add_subregion_overlap(&cmdqv->mmio_cmdqv, 0x10000,
> > +                                        &cmdqv->mmio_vcmdq_page, 1);
> > +    g_free(name);
> > +
> > +    name = g_strdup_printf("%s vintf", memory_region_name(&cmdqv-
> >mmio_cmdqv));
> > +    memory_region_init_ram_device_ptr(&cmdqv->mmio_vintf_page,
> > +                                      memory_region_owner(&cmdqv->mmio_cmdqv),
> > +                                      name, 0x10000, cmdqv->vcmdq_page0);
> I don't get why we need/have 2 RAM devices pointing to the same @ptr
> = cmdqv->vcmdq_page0. Is 0x10000 ~ VCMDQ_REG_PAGE_SIZE?

Looking at this again, I am now a bit unsure as well why we need these
memory_region_init_ram_device_ptr() calls here.

I will go back, discuss with Nicolin, and get back to you. I think we
need to document the memory layout better here; that will avoid the
confusion.

(Also, I will check whether we have a public link for the spec.)

Thanks,
Shameer
Re: [RFC PATCH 06/16] hw/arm/tegra241-cmdqv: Map VINTF Page0 into guest
Posted by Nicolin Chen 1 month, 1 week ago
On Wed, Dec 10, 2025 at 01:37:27PM +0000, Shameer Kolothum wrote:
> From: Nicolin Chen <nicolinc@nvidia.com>
> 
> Tegra241 CMDQV assigns each VINTF a 128KB MMIO region split into two
> 64 KB pages:
>  - Page0: guest accessible control/status registers for all VCMDQs
>  - Page1: configuration registers (queue GPA/size) that must be trapped
>           by the VMM and translated before programming the HW queue.
> 
> This patch implements the Page0 handling in QEMU. Using the vintf offset
> returned by IOMMUFD during VIOMMU allocation, QEMU maps Page0 into
> guest physical address space and exposes it via two guest MMIO windows:
>  - 0x10000 :VCMDQ register

global VCMDQ MMIO pages.

>  - 0x30000 :VINTF register

private VINTF MMIO pages.

> +static bool tegra241_cmdqv_init_vcmdq_page0(Tegra241CMDQV *cmdqv, Error **errp)
> +{
> +    SMMUv3State *smmu = cmdqv->smmu;
> +    SMMUv3AccelState *s_accel = smmu->s_accel;
> +    IOMMUFDViommu *viommu;
> +    char *name;
> +
> +    if (!s_accel) {
> +        return true;
> +    }

g_assert?

The entire thing can't work without s_accel, so returning true
doesn't seem to make sense.

> +    viommu = &s_accel->viommu;
> +    if (!iommufd_backend_viommu_mmap(viommu->iommufd, viommu->viommu_id,
> +                                     VCMDQ_REG_PAGE_SIZE,
> +                                     cmdqv->cmdqv_data.out_vintf_mmap_offset,
> +                                     &cmdqv->vcmdq_page0, errp)) {
> +        cmdqv->vcmdq_page0 = NULL;

We probably shouldn't nuke the vcmdq_page0.

And I think we should add g_assert(!cmdqv->vcmdq_page0) too. It
would be a bug if we pass in a valid page0 pointer.
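
I.e. something like this at the top of the function:

    static bool tegra241_cmdqv_init_vcmdq_page0(Tegra241CMDQV *cmdqv,
                                                Error **errp)
    {
        SMMUv3State *smmu = cmdqv->smmu;
        SMMUv3AccelState *s_accel = smmu->s_accel;

        /* The whole CMDQV path can't work without the accel state */
        g_assert(s_accel);
        /* Re-initializing an already-mapped page0 would be a QEMU bug */
        g_assert(!cmdqv->vcmdq_page0);
        ...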

> +    name = g_strdup_printf("%s vcmdq", memory_region_name(&cmdqv->mmio_cmdqv));
> +    memory_region_init_ram_device_ptr(&cmdqv->mmio_vcmdq_page,
> +                                      memory_region_owner(&cmdqv->mmio_cmdqv),
> +                                      name, 0x10000, cmdqv->vcmdq_page0);
> +    memory_region_add_subregion_overlap(&cmdqv->mmio_cmdqv, 0x10000,
> +                                        &cmdqv->mmio_vcmdq_page, 1);
> +    g_free(name);
> +
> +    name = g_strdup_printf("%s vintf", memory_region_name(&cmdqv->mmio_cmdqv));
> +    memory_region_init_ram_device_ptr(&cmdqv->mmio_vintf_page,
> +                                      memory_region_owner(&cmdqv->mmio_cmdqv),
> +                                      name, 0x10000, cmdqv->vcmdq_page0);
> +    memory_region_add_subregion_overlap(&cmdqv->mmio_cmdqv, 0x30000,
> +                                        &cmdqv->mmio_vintf_page, 1);

Let's add some comments here (maybe something similar in the commit log too):

    /*
     * Each VM can only own one VINTF exposed by the kernel via a VIOMMU object.
     * And all available VCMDQs are already preallocated in the VINTF. Thus, the
     * global VCMDQ MMIO page0 and the private VINTF MMIO page0 are effectively
     * the same, i.e. cmdqv->vcmdq_page0.
     */

Nicolin