[PATCH v3 15/32] hw/arm/tegra241-cmdqv: Emulate global and VINTF VCMDQ register reads

Shameer Kolothum posted 32 patches 3 days, 21 hours ago
[PATCH v3 15/32] hw/arm/tegra241-cmdqv: Emulate global and VINTF VCMDQ register reads
Posted by Shameer Kolothum 3 days, 21 hours ago
From: Nicolin Chen <nicolinc@nvidia.com>

Tegra241 CMDQV exposes per-VCMDQ register windows through two MMIO views:

  - Global VCMDQ registers at 0x10000/0x20000
  - VINTF VCMDQ (VI_VCMDQ) registers at 0x30000/0x40000

The VI_VCMDQ register ranges are an alias of the global VCMDQ registers
and are only meaningful when a VCMDQ is mapped to a VINTF via ioctl
IOMMU_HW_QUEUE_ALLOC.

Add read side emulation for both global VCMDQ and VI_VCMDQ register
ranges. MMIO accesses are decoded to extract the VCMDQ instance index
and normalized to a VCMDQ0_* register offset, allowing a single helper
to service all VCMDQ instances.

VI_VCMDQ accesses are translated to their equivalent global VCMDQ
offsets and reuse the same decoding path. All VCMDQ reads are currently
served from cached register state.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
---
 hw/arm/tegra241-cmdqv.h | 178 ++++++++++++++++++++++++++++++++++++++++
 hw/arm/tegra241-cmdqv.c |  77 +++++++++++++++++
 2 files changed, 255 insertions(+)

diff --git a/hw/arm/tegra241-cmdqv.h b/hw/arm/tegra241-cmdqv.h
index 50bcecee9d..d379b8860c 100644
--- a/hw/arm/tegra241-cmdqv.h
+++ b/hw/arm/tegra241-cmdqv.h
@@ -48,6 +48,14 @@ typedef struct Tegra241CMDQV {
     uint32_t vintf_sid_match[16];
     uint32_t vintf_sid_replace[16];
     uint32_t vintf_cmdq_err_map[4];
+    uint32_t vcmdq_cons_indx[TEGRA241_CMDQV_MAX_CMDQ];      /* VCMDQi_CONS_INDX */
+    uint32_t vcmdq_prod_indx[TEGRA241_CMDQV_MAX_CMDQ];      /* VCMDQi_PROD_INDX */
+    uint32_t vcmdq_config[TEGRA241_CMDQV_MAX_CMDQ];         /* VCMDQi_CONFIG */
+    uint32_t vcmdq_status[TEGRA241_CMDQV_MAX_CMDQ];         /* VCMDQi_STATUS */
+    uint32_t vcmdq_gerror[TEGRA241_CMDQV_MAX_CMDQ];         /* VCMDQi_GERROR */
+    uint32_t vcmdq_gerrorn[TEGRA241_CMDQV_MAX_CMDQ];        /* VCMDQi_GERRORN */
+    uint64_t vcmdq_base[TEGRA241_CMDQV_MAX_CMDQ];           /* VCMDQi_BASE_{L,H} */
+    uint64_t vcmdq_cons_indx_base[TEGRA241_CMDQV_MAX_CMDQ]; /* VCMDQi_CONS_INDX_BASE_DRAM_{L,H} */
 } Tegra241CMDQV;
 
 /* Global CMDQV MMIO registers (offset 0x00000) */
@@ -141,6 +149,176 @@ A_VINTFi_LVCMDQ_ERR_MAP_(0, 0)
 /* Omitting [0][1~2] as not being directly called */
 A_VINTFi_LVCMDQ_ERR_MAP_(0, 3)
 
+/*
+ * VCMDQ register windows; each VCMDQ occupies a 0x80-byte slot per page.
+ *
+ * Page 0 @ 0x10000: VCMDQ control and status registers
+ * Page 1 @ 0x20000: VCMDQ base and DRAM address registers
+ */
+#define A_VCMDQi_CONS_INDX(i)                       \
+    REG32(VCMDQ##i##_CONS_INDX, 0x10000 + i * 0x80) \
+    FIELD(VCMDQ##i##_CONS_INDX, RD, 0, 20)          \
+    FIELD(VCMDQ##i##_CONS_INDX, ERR, 24, 7)
+
+A_VCMDQi_CONS_INDX(0)
+A_VCMDQi_CONS_INDX(1)
+
+#define V_VCMDQ_CONS_INDX_ERR_CERROR_NONE 0
+#define V_VCMDQ_CONS_INDX_ERR_CERROR_ILL_OPCODE 1
+#define V_VCMDQ_CONS_INDX_ERR_CERROR_ABT 2
+#define V_VCMDQ_CONS_INDX_ERR_CERROR_ATC_INV_SYNC 3
+#define V_VCMDQ_CONS_INDX_ERR_CERROR_ILL_ACCESS 4
+
+#define A_VCMDQi_PROD_INDX(i)                             \
+    REG32(VCMDQ##i##_PROD_INDX, 0x10000 + 0x4 + i * 0x80) \
+    FIELD(VCMDQ##i##_PROD_INDX, WR, 0, 20)
+
+A_VCMDQi_PROD_INDX(0)
+A_VCMDQi_PROD_INDX(1)
+
+#define A_VCMDQi_CONFIG(i)                             \
+    REG32(VCMDQ##i##_CONFIG, 0x10000 + 0x8 + i * 0x80) \
+    FIELD(VCMDQ##i##_CONFIG, CMDQ_EN, 0, 1)
+
+A_VCMDQi_CONFIG(0)
+A_VCMDQi_CONFIG(1)
+
+#define A_VCMDQi_STATUS(i)                             \
+    REG32(VCMDQ##i##_STATUS, 0x10000 + 0xc + i * 0x80) \
+    FIELD(VCMDQ##i##_STATUS, CMDQ_EN_OK, 0, 1)
+
+A_VCMDQi_STATUS(0)
+A_VCMDQi_STATUS(1)
+
+#define A_VCMDQi_GERROR(i)                               \
+    REG32(VCMDQ##i##_GERROR, 0x10000 + 0x10 + i * 0x80)  \
+    FIELD(VCMDQ##i##_GERROR, CMDQ_ERR, 0, 1)             \
+    FIELD(VCMDQ##i##_GERROR, CONS_DRAM_WR_ABT_ERR, 1, 1) \
+    FIELD(VCMDQ##i##_GERROR, CMDQ_INIT_ERR, 2, 1)
+
+A_VCMDQi_GERROR(0)
+A_VCMDQi_GERROR(1)
+
+#define A_VCMDQi_GERRORN(i)                               \
+    REG32(VCMDQ##i##_GERRORN, 0x10000 + 0x14 + i * 0x80)  \
+    FIELD(VCMDQ##i##_GERRORN, CMDQ_ERR, 0, 1)             \
+    FIELD(VCMDQ##i##_GERRORN, CONS_DRAM_WR_ABT_ERR, 1, 1) \
+    FIELD(VCMDQ##i##_GERRORN, CMDQ_INIT_ERR, 2, 1)
+
+A_VCMDQi_GERRORN(0)
+A_VCMDQi_GERRORN(1)
+
+#define A_VCMDQi_BASE_L(i)                       \
+    REG32(VCMDQ##i##_BASE_L, 0x20000 + i * 0x80) \
+    FIELD(VCMDQ##i##_BASE_L, LOG2SIZE, 0, 5)     \
+    FIELD(VCMDQ##i##_BASE_L, ADDR, 5, 27)
+
+A_VCMDQi_BASE_L(0)
+A_VCMDQi_BASE_L(1)
+
+#define A_VCMDQi_BASE_H(i)                             \
+    REG32(VCMDQ##i##_BASE_H, 0x20000 + 0x4 + i * 0x80) \
+    FIELD(VCMDQ##i##_BASE_H, ADDR, 0, 16)
+
+A_VCMDQi_BASE_H(0)
+A_VCMDQi_BASE_H(1)
+
+#define A_VCMDQi_CONS_INDX_BASE_DRAM_L(i)                             \
+    REG32(VCMDQ##i##_CONS_INDX_BASE_DRAM_L, 0x20000 + 0x8 + i * 0x80) \
+    FIELD(VCMDQ##i##_CONS_INDX_BASE_DRAM_L, ADDR, 0, 32)
+
+A_VCMDQi_CONS_INDX_BASE_DRAM_L(0)
+A_VCMDQi_CONS_INDX_BASE_DRAM_L(1)
+
+#define A_VCMDQi_CONS_INDX_BASE_DRAM_H(i)                             \
+    REG32(VCMDQ##i##_CONS_INDX_BASE_DRAM_H, 0x20000 + 0xc + i * 0x80) \
+    FIELD(VCMDQ##i##_CONS_INDX_BASE_DRAM_H, ADDR, 0, 16)
+
+A_VCMDQi_CONS_INDX_BASE_DRAM_H(0)
+A_VCMDQi_CONS_INDX_BASE_DRAM_H(1)
+
+/*
+ * VI_VCMDQ register windows (VCMDQs mapped via VINTF): an alias of the
+ * global VCMDQ windows above, shifted up by 0x20000.
+ * Page 0 @ 0x30000: VI_VCMDQ control and status registers
+ * Page 1 @ 0x40000: VI_VCMDQ base and DRAM address registers
+ */
+#define A_VI_VCMDQi_CONS_INDX(i)                       \
+    REG32(VI_VCMDQ##i##_CONS_INDX, 0x30000 + i * 0x80) \
+    FIELD(VI_VCMDQ##i##_CONS_INDX, RD, 0, 20)          \
+    FIELD(VI_VCMDQ##i##_CONS_INDX, ERR, 24, 7)
+
+A_VI_VCMDQi_CONS_INDX(0)
+A_VI_VCMDQi_CONS_INDX(1)
+
+#define A_VI_VCMDQi_PROD_INDX(i)                             \
+    REG32(VI_VCMDQ##i##_PROD_INDX, 0x30000 + 0x4 + i * 0x80) \
+    FIELD(VI_VCMDQ##i##_PROD_INDX, WR, 0, 20)
+
+A_VI_VCMDQi_PROD_INDX(0)
+A_VI_VCMDQi_PROD_INDX(1)
+
+#define A_VI_VCMDQi_CONFIG(i)                             \
+    REG32(VI_VCMDQ##i##_CONFIG, 0x30000 + 0x8 + i * 0x80) \
+    FIELD(VI_VCMDQ##i##_CONFIG, CMDQ_EN, 0, 1)
+
+A_VI_VCMDQi_CONFIG(0)
+A_VI_VCMDQi_CONFIG(1)
+
+#define A_VI_VCMDQi_STATUS(i)                             \
+    REG32(VI_VCMDQ##i##_STATUS, 0x30000 + 0xc + i * 0x80) \
+    FIELD(VI_VCMDQ##i##_STATUS, CMDQ_EN_OK, 0, 1)
+
+A_VI_VCMDQi_STATUS(0)
+A_VI_VCMDQi_STATUS(1)
+
+#define A_VI_VCMDQi_GERROR(i)                               \
+    REG32(VI_VCMDQ##i##_GERROR, 0x30000 + 0x10 + i * 0x80)  \
+    FIELD(VI_VCMDQ##i##_GERROR, CMDQ_ERR, 0, 1)             \
+    FIELD(VI_VCMDQ##i##_GERROR, CONS_DRAM_WR_ABT_ERR, 1, 1) \
+    FIELD(VI_VCMDQ##i##_GERROR, CMDQ_INIT_ERR, 2, 1)
+
+A_VI_VCMDQi_GERROR(0)
+A_VI_VCMDQi_GERROR(1)
+
+#define A_VI_VCMDQi_GERRORN(i)                               \
+    REG32(VI_VCMDQ##i##_GERRORN, 0x30000 + 0x14 + i * 0x80)  \
+    FIELD(VI_VCMDQ##i##_GERRORN, CMDQ_ERR, 0, 1)             \
+    FIELD(VI_VCMDQ##i##_GERRORN, CONS_DRAM_WR_ABT_ERR, 1, 1) \
+    FIELD(VI_VCMDQ##i##_GERRORN, CMDQ_INIT_ERR, 2, 1)
+
+A_VI_VCMDQi_GERRORN(0)
+A_VI_VCMDQi_GERRORN(1)
+
+#define A_VI_VCMDQi_BASE_L(i)                       \
+    REG32(VI_VCMDQ##i##_BASE_L, 0x40000 + i * 0x80) \
+    FIELD(VI_VCMDQ##i##_BASE_L, LOG2SIZE, 0, 5)     \
+    FIELD(VI_VCMDQ##i##_BASE_L, ADDR, 5, 27)
+
+A_VI_VCMDQi_BASE_L(0)
+A_VI_VCMDQi_BASE_L(1)
+
+#define A_VI_VCMDQi_BASE_H(i)                             \
+    REG32(VI_VCMDQ##i##_BASE_H, 0x40000 + 0x4 + i * 0x80) \
+    FIELD(VI_VCMDQ##i##_BASE_H, ADDR, 0, 16)
+
+A_VI_VCMDQi_BASE_H(0)
+A_VI_VCMDQi_BASE_H(1)
+
+#define A_VI_VCMDQi_CONS_INDX_BASE_DRAM_L(i)                             \
+    REG32(VI_VCMDQ##i##_CONS_INDX_BASE_DRAM_L, 0x40000 + 0x8 + i * 0x80) \
+    FIELD(VI_VCMDQ##i##_CONS_INDX_BASE_DRAM_L, ADDR, 0, 32)
+
+A_VI_VCMDQi_CONS_INDX_BASE_DRAM_L(0)
+A_VI_VCMDQi_CONS_INDX_BASE_DRAM_L(1)
+
+#define A_VI_VCMDQi_CONS_INDX_BASE_DRAM_H(i)                             \
+    REG32(VI_VCMDQ##i##_CONS_INDX_BASE_DRAM_H, 0x40000 + 0xc + i * 0x80) \
+    FIELD(VI_VCMDQ##i##_CONS_INDX_BASE_DRAM_H, ADDR, 0, 16)
+
+A_VI_VCMDQi_CONS_INDX_BASE_DRAM_H(0)
+A_VI_VCMDQi_CONS_INDX_BASE_DRAM_H(1)
+
 const SMMUv3AccelCmdqvOps *tegra241_cmdqv_get_ops(void);
 
 #endif /* HW_ARM_TEGRA241_CMDQV_H */
diff --git a/hw/arm/tegra241-cmdqv.c b/hw/arm/tegra241-cmdqv.c
index a3830a02d6..d2e6938e44 100644
--- a/hw/arm/tegra241-cmdqv.c
+++ b/hw/arm/tegra241-cmdqv.c
@@ -14,6 +14,46 @@
 #include "smmuv3-accel.h"
 #include "tegra241-cmdqv.h"
 
+/*
+ * Read a VCMDQ register using VCMDQ0_* offsets.
+ *
+ * The caller normalizes the MMIO offset such that @offset0 always refers
+ * to a VCMDQ0_* register, while @index selects the VCMDQ instance.
+ *
+ * All VCMDQ reads are currently served from cached register state.
+ */
+static uint64_t tegra241_cmdqv_read_vcmdq(Tegra241CMDQV *cmdqv, hwaddr offset0,
+                                          int index)
+{
+    switch (offset0) {
+    case A_VCMDQ0_CONS_INDX:
+        return cmdqv->vcmdq_cons_indx[index];
+    case A_VCMDQ0_PROD_INDX:
+        return cmdqv->vcmdq_prod_indx[index];
+    case A_VCMDQ0_CONFIG:
+        return cmdqv->vcmdq_config[index];
+    case A_VCMDQ0_STATUS:
+        return cmdqv->vcmdq_status[index];
+    case A_VCMDQ0_GERROR:
+        return cmdqv->vcmdq_gerror[index];
+    case A_VCMDQ0_GERRORN:
+        return cmdqv->vcmdq_gerrorn[index];
+    case A_VCMDQ0_BASE_L:
+        return cmdqv->vcmdq_base[index];
+    case A_VCMDQ0_BASE_H:
+        return cmdqv->vcmdq_base[index] >> 32;
+    case A_VCMDQ0_CONS_INDX_BASE_DRAM_L:
+        return cmdqv->vcmdq_cons_indx_base[index];
+    case A_VCMDQ0_CONS_INDX_BASE_DRAM_H:
+        return cmdqv->vcmdq_cons_indx_base[index] >> 32;
+    default:
+        /* @offset0 is VCMDQ0-relative, so also log which instance */
+        qemu_log_mask(LOG_UNIMP,
+                      "%s unhandled VCMDQ%d read at 0x%" PRIx64 "\n",
+                      __func__, index, offset0);
+        return 0;
+    }
+}
+
 static uint64_t tegra241_cmdqv_read_vintf(Tegra241CMDQV *cmdqv, hwaddr offset)
 {
     int i;
@@ -42,6 +82,7 @@ static uint64_t tegra241_cmdqv_read_vintf(Tegra241CMDQV *cmdqv, hwaddr offset)
 static uint64_t tegra241_cmdqv_read(void *opaque, hwaddr offset, unsigned size)
 {
     Tegra241CMDQV *cmdqv = (Tegra241CMDQV *)opaque;
+    int index;
 
     if (offset >= TEGRA241_CMDQV_IO_LEN) {
         qemu_log_mask(LOG_UNIMP,
@@ -67,6 +108,42 @@ static uint64_t tegra241_cmdqv_read(void *opaque, hwaddr offset, unsigned size)
         return cmdqv->cmdq_alloc_map[(offset - A_CMDQ_ALLOC_MAP_0) / 4];
     case A_VINTF0_CONFIG ... A_VINTF0_LVCMDQ_ERR_MAP_3:
         return tegra241_cmdqv_read_vintf(cmdqv, offset);
+    case A_VI_VCMDQ0_CONS_INDX ... A_VI_VCMDQ1_GERRORN:
+        /*
+         * VI_VCMDQ registers (VINTF logical view) have the same per-VCMDQ
+         * layout as the global VCMDQ registers, but are based at 0x30000
+         * instead of 0x10000.
+         *
+         * Subtract 0x20000 to translate a VI_VCMDQ offset into the equivalent
+         * global VCMDQ offset, then fall through to reuse the common VCMDQ
+         * decoding logic below.
+         */
+        offset -= 0x20000;
+        QEMU_FALLTHROUGH;
+    case A_VCMDQ0_CONS_INDX ... A_VCMDQ1_GERRORN:
+        /*
+         * Decode a per-VCMDQ register access.
+         *
+         * The hardware supports up to 128 identical VCMDQ instances; we
+         * currently expose TEGRA241_CMDQV_MAX_CMDQ (= 2). Each VCMDQ
+         * occupies a 0x80-byte window starting at 0x10000.
+         *
+         * The MMIO offset is decoded to extract the VCMDQ index and normalized
+         * to the corresponding VCMDQ0_* register by subtracting index * 0x80.
+         *
+         * A single helper then services all VCMDQs, with @index selecting the
+         * instance.
+         */
+        index = (offset - 0x10000) / 0x80;
+        return tegra241_cmdqv_read_vcmdq(cmdqv, offset - index * 0x80, index);
+    case A_VI_VCMDQ0_BASE_L ... A_VI_VCMDQ1_CONS_INDX_BASE_DRAM_H:
+        /* Same decode logic as A_VI_VCMDQx_CONS_INDX case above */
+        offset -= 0x20000;
+        QEMU_FALLTHROUGH;
+    case A_VCMDQ0_BASE_L ... A_VCMDQ1_CONS_INDX_BASE_DRAM_H:
+        /* Same decode logic as A_VCMDQx_CONS_INDX case above */
+        index = (offset - 0x20000) / 0x80;
+        return tegra241_cmdqv_read_vcmdq(cmdqv, offset - index * 0x80, index);
     default:
         qemu_log_mask(LOG_UNIMP, "%s unhandled read access at 0x%" PRIx64 "\n",
                       __func__, offset);
-- 
2.43.0
Re: [PATCH v3 15/32] hw/arm/tegra241-cmdqv: Emulate global and VINTF VCMDQ register reads
Posted by Jonathan Cameron via qemu development 2 days, 16 hours ago
On Thu, 26 Feb 2026 10:50:39 +0000
Shameer Kolothum <skolothumtho@nvidia.com> wrote:

> From: Nicolin Chen <nicolinc@nvidia.com>
> 
> Tegra241 CMDQV exposes per-VCMDQ register windows through two MMIO views:
> 
>   -Global VCMDQ registers at 0x10000/0x20000
>   -VINTF VCMDQ (VI_VCMDQ) registers at 0x30000/0x40000
> 
> The VI_VCMDQ register ranges are an alias of the global VCMDQ registers
> and are only meaningful when a VCMDQ is mapped to a VINTF via ioctl
> IOMMU_HW_QUEUE_ALLOC.
> 
> Add read side emulation for both global VCMDQ and VI_VCMDQ register
> ranges. MMIO accesses are decoded to extract the VCMDQ instance index
> and normalized to a VCMDQ0_* register offset, allowing a single helper
> to service all VCMDQ instances.
> 
> VI_VCMDQ accesses are translated to their equivalent global VCMDQ
> offsets and reuse the same decoding path. All VCMDQ reads are currently
> served from cached register state.
> 
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <skolothumtho@nvidia.com>
Hi Shameer,

As noted below there are a lot of repeats of 0x80 and the register window offsets in here.
Maybe some defines would make things clearer?

>  static uint64_t tegra241_cmdqv_read_vintf(Tegra241CMDQV *cmdqv, hwaddr offset)
>  {
>      int i;
> @@ -42,6 +82,7 @@ static uint64_t tegra241_cmdqv_read_vintf(Tegra241CMDQV *cmdqv, hwaddr offset)
>  static uint64_t tegra241_cmdqv_read(void *opaque, hwaddr offset, unsigned size)
>  {
>      Tegra241CMDQV *cmdqv = (Tegra241CMDQV *)opaque;
> +    int index;
>  
>      if (offset >= TEGRA241_CMDQV_IO_LEN) {
>          qemu_log_mask(LOG_UNIMP,
> @@ -67,6 +108,42 @@ static uint64_t tegra241_cmdqv_read(void *opaque, hwaddr offset, unsigned size)
>          return cmdqv->cmdq_alloc_map[(offset - A_CMDQ_ALLOC_MAP_0) / 4];
>      case A_VINTF0_CONFIG ... A_VINTF0_LVCMDQ_ERR_MAP_3:
>          return tegra241_cmdqv_read_vintf(cmdqv, offset);
> +    case A_VI_VCMDQ0_CONS_INDX ... A_VI_VCMDQ1_GERRORN:
> +        /*
> +         * VI_VCMDQ registers (VINTF logical view) have the same per-VCMDQ
> +         * layout as the global VCMDQ registers, but are based at 0x30000
> +         * instead of 0x10000.
> +         *
> +         * Subtract 0x20000 to translate a VI_VCMDQ offset into the equivalent
> +         * global VCMDQ offset, then fall through to reuse the common VCMDQ
> +         * decoding logic below.
> +         */
> +        offset -= 0x20000;
There are a lot of repeated numeric values of offsets and sizes in here.
I'm a bit in two minds about whether they are clearer as numbers or you should add
a few more defines.

Jonathan