[RFC PATCH v3 13/15] hw/arm/smmuv3: Forward invalidation commands to hw
Posted by Shameer Kolothum via 4 months ago
From: Nicolin Chen <nicolinc@nvidia.com>

Use the provided smmuv3-accel helper functions to issue the
invalidation commands to host SMMUv3.

Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
 hw/arm/smmuv3-internal.h | 11 +++++++++++
 hw/arm/smmuv3.c          | 28 ++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index 8cb6a9238a..f3aeaf6375 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -233,6 +233,17 @@ static inline bool smmuv3_gerror_irq_enabled(SMMUv3State *s)
 #define Q_CONS_WRAP(q) (((q)->cons & WRAP_MASK(q)) >> (q)->log2size)
 #define Q_PROD_WRAP(q) (((q)->prod & WRAP_MASK(q)) >> (q)->log2size)
 
+static inline int smmuv3_q_ncmds(SMMUQueue *q)
+{
+    uint32_t prod = Q_PROD(q);
+    uint32_t cons = Q_CONS(q);
+
+    if (Q_PROD_WRAP(q) == Q_CONS_WRAP(q))
+        return prod - cons;
+    else
+        return WRAP_MASK(q) - cons + prod;
+}
+
 static inline bool smmuv3_q_full(SMMUQueue *q)
 {
     return ((q->cons ^ q->prod) & WRAP_INDEX_MASK(q)) == WRAP_MASK(q);
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index c94bfe6564..97ecca0764 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -1285,10 +1285,17 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
     SMMUCmdError cmd_error = SMMU_CERROR_NONE;
     SMMUQueue *q = &s->cmdq;
     SMMUCommandType type = 0;
+    SMMUCommandBatch batch = {};
+    uint32_t ncmds;
 
     if (!smmuv3_cmdq_enabled(s)) {
         return 0;
     }
+
+    ncmds = smmuv3_q_ncmds(q);
+    batch.cmds = g_new0(Cmd, ncmds);
+    batch.cons = g_new0(uint32_t, ncmds);
+
     /*
      * some commands depend on register values, typically CR0. In case those
      * register values change while handling the command, spec says it
@@ -1383,6 +1390,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
 
             trace_smmuv3_cmdq_cfgi_cd(sid);
             smmuv3_flush_config(sdev);
+            smmuv3_accel_batch_cmd(sdev->smmu, sdev, &batch, &cmd, &q->cons);
             break;
         }
         case SMMU_CMD_TLBI_NH_ASID:
@@ -1406,6 +1414,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
             trace_smmuv3_cmdq_tlbi_nh_asid(asid);
             smmu_inv_notifiers_all(&s->smmu_state);
             smmu_iotlb_inv_asid_vmid(bs, asid, vmid);
+            smmuv3_accel_batch_cmd(bs, NULL, &batch, &cmd, &q->cons);
             break;
         }
         case SMMU_CMD_TLBI_NH_ALL:
@@ -1433,6 +1442,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
             trace_smmuv3_cmdq_tlbi_nsnh();
             smmu_inv_notifiers_all(&s->smmu_state);
             smmu_iotlb_inv_all(bs);
+            smmuv3_accel_batch_cmd(bs, NULL, &batch, &cmd, &q->cons);
             break;
         case SMMU_CMD_TLBI_NH_VAA:
         case SMMU_CMD_TLBI_NH_VA:
@@ -1441,6 +1451,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
                 break;
             }
             smmuv3_range_inval(bs, &cmd, SMMU_STAGE_1);
+            smmuv3_accel_batch_cmd(bs, NULL, &batch, &cmd, &q->cons);
             break;
         case SMMU_CMD_TLBI_S12_VMALL:
         {
@@ -1499,12 +1510,29 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
         queue_cons_incr(q);
     }
 
+    qemu_mutex_lock(&s->mutex);
+    if (!cmd_error && batch.ncmds) {
+        if (!smmuv3_accel_issue_cmd_batch(bs, &batch)) {
+            if (batch.ncmds) {
+                q->cons = batch.cons[batch.ncmds - 1];
+            } else {
+                q->cons = batch.cons[0]; /* FIXME: Check */
+            }
+            qemu_log_mask(LOG_GUEST_ERROR, "Illegal command type: %d\n",
+                          CMD_TYPE(&batch.cmds[batch.ncmds]));
+            cmd_error = SMMU_CERROR_ILL;
+        }
+    }
+    qemu_mutex_unlock(&s->mutex);
+
     if (cmd_error) {
         trace_smmuv3_cmdq_consume_error(smmu_cmd_string(type), cmd_error);
         smmu_write_cmdq_err(s, cmd_error);
         smmuv3_trigger_irq(s, SMMU_IRQ_GERROR, R_GERROR_CMDQ_ERR_MASK);
     }
 
+    g_free(batch.cmds);
+    g_free(batch.cons);
     trace_smmuv3_cmdq_consume_out(Q_PROD(q), Q_CONS(q),
                                   Q_PROD_WRAP(q), Q_CONS_WRAP(q));
 
-- 
2.34.1
Re: [RFC PATCH v3 13/15] hw/arm/smmuv3: Forward invalidation commands to hw
Posted by Eric Auger 2 months, 1 week ago
Hi Shameer,

On 7/14/25 5:59 PM, Shameer Kolothum wrote:
> From: Nicolin Chen <nicolinc@nvidia.com>
>
> Use the provided smmuv3-accel helper functions to issue the
> invalidation commands to host SMMUv3.
>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> ---
>  hw/arm/smmuv3-internal.h | 11 +++++++++++
>  hw/arm/smmuv3.c          | 28 ++++++++++++++++++++++++++++
>  2 files changed, 39 insertions(+)
>
> diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
> index 8cb6a9238a..f3aeaf6375 100644
> --- a/hw/arm/smmuv3-internal.h
> +++ b/hw/arm/smmuv3-internal.h
> @@ -233,6 +233,17 @@ static inline bool smmuv3_gerror_irq_enabled(SMMUv3State *s)
>  #define Q_CONS_WRAP(q) (((q)->cons & WRAP_MASK(q)) >> (q)->log2size)
>  #define Q_PROD_WRAP(q) (((q)->prod & WRAP_MASK(q)) >> (q)->log2size)
>  
> +static inline int smmuv3_q_ncmds(SMMUQueue *q)
> +{
> +    uint32_t prod = Q_PROD(q);
> +    uint32_t cons = Q_CONS(q);
> +
> +    if (Q_PROD_WRAP(q) == Q_CONS_WRAP(q))
> +        return prod - cons;
> +    else
> +        return WRAP_MASK(q) - cons + prod;
> +}
> +
>  static inline bool smmuv3_q_full(SMMUQueue *q)
>  {
>      return ((q->cons ^ q->prod) & WRAP_INDEX_MASK(q)) == WRAP_MASK(q);
> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> index c94bfe6564..97ecca0764 100644
> --- a/hw/arm/smmuv3.c
> +++ b/hw/arm/smmuv3.c
> @@ -1285,10 +1285,17 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>      SMMUCmdError cmd_error = SMMU_CERROR_NONE;
>      SMMUQueue *q = &s->cmdq;
>      SMMUCommandType type = 0;
> +    SMMUCommandBatch batch = {};
> +    uint32_t ncmds;
>  
>      if (!smmuv3_cmdq_enabled(s)) {
>          return 0;
>      }
> +
> +    ncmds = smmuv3_q_ncmds(q);
> +    batch.cmds = g_new0(Cmd, ncmds);
> +    batch.cons = g_new0(uint32_t, ncmds);
So you are provisioning space for the n commands found in the queue,
independently of knowing whether they will be batched, i.e. only
invalidation commands are. Then commands are added to the batch one by
one and you increment batch->ncmds in smmuv3_accel_batch_cmd(). I agree
with Jonathan: this looks weird. At least I would introduce a helper
that inits a batch of ncmds, and I would make all the batch fields
private. You would end up with the init plus
smmuv3_accel_add_cmd_to_batch(batch, cmd). Then, independently of the
ncmds, you can issue a smmuv3_accel_issue_cmd_batch() that returns
early if there is nothing in the batch. You also need a batch
deallocation helper. I remember I expressed in the past my concern
about having commands executed out of order. I don't remember our
conclusion on that, but this shall be clearly studied and the
conclusion put in the commit message.
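
A rough sketch of what such an API could look like (names and bodies
here are illustrative only, assuming the existing cmds/cons/ncmds
fields move behind the helpers):

    /* Hypothetical batch API; cmds/cons/ncmds stay private to smmuv3-accel */
    static SMMUCommandBatch *smmuv3_accel_batch_new(uint32_t ncmds)
    {
        SMMUCommandBatch *batch = g_new0(SMMUCommandBatch, 1);

        batch->cmds = g_new0(Cmd, ncmds);
        batch->cons = g_new0(uint32_t, ncmds);
        return batch;
    }

    static void smmuv3_accel_add_cmd_to_batch(SMMUCommandBatch *batch,
                                              Cmd *cmd, uint32_t cons)
    {
        batch->cmds[batch->ncmds] = *cmd;
        batch->cons[batch->ncmds] = cons;
        batch->ncmds++;
    }

    static void smmuv3_accel_batch_free(SMMUCommandBatch *batch)
    {
        g_free(batch->cmds);
        g_free(batch->cons);
        g_free(batch);
    }
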
> +
>      /*
>       * some commands depend on register values, typically CR0. In case those
>       * register values change while handling the command, spec says it
> @@ -1383,6 +1390,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>  
>              trace_smmuv3_cmdq_cfgi_cd(sid);
>              smmuv3_flush_config(sdev);
> +            smmuv3_accel_batch_cmd(sdev->smmu, sdev, &batch, &cmd, &q->cons);
>              break;
>          }
>          case SMMU_CMD_TLBI_NH_ASID:
> @@ -1406,6 +1414,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>              trace_smmuv3_cmdq_tlbi_nh_asid(asid);
>              smmu_inv_notifiers_all(&s->smmu_state);
>              smmu_iotlb_inv_asid_vmid(bs, asid, vmid);
> +            smmuv3_accel_batch_cmd(bs, NULL, &batch, &cmd, &q->cons);
>              break;
>          }
>          case SMMU_CMD_TLBI_NH_ALL:
> @@ -1433,6 +1442,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>              trace_smmuv3_cmdq_tlbi_nsnh();
>              smmu_inv_notifiers_all(&s->smmu_state);
>              smmu_iotlb_inv_all(bs);
> +            smmuv3_accel_batch_cmd(bs, NULL, &batch, &cmd, &q->cons);
>              break;
>          case SMMU_CMD_TLBI_NH_VAA:
>          case SMMU_CMD_TLBI_NH_VA:
> @@ -1441,6 +1451,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>                  break;
>              }
>              smmuv3_range_inval(bs, &cmd, SMMU_STAGE_1);
> +            smmuv3_accel_batch_cmd(bs, NULL, &batch, &cmd, &q->cons);
>              break;
>          case SMMU_CMD_TLBI_S12_VMALL:
>          {
> @@ -1499,12 +1510,29 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>          queue_cons_incr(q);
>      }
>  
> +    qemu_mutex_lock(&s->mutex);
> +    if (!cmd_error && batch.ncmds) {
> +        if (!smmuv3_accel_issue_cmd_batch(bs, &batch)) {
> +            if (batch.ncmds) {
> +                q->cons = batch.cons[batch.ncmds - 1];
> +            } else {
> +                q->cons = batch.cons[0]; /* FIXME: Check */
> +            }
> +            qemu_log_mask(LOG_GUEST_ERROR, "Illegal command type: %d\n",
> +                          CMD_TYPE(&batch.cmds[batch.ncmds]));
Can't you have other error types returned?

Thanks

Eric
> +            cmd_error = SMMU_CERROR_ILL;
> +        }
> +    }
> +    qemu_mutex_unlock(&s->mutex);
> +
>      if (cmd_error) {
>          trace_smmuv3_cmdq_consume_error(smmu_cmd_string(type), cmd_error);
>          smmu_write_cmdq_err(s, cmd_error);
>          smmuv3_trigger_irq(s, SMMU_IRQ_GERROR, R_GERROR_CMDQ_ERR_MASK);
>      }
>  
> +    g_free(batch.cmds);
> +    g_free(batch.cons);
>      trace_smmuv3_cmdq_consume_out(Q_PROD(q), Q_CONS(q),
>                                    Q_PROD_WRAP(q), Q_CONS_WRAP(q));
>
RE: [RFC PATCH v3 13/15] hw/arm/smmuv3: Forward invalidation commands to hw
Posted by Shameer Kolothum 2 months, 1 week ago
Hi Eric,

> Hi Shameer,
> 
> On 7/14/25 5:59 PM, Shameer Kolothum wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> >
> > Use the provided smmuv3-accel helper functions to issue the
> > invalidation commands to host SMMUv3.
> >
> > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> > Signed-off-by: Shameer Kolothum
> <shameerali.kolothum.thodi@huawei.com>
> > ---
> >  hw/arm/smmuv3-internal.h | 11 +++++++++++
> >  hw/arm/smmuv3.c          | 28 ++++++++++++++++++++++++++++
> >  2 files changed, 39 insertions(+)
> >
> > diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
> > index 8cb6a9238a..f3aeaf6375 100644
> > --- a/hw/arm/smmuv3-internal.h
> > +++ b/hw/arm/smmuv3-internal.h
> > @@ -233,6 +233,17 @@ static inline bool smmuv3_gerror_irq_enabled(SMMUv3State *s)
> >  #define Q_CONS_WRAP(q) (((q)->cons & WRAP_MASK(q)) >> (q)->log2size)
> >  #define Q_PROD_WRAP(q) (((q)->prod & WRAP_MASK(q)) >> (q)->log2size)
> >
> > +static inline int smmuv3_q_ncmds(SMMUQueue *q)
> > +{
> > +    uint32_t prod = Q_PROD(q);
> > +    uint32_t cons = Q_CONS(q);
> > +
> > +    if (Q_PROD_WRAP(q) == Q_CONS_WRAP(q))
> > +        return prod - cons;
> > +    else
> > +        return WRAP_MASK(q) - cons + prod;
> > +}
> > +
> >  static inline bool smmuv3_q_full(SMMUQueue *q)
> >  {
> >      return ((q->cons ^ q->prod) & WRAP_INDEX_MASK(q)) == WRAP_MASK(q);
> > diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> > index c94bfe6564..97ecca0764 100644
> > --- a/hw/arm/smmuv3.c
> > +++ b/hw/arm/smmuv3.c
> > @@ -1285,10 +1285,17 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
> >      SMMUCmdError cmd_error = SMMU_CERROR_NONE;
> >      SMMUQueue *q = &s->cmdq;
> >      SMMUCommandType type = 0;
> > +    SMMUCommandBatch batch = {};
> > +    uint32_t ncmds;
> >
> >      if (!smmuv3_cmdq_enabled(s)) {
> >          return 0;
> >      }
> > +
> > +    ncmds = smmuv3_q_ncmds(q);
> > +    batch.cmds = g_new0(Cmd, ncmds);
> > +    batch.cons = g_new0(uint32_t, ncmds);
> So you are provisioning space for the n commands found in the queue,
> independently of knowing whether they will be batched, i.e. only
> invalidation commands are. Then commands are added to the batch one by
> one and you increment batch->ncmds in smmuv3_accel_batch_cmd(). I agree
> with Jonathan: this looks weird. At least I would introduce a helper
> that inits a batch of ncmds, and I would make all the batch fields
> private. You would end up with the init plus
> smmuv3_accel_add_cmd_to_batch(batch, cmd). Then, independently of the
> ncmds, you can issue a smmuv3_accel_issue_cmd_batch() that returns
> early if there is nothing in the batch. You also need a batch
> deallocation helper.

Agree, at present we pre-allocate irrespective of whether there will be any
invalidation cmds or not. I will take another look and incorporate your
suggestion above to improve this.

> I remember I expressed in the past my concern about having
> commands executed out of order. I don't remember our conclusion on that,
> but this shall be clearly studied and the conclusion put in the
> commit message.

Yes, you did, and I missed it. Sorry about that.

I think it is safest to honour the guest's execution order here. From a quick
glance, I couldn't find any guidance in the SMMUv3 specification on when
out-of-order execution would be safe. Also, we can't be sure how guests will be
modified/optimised in the future, so we can't completely rule out problems if
we execute out of order.

Hence, my plan for the next version is to start batching when we see
invalidation cmds and to submit the batch if any non-invalidation command is
encountered in between.
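
Something along these lines, as a sketch only (smmuv3_accel_batch_submit()
is a hypothetical flush helper, not code from this series):

    /* inside smmuv3_cmdq_consume()'s command switch */
    switch (type) {
    case SMMU_CMD_TLBI_NH_ASID:
    case SMMU_CMD_TLBI_NH_VA:
        /* ... existing emulated invalidation handling ... */
        smmuv3_accel_batch_cmd(bs, NULL, &batch, &cmd, &q->cons);
        break;
    default:
        /*
         * Non-invalidation command: submit whatever has been batched
         * so far, so the host executes commands in guest queue order.
         */
        smmuv3_accel_batch_submit(bs, &batch);   /* hypothetical */
        /* ... handle the non-invalidation command itself ... */
        break;
    }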

@Nicolin, do you foresee any issues with the above approach? In the current
host SMMUv3 driver, batching of commands is mainly used for invalidations
(except for certain arm_smmu_cmdq_issue_cmd_with_sync() cases). So
I guess we are good from a performance optimisation point of view if we can
cover invalidations as above.

> > +
> >      /*
> >       * some commands depend on register values, typically CR0. In case those
> >       * register values change while handling the command, spec says it
> > @@ -1383,6 +1390,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
> >
> >              trace_smmuv3_cmdq_cfgi_cd(sid);
> >              smmuv3_flush_config(sdev);
> > +            smmuv3_accel_batch_cmd(sdev->smmu, sdev, &batch, &cmd, &q->cons);
> >              break;
> >          }
> >          case SMMU_CMD_TLBI_NH_ASID:
> > @@ -1406,6 +1414,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
> >              trace_smmuv3_cmdq_tlbi_nh_asid(asid);
> >              smmu_inv_notifiers_all(&s->smmu_state);
> >              smmu_iotlb_inv_asid_vmid(bs, asid, vmid);
> > +            smmuv3_accel_batch_cmd(bs, NULL, &batch, &cmd, &q->cons);
> >              break;
> >          }
> >          case SMMU_CMD_TLBI_NH_ALL:
> > @@ -1433,6 +1442,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
> >              trace_smmuv3_cmdq_tlbi_nsnh();
> >              smmu_inv_notifiers_all(&s->smmu_state);
> >              smmu_iotlb_inv_all(bs);
> > +            smmuv3_accel_batch_cmd(bs, NULL, &batch, &cmd, &q->cons);
> >              break;
> >          case SMMU_CMD_TLBI_NH_VAA:
> >          case SMMU_CMD_TLBI_NH_VA:
> > @@ -1441,6 +1451,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
> >                  break;
> >              }
> >              smmuv3_range_inval(bs, &cmd, SMMU_STAGE_1);
> > +            smmuv3_accel_batch_cmd(bs, NULL, &batch, &cmd, &q->cons);
> >              break;
> >          case SMMU_CMD_TLBI_S12_VMALL:
> >          {
> > @@ -1499,12 +1510,29 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
> >          queue_cons_incr(q);
> >      }
> >
> > +    qemu_mutex_lock(&s->mutex);
> > +    if (!cmd_error && batch.ncmds) {
> > +        if (!smmuv3_accel_issue_cmd_batch(bs, &batch)) {
> > +            if (batch.ncmds) {
> > +                q->cons = batch.cons[batch.ncmds - 1];
> > +            } else {
> > +                q->cons = batch.cons[0]; /* FIXME: Check */
> > +            }
> > +            qemu_log_mask(LOG_GUEST_ERROR, "Illegal command type: %d\n",
> > +                          CMD_TYPE(&batch.cmds[batch.ncmds]));
> Can't you have other error types returned?

The kernel can return EOPNOTSUPP/EINVAL/ENOMEM/EFAULT/ETIMEDOUT errors.
Of these, only ETIMEDOUT is related to the actual host queue, when an
attempted SYNC times out.

So, among CERROR_ILL/_ABT/_ATC_INV_SYNC, I think it is best to set CERROR_ILL
here.

Thanks,
Shameer

> 
> Thanks
> 
> Eric
> > +            cmd_error = SMMU_CERROR_ILL;
> > +        }
> > +    }
> > +    qemu_mutex_unlock(&s->mutex);
> > +
> >      if (cmd_error) {
> >          trace_smmuv3_cmdq_consume_error(smmu_cmd_string(type), cmd_error);
> >          smmu_write_cmdq_err(s, cmd_error);
> >          smmuv3_trigger_irq(s, SMMU_IRQ_GERROR, R_GERROR_CMDQ_ERR_MASK);
> >      }
> >
> > +    g_free(batch.cmds);
> > +    g_free(batch.cons);
> >      trace_smmuv3_cmdq_consume_out(Q_PROD(q), Q_CONS(q),
> >                                    Q_PROD_WRAP(q), Q_CONS_WRAP(q));
> >

Re: [RFC PATCH v3 13/15] hw/arm/smmuv3: Forward invalidation commands to hw
Posted by Eric Auger 2 months ago

On 9/8/25 2:22 PM, Shameer Kolothum wrote:
> Hi Eric,
>
>> Hi Shameer,
>>
>> On 7/14/25 5:59 PM, Shameer Kolothum wrote:
>>> From: Nicolin Chen <nicolinc@nvidia.com>
>>>
>>> Use the provided smmuv3-accel helper functions to issue the
>>> invalidation commands to host SMMUv3.
>>>
>>> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
>>> Signed-off-by: Shameer Kolothum
>> <shameerali.kolothum.thodi@huawei.com>
>>> ---
>>>  hw/arm/smmuv3-internal.h | 11 +++++++++++
>>>  hw/arm/smmuv3.c          | 28 ++++++++++++++++++++++++++++
>>>  2 files changed, 39 insertions(+)
>>>
>>> diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
>>> index 8cb6a9238a..f3aeaf6375 100644
>>> --- a/hw/arm/smmuv3-internal.h
>>> +++ b/hw/arm/smmuv3-internal.h
>>> @@ -233,6 +233,17 @@ static inline bool smmuv3_gerror_irq_enabled(SMMUv3State *s)
>>>  #define Q_CONS_WRAP(q) (((q)->cons & WRAP_MASK(q)) >> (q)->log2size)
>>>  #define Q_PROD_WRAP(q) (((q)->prod & WRAP_MASK(q)) >> (q)->log2size)
>>>
>>> +static inline int smmuv3_q_ncmds(SMMUQueue *q)
>>> +{
>>> +    uint32_t prod = Q_PROD(q);
>>> +    uint32_t cons = Q_CONS(q);
>>> +
>>> +    if (Q_PROD_WRAP(q) == Q_CONS_WRAP(q))
>>> +        return prod - cons;
>>> +    else
>>> +        return WRAP_MASK(q) - cons + prod;
>>> +}
>>> +
>>>  static inline bool smmuv3_q_full(SMMUQueue *q)
>>>  {
>>>      return ((q->cons ^ q->prod) & WRAP_INDEX_MASK(q)) == WRAP_MASK(q);
>>> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
>>> index c94bfe6564..97ecca0764 100644
>>> --- a/hw/arm/smmuv3.c
>>> +++ b/hw/arm/smmuv3.c
>>> @@ -1285,10 +1285,17 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>>>      SMMUCmdError cmd_error = SMMU_CERROR_NONE;
>>>      SMMUQueue *q = &s->cmdq;
>>>      SMMUCommandType type = 0;
>>> +    SMMUCommandBatch batch = {};
>>> +    uint32_t ncmds;
>>>
>>>      if (!smmuv3_cmdq_enabled(s)) {
>>>          return 0;
>>>      }
>>> +
>>> +    ncmds = smmuv3_q_ncmds(q);
>>> +    batch.cmds = g_new0(Cmd, ncmds);
>>> +    batch.cons = g_new0(uint32_t, ncmds);
>> So you are provisioning space for the n commands found in the queue,
>> independently of knowing whether they will be batched, i.e. only
>> invalidation commands are. Then commands are added to the batch one by
>> one and you increment batch->ncmds in smmuv3_accel_batch_cmd(). I agree
>> with Jonathan: this looks weird. At least I would introduce a helper
>> that inits a batch of ncmds, and I would make all the batch fields
>> private. You would end up with the init plus
>> smmuv3_accel_add_cmd_to_batch(batch, cmd). Then, independently of the
>> ncmds, you can issue a smmuv3_accel_issue_cmd_batch() that returns
>> early if there is nothing in the batch. You also need a batch
>> deallocation helper.
> Agree, at present we pre-allocate irrespective of whether there will be any
> invalidation cmds or not. I will take another look and incorporate your
> suggestion above to improve this.
>
>> I remember I expressed in the past my concern about having
>> commands executed out of order. I don't remember our conclusion on that,
>> but this shall be clearly studied and the conclusion put in the
>> commit message.
> Yes, you did, and I missed it. Sorry about that.
>
> I think it is safest to honour the guest's execution order here. From a quick
> glance, I couldn't find any guidance in the SMMUv3 specification on when
> out-of-order execution would be safe. Also, we can't be sure how guests will be
> modified/optimised in the future, so we can't completely rule out problems if
> we execute out of order.
What about when you receive a SYNC cmd? It is supposed to ensure that all
the preceding commands were consumed. However, in that case you will have
commands that were submitted earlier but executed later. This looks wrong
to me.

This optimization is not requested in the first enablement series. I
would postpone it personally.

>
> Hence, my plan for the next version is to start batching when we see
> invalidation cmds and to submit the batch if any non-invalidation command is
> encountered in between.
Looks safe indeed. But again, this can come as a follow-up optimization.

Eric
>
> @Nicolin, do you foresee any issues with the above approach? In the current
> host SMMUv3 driver, batching of commands is mainly used for invalidations
> (except for certain arm_smmu_cmdq_issue_cmd_with_sync() cases). So
> I guess we are good from a performance optimisation point of view if we can
> cover invalidations as above.
>
>>> +
>>>      /*
>>>       * some commands depend on register values, typically CR0. In case those
>>>       * register values change while handling the command, spec says it
>>> @@ -1383,6 +1390,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>>>              trace_smmuv3_cmdq_cfgi_cd(sid);
>>>              smmuv3_flush_config(sdev);
>>> +            smmuv3_accel_batch_cmd(sdev->smmu, sdev, &batch, &cmd, &q->cons);
>>>              break;
>>>          }
>>>          case SMMU_CMD_TLBI_NH_ASID:
>>> @@ -1406,6 +1414,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>>>              trace_smmuv3_cmdq_tlbi_nh_asid(asid);
>>>              smmu_inv_notifiers_all(&s->smmu_state);
>>>              smmu_iotlb_inv_asid_vmid(bs, asid, vmid);
>>> +            smmuv3_accel_batch_cmd(bs, NULL, &batch, &cmd, &q->cons);
>>>              break;
>>>          }
>>>          case SMMU_CMD_TLBI_NH_ALL:
>>> @@ -1433,6 +1442,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>>>              trace_smmuv3_cmdq_tlbi_nsnh();
>>>              smmu_inv_notifiers_all(&s->smmu_state);
>>>              smmu_iotlb_inv_all(bs);
>>> +            smmuv3_accel_batch_cmd(bs, NULL, &batch, &cmd, &q->cons);
>>>              break;
>>>          case SMMU_CMD_TLBI_NH_VAA:
>>>          case SMMU_CMD_TLBI_NH_VA:
>>> @@ -1441,6 +1451,7 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>>>                  break;
>>>              }
>>>              smmuv3_range_inval(bs, &cmd, SMMU_STAGE_1);
>>> +            smmuv3_accel_batch_cmd(bs, NULL, &batch, &cmd, &q->cons);
>>>              break;
>>>          case SMMU_CMD_TLBI_S12_VMALL:
>>>          {
>>> @@ -1499,12 +1510,29 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>>>          queue_cons_incr(q);
>>>      }
>>>
>>> +    qemu_mutex_lock(&s->mutex);
>>> +    if (!cmd_error && batch.ncmds) {
>>> +        if (!smmuv3_accel_issue_cmd_batch(bs, &batch)) {
>>> +            if (batch.ncmds) {
>>> +                q->cons = batch.cons[batch.ncmds - 1];
>>> +            } else {
>>> +                q->cons = batch.cons[0]; /* FIXME: Check */
>>> +            }
>>> +            qemu_log_mask(LOG_GUEST_ERROR, "Illegal command type: %d\n",
>>> +                          CMD_TYPE(&batch.cmds[batch.ncmds]));
>> Can't you have other error types returned?
> The kernel can return EOPNOTSUPP/EINVAL/ENOMEM/EFAULT/ETIMEDOUT errors.
> Of these, only ETIMEDOUT is related to the actual host queue, when an
> attempted SYNC times out.
>
> So, among CERROR_ILL/_ABT/_ATC_INV_SYNC, I think it is best to set CERROR_ILL
> here.
>
> Thanks,
> Shameer
>
>> Thanks
>>
>> Eric
>>> +            cmd_error = SMMU_CERROR_ILL;
>>> +        }
>>> +    }
>>> +    qemu_mutex_unlock(&s->mutex);
>>> +
>>>      if (cmd_error) {
>>>          trace_smmuv3_cmdq_consume_error(smmu_cmd_string(type), cmd_error);
>>>          smmu_write_cmdq_err(s, cmd_error);
>>>          smmuv3_trigger_irq(s, SMMU_IRQ_GERROR, R_GERROR_CMDQ_ERR_MASK);
>>>      }
>>>
>>> +    g_free(batch.cmds);
>>> +    g_free(batch.cons);
>>>      trace_smmuv3_cmdq_consume_out(Q_PROD(q), Q_CONS(q),
>>>                                    Q_PROD_WRAP(q), Q_CONS_WRAP(q));
>>>


Re: [RFC PATCH v3 13/15] hw/arm/smmuv3: Forward invalidation commands to hw
Posted by Nicolin Chen 2 months, 1 week ago
On Mon, Sep 08, 2025 at 05:22:35AM -0700, Shameer Kolothum wrote:
> > >      SMMUCmdError cmd_error = SMMU_CERROR_NONE;
> > >      SMMUQueue *q = &s->cmdq;
> > >      SMMUCommandType type = 0;
> > > +    SMMUCommandBatch batch = {};
> > > +    uint32_t ncmds;
> > >
> > >      if (!smmuv3_cmdq_enabled(s)) {
> > >          return 0;
> > >      }
> > > +
> > > +    ncmds = smmuv3_q_ncmds(q);
> > > +    batch.cmds = g_new0(Cmd, ncmds);
> > > +    batch.cons = g_new0(uint32_t, ncmds);

> > So you are provisioning space for the n commands found in the queue,
> > independently of knowing whether they will be batched, i.e. only
> > invalidation commands are. Then commands are added to the batch one by
> > one and you increment batch->ncmds in smmuv3_accel_batch_cmd(). I agree
> > with Jonathan: this looks weird. At least I would introduce a helper
> > that inits a batch of ncmds, and I would make all the batch fields
> > private. You would end up with the init plus
> > smmuv3_accel_add_cmd_to_batch(batch, cmd). Then, independently of the
> > ncmds, you can issue a smmuv3_accel_issue_cmd_batch() that returns
> > early if there is nothing in the batch. You also need a batch
> > deallocation helper.
> 
> Agree, at present we pre-allocate irrespective of whether there will be any
> invalidation cmds or not. I will take another look and incorporate your
> suggestion above to improve this.

Perhaps we can try a different data structure for those two batch
elements, e.g. using a GArray.

This allows an unknown "ncmd" at the beginning, so an invalidation
command will be just appended to the array in the _batch().

Then, in the _submit(), we can convert it to a regular array:
    Cmd *cmds = (Cmd *)g_array_free(batch->cmds, FALSE);
    // passing cmds to ioctl
    g_free(cmds);
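
Fleshed out slightly, a sketch assuming batch->cmds becomes a GArray of
Cmd (illustrative only):

    /* append side: the GArray tracks its own length, no separate ncmds */
    batch->cmds = g_array_new(FALSE, TRUE, sizeof(Cmd));
    ...
    g_array_append_val(batch->cmds, cmd);

    /* submit side */
    uint32_t ncmds = batch->cmds->len;
    Cmd *cmds = (Cmd *)g_array_free(batch->cmds, FALSE); /* keep the data */
    /* ... pass cmds/ncmds to the invalidation ioctl ... */
    g_free(cmds);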

> > I remember I expressed in the past my concern about having
> > commands executed out of order. I don't remember our conclusion on that,
> > but this shall be clearly studied and the conclusion put in the
> > commit message.
> 
> Yes, you did, and I missed it. Sorry about that.
> 
> I think it is safest to honour the guest's execution order here. From a quick
> glance, I couldn't find any guidance in the SMMUv3 specification on when
> out-of-order execution would be safe. Also, we can't be sure how guests will be
> modified/optimised in the future, so we can't completely rule out problems if
> we execute out of order.
> 
> Hence, my plan for the next version is to start batching when we see
> invalidation cmds and to submit the batch if any non-invalidation command is
> encountered in between.

That sounds good to me.

> @Nicolin, do you foresee any issues with the above approach? In the current
> host SMMUv3 driver, batching of commands is mainly used for invalidations
> (except for certain arm_smmu_cmdq_issue_cmd_with_sync() cases). So
> I guess we are good from a performance optimisation point of view if we can
> cover invalidations as above.

Yes. It shouldn't impact perf so long as the guest OS is sane.

Nicolin

Re: [RFC PATCH v3 13/15] hw/arm/smmuv3: Forward invalidation commands to hw
Posted by Jonathan Cameron via 4 months ago
On Mon, 14 Jul 2025 16:59:39 +0100
Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> wrote:

> From: Nicolin Chen <nicolinc@nvidia.com>
> 
> Use the provided smmuv3-accel helper functions to issue the
> invalidation commands to host SMMUv3.
> 
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> ---
>  hw/arm/smmuv3-internal.h | 11 +++++++++++
>  hw/arm/smmuv3.c          | 28 ++++++++++++++++++++++++++++
>  2 files changed, 39 insertions(+)
> 
> diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
> index 8cb6a9238a..f3aeaf6375 100644
> --- a/hw/arm/smmuv3-internal.h
> +++ b/hw/arm/smmuv3-internal.h
> @@ -233,6 +233,17 @@ static inline bool smmuv3_gerror_irq_enabled(SMMUv3State *s)
>  #define Q_CONS_WRAP(q) (((q)->cons & WRAP_MASK(q)) >> (q)->log2size)
>  #define Q_PROD_WRAP(q) (((q)->prod & WRAP_MASK(q)) >> (q)->log2size)
>  
> +static inline int smmuv3_q_ncmds(SMMUQueue *q)
> +{
> +    uint32_t prod = Q_PROD(q);
> +    uint32_t cons = Q_CONS(q);
> +
> +    if (Q_PROD_WRAP(q) == Q_CONS_WRAP(q))
> +        return prod - cons;
> +    else

The else doesn't add anything; also, it's QEMU, so use {}.

> +        return WRAP_MASK(q) - cons + prod;
> +}
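
i.e. something like (same logic, just restyled):

    static inline int smmuv3_q_ncmds(SMMUQueue *q)
    {
        uint32_t prod = Q_PROD(q);
        uint32_t cons = Q_CONS(q);

        if (Q_PROD_WRAP(q) == Q_CONS_WRAP(q)) {
            return prod - cons;
        }
        return WRAP_MASK(q) - cons + prod;
    }
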
> +
>  static inline bool smmuv3_q_full(SMMUQueue *q)
>  {
>      return ((q->cons ^ q->prod) & WRAP_INDEX_MASK(q)) == WRAP_MASK(q);
> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> index c94bfe6564..97ecca0764 100644
> --- a/hw/arm/smmuv3.c
> +++ b/hw/arm/smmuv3.c
> @@ -1285,10 +1285,17 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>      SMMUCmdError cmd_error = SMMU_CERROR_NONE;
>      SMMUQueue *q = &s->cmdq;
>      SMMUCommandType type = 0;
> +    SMMUCommandBatch batch = {};
> +    uint32_t ncmds;
>  
>      if (!smmuv3_cmdq_enabled(s)) {
>          return 0;
>      }
> +
> +    ncmds = smmuv3_q_ncmds(q);
> +    batch.cmds = g_new0(Cmd, ncmds);
> +    batch.cons = g_new0(uint32_t, ncmds);

Where is batch.ncmds set?  It is cleared but I'm missing it being set to anything.

> +

> +    qemu_mutex_lock(&s->mutex);
> +    if (!cmd_error && batch.ncmds) {
> +        if (!smmuv3_accel_issue_cmd_batch(bs, &batch)) {
> +            if (batch.ncmds) {
> +                q->cons = batch.cons[batch.ncmds - 1];
> +            } else {
> +                q->cons = batch.cons[0]; /* FIXME: Check */
> +            }

Totally non-obvious that a return of false from the issue call means an
illegal command type. Maybe that will be obvious from the comments
requested in the previous patch review.


> +            qemu_log_mask(LOG_GUEST_ERROR, "Illegal command type: %d\n",
> +                          CMD_TYPE(&batch.cmds[batch.ncmds]));
> +            cmd_error = SMMU_CERROR_ILL;
> +        }
> +    }
> +    qemu_mutex_unlock(&s->mutex);
> +
>      if (cmd_error) {
>          trace_smmuv3_cmdq_consume_error(smmu_cmd_string(type), cmd_error);
>          smmu_write_cmdq_err(s, cmd_error);
>          smmuv3_trigger_irq(s, SMMU_IRQ_GERROR, R_GERROR_CMDQ_ERR_MASK);
>      }
>  
> +    g_free(batch.cmds);
> +    g_free(batch.cons);
>      trace_smmuv3_cmdq_consume_out(Q_PROD(q), Q_CONS(q),
>                                    Q_PROD_WRAP(q), Q_CONS_WRAP(q));
>
Re: [RFC PATCH v3 13/15] hw/arm/smmuv3: Forward invalidation commands to hw
Posted by Nicolin Chen 4 months ago
On Tue, Jul 15, 2025 at 11:46:09AM +0100, Jonathan Cameron wrote:
> >      SMMUCmdError cmd_error = SMMU_CERROR_NONE;
> >      SMMUQueue *q = &s->cmdq;
> >      SMMUCommandType type = 0;
> > +    SMMUCommandBatch batch = {};
> > +    uint32_t ncmds;
> >  
> >      if (!smmuv3_cmdq_enabled(s)) {
> >          return 0;
> >      }
> > +
> > +    ncmds = smmuv3_q_ncmds(q);
> > +    batch.cmds = g_new0(Cmd, ncmds);
> > +    batch.cons = g_new0(uint32_t, ncmds);
> 
> Where is batch.ncmds set?  It is cleared but I'm missing it being set to anything.

smmuv3_accel_batch_cmd() internally sets that, every time it's
invoked to add a new command to the batch.

Shameer, let's add some comments explaining the batch function.

> > +
> 
> > +    qemu_mutex_lock(&s->mutex);
> > +    if (!cmd_error && batch.ncmds) {
> > +        if (!smmuv3_accel_issue_cmd_batch(bs, &batch)) {
> > +            if (batch.ncmds) {
> > +                q->cons = batch.cons[batch.ncmds - 1];
> > +            } else {
> > +                q->cons = batch.cons[0]; /* FIXME: Check */
> > +            }
> 
> > Totally non-obvious that a return of false from the issue call means an
> > illegal command type. Maybe that will be obvious from the comments
> > requested in the previous patch review.

That's a good point. Shameer, I think we need some fine-tuning
here, validating the return value from the ioctl, for which the
kernel will only return -EIO or -ETIMEDOUT on failure, indicating
either an SMMU_CERROR_ILL or an SMMU_CERROR_ATC_INV_SYNC.
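
E.g. a rough sketch, assuming the helper surfaces the ioctl errno to the
caller (the helper name is hypothetical):

    /* map the batch-ioctl errno onto a guest-visible CMDQ error code */
    static SMMUCmdError smmuv3_accel_cmd_error(int err)
    {
        switch (err) {
        case ETIMEDOUT:
            /* host timed out waiting for a CMD_SYNC completion */
            return SMMU_CERROR_ATC_INV_SYNC;
        case EIO:
        default:
            return SMMU_CERROR_ILL;
        }
    }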
 
> > +            qemu_log_mask(LOG_GUEST_ERROR, "Illegal command type: %d\n",
> > +                          CMD_TYPE(&batch.cmds[batch.ncmds]));
> > +            cmd_error = SMMU_CERROR_ILL;

Thanks
Nicolin
RE: [RFC PATCH v3 13/15] hw/arm/smmuv3: Forward invalidation commands to hw
Posted by Shameerali Kolothum Thodi via 4 months ago

> On Tue, Jul 15, 2025 at 11:46:09AM +0100, Jonathan Cameron wrote:
> > >      SMMUCmdError cmd_error = SMMU_CERROR_NONE;
> > >      SMMUQueue *q = &s->cmdq;
> > >      SMMUCommandType type = 0;
> > > +    SMMUCommandBatch batch = {};
> > > +    uint32_t ncmds;
> > >
> > >      if (!smmuv3_cmdq_enabled(s)) {
> > >          return 0;
> > >      }
> > > +
> > > +    ncmds = smmuv3_q_ncmds(q);
> > > +    batch.cmds = g_new0(Cmd, ncmds);
> > > +    batch.cons = g_new0(uint32_t, ncmds);
> >
> > Where is batch.ncmds set? It is cleared but I'm missing it being set to anything.
> 
> smmuv3_accel_batch_cmd() internally sets that, every time it's
> invoked to add a new command in the batch.
> 
> Shameer, let's add some comments explaining the batch function.

Yes. Will add.

> 
> > > +
> >
> > > +    qemu_mutex_lock(&s->mutex);
> > > +    if (!cmd_error && batch.ncmds) {
> > > +        if (!smmuv3_accel_issue_cmd_batch(bs, &batch)) {
> > > +            if (batch.ncmds) {
> > > +                q->cons = batch.cons[batch.ncmds - 1];
> > > +            } else {
> > > +                q->cons = batch.cons[0]; /* FIXME: Check */
> > > +            }
> >
> > Totally non-obvious that a return of false from the issue call means an
> > illegal command type. Maybe that will be obvious from the comments
> > requested in the previous patch review.
> 
> That's a good point. Shameer, I think we need some fine-tuning
> here, validating the return value from the ioctl, for which the
> kernel will only return -EIO or -ETIMEDOUT on failure, indicating
> either an SMMU_CERROR_ILL or an SMMU_CERROR_ATC_INV_SYNC.

Yeah, I was not sure about this. Also about setting the current cons pointer
in case the IOCTL returns for some reason other than attempting the CMD. I
will double-check this.
 
Thanks,
Shameer

> > > +            qemu_log_mask(LOG_GUEST_ERROR, "Illegal command type: %d\n",
> > > +                          CMD_TYPE(&batch.cmds[batch.ncmds]));
> > > +            cmd_error = SMMU_CERROR_ILL;
> 
> Thanks
> Nicolin