[PATCH v4] iommu/amd: Enhance "Completion-wait Time-out" error message

Dheeraj Kumar Srivastava posted 1 patch 3 months ago
There is a newer version of this series
drivers/iommu/amd/amd_iommu_types.h |  4 ++++
drivers/iommu/amd/iommu.c           | 28 +++++++++++++++++++++++++++-
2 files changed, 31 insertions(+), 1 deletion(-)
[PATCH v4] iommu/amd: Enhance "Completion-wait Time-out" error message
Posted by Dheeraj Kumar Srivastava 3 months ago
Current IOMMU driver prints "Completion-wait Time-out" error message with
insufficient information to further debug the issue.

Enhancing the error message as following:
1. Log IOMMU PCI device ID in the error message.
2. With "amd_iommu_dump=1" kernel command line option, dump entire
   command buffer entries including Head and Tail offset.

Dump the entire command buffer only on the first 'Completion-wait Time-out'
to avoid dmesg spam.

Signed-off-by: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
---
Changes since v3:
-> Dump the entire command buffer only on the first 'Completion-wait Time-out'
   when amd_iommu_dump=1, instead of dumping it on every occurrence.

 drivers/iommu/amd/amd_iommu_types.h |  4 ++++
 drivers/iommu/amd/iommu.c           | 28 +++++++++++++++++++++++++++-
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 95f63c5f6159..7576814f944d 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -247,6 +247,10 @@
 #define CMD_BUFFER_ENTRIES 512
 #define MMIO_CMD_SIZE_SHIFT 56
 #define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
+#define MMIO_CMD_HEAD_MASK	GENMASK_ULL(18, 4)
+#define MMIO_CMD_BUFFER_HEAD(x) FIELD_GET(MMIO_CMD_HEAD_MASK, (x))
+#define MMIO_CMD_TAIL_MASK	GENMASK_ULL(18, 4)
+#define MMIO_CMD_BUFFER_TAIL(x) FIELD_GET(MMIO_CMD_TAIL_MASK, (x))
 
 /* constants for event buffer handling */
 #define EVT_BUFFER_SIZE		8192 /* 512 entries */
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index eb348c63a8d0..abce078d2323 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1156,6 +1156,25 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
  *
  ****************************************************************************/
 
+static void dump_command_buffer(struct amd_iommu *iommu)
+{
+	struct iommu_cmd *cmd;
+	int head, tail, i;
+
+	head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
+	tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+
+	pr_err("CMD Buffer head=%d tail=%d\n", (int)(MMIO_CMD_BUFFER_HEAD(head)),
+	       (int)(MMIO_CMD_BUFFER_TAIL(tail)));
+
+	for (i = 0; i < CMD_BUFFER_ENTRIES; i++) {
+		cmd = (struct iommu_cmd *)(iommu->cmd_buf + i * sizeof(*cmd));
+		pr_err("%3d: %08x %08x %08x %08x\n", i, cmd->data[0], cmd->data[1], cmd->data[2],
+		       cmd->data[3]);
+	}
+}
+
+
 static int wait_on_sem(struct amd_iommu *iommu, u64 data)
 {
 	int i = 0;
@@ -1166,7 +1185,14 @@ static int wait_on_sem(struct amd_iommu *iommu, u64 data)
 	}
 
 	if (i == LOOP_TIMEOUT) {
-		pr_alert("Completion-Wait loop timed out\n");
+
+		pr_alert("IOMMU %04x:%02x:%02x.%01x: Completion-Wait loop timed out\n",
+			 iommu->pci_seg->id, PCI_BUS_NUM(iommu->devid),
+			 PCI_SLOT(iommu->devid), PCI_FUNC(iommu->devid));
+
+		if (amd_iommu_dump)
+			DO_ONCE_LITE(dump_command_buffer, iommu);
+
 		return -EIO;
 	}
 
-- 
2.25.1
Re: [PATCH v4] iommu/amd: Enhance "Completion-wait Time-out" error message
Posted by Ankit Soni 3 months ago
Hello Dheeraj,

On Wed, Nov 05, 2025 at 01:33:42PM +0530, Dheeraj Kumar Srivastava wrote:
> Current IOMMU driver prints "Completion-wait Time-out" error message with
> insufficient information to further debug the issue.
> 
> Enhancing the error message as following:
> 1. Log IOMMU PCI device ID in the error message.
> 2. With "amd_iommu_dump=1" kernel command line option, dump entire
>    command buffer entries including Head and Tail offset.
> 
> Dump the entire command buffer only on the first 'Completion-wait Time-out'
> to avoid dmesg spam.
> 
> Signed-off-by: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
> ---
> Changes since v3:
> -> Dump the entire command buffer only on the first 'Completion-wait Time-out'
>    when amd_iommu_dump=1, instead of dumping it on every occurrence.
> 
>  drivers/iommu/amd/amd_iommu_types.h |  4 ++++
>  drivers/iommu/amd/iommu.c           | 28 +++++++++++++++++++++++++++-
>  2 files changed, 31 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
> index 95f63c5f6159..7576814f944d 100644
> --- a/drivers/iommu/amd/amd_iommu_types.h
> +++ b/drivers/iommu/amd/amd_iommu_types.h
> @@ -247,6 +247,10 @@
>  #define CMD_BUFFER_ENTRIES 512
>  #define MMIO_CMD_SIZE_SHIFT 56
>  #define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)

It's worth adding comment for MASK 4-18.

> +#define MMIO_CMD_HEAD_MASK	GENMASK_ULL(18, 4)
> +#define MMIO_CMD_BUFFER_HEAD(x) FIELD_GET(MMIO_CMD_HEAD_MASK, (x))
> +#define MMIO_CMD_TAIL_MASK	GENMASK_ULL(18, 4)

Since HEAD and TAIL masks are same, may be, you can use something like
MMIO_CMD_HEAD_TAIL_MASK().

> +#define MMIO_CMD_BUFFER_TAIL(x) FIELD_GET(MMIO_CMD_TAIL_MASK, (x))
>  
>  /* constants for event buffer handling */
>  #define EVT_BUFFER_SIZE		8192 /* 512 entries */
> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> index eb348c63a8d0..abce078d2323 100644
> --- a/drivers/iommu/amd/iommu.c
> +++ b/drivers/iommu/amd/iommu.c
> @@ -1156,6 +1156,25 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
>   *
>   ****************************************************************************/
>  
> +static void dump_command_buffer(struct amd_iommu *iommu)
> +{
> +	struct iommu_cmd *cmd;
> +	int head, tail, i;

Since readl returns u32, prefer u32 for head/tail to avoid any surprises
or sign-ext issues.
and similarly use %u in pr_err() below.

> +
> +	head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
> +	tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
> +
> +	pr_err("CMD Buffer head=%d tail=%d\n", (int)(MMIO_CMD_BUFFER_HEAD(head)),
> +	       (int)(MMIO_CMD_BUFFER_TAIL(tail)));
> +
> +	for (i = 0; i < CMD_BUFFER_ENTRIES; i++) {
> +		cmd = (struct iommu_cmd *)(iommu->cmd_buf + i * sizeof(*cmd));
> +		pr_err("%3d: %08x %08x %08x %08x\n", i, cmd->data[0], cmd->data[1], cmd->data[2],
> +		       cmd->data[3]);
> +	}
> +}
> +
> +

Remove extra line.

Best,
Ankit

>  static int wait_on_sem(struct amd_iommu *iommu, u64 data)
>  {
>  	int i = 0;
> @@ -1166,7 +1185,14 @@ static int wait_on_sem(struct amd_iommu *iommu, u64 data)
>  	}
>  
>  	if (i == LOOP_TIMEOUT) {
> -		pr_alert("Completion-Wait loop timed out\n");
> +
> +		pr_alert("IOMMU %04x:%02x:%02x.%01x: Completion-Wait loop timed out\n",
> +			 iommu->pci_seg->id, PCI_BUS_NUM(iommu->devid),
> +			 PCI_SLOT(iommu->devid), PCI_FUNC(iommu->devid));
> +
> +		if (amd_iommu_dump)
> +			DO_ONCE_LITE(dump_command_buffer, iommu);
> +
>  		return -EIO;
>  	}
>  
> -- 
> 2.25.1
>
Re: [PATCH v4] iommu/amd: Enhance "Completion-wait Time-out" error message
Posted by Dheeraj Kumar Srivastava 2 months, 4 weeks ago
Hi Ankit,

Thanks for reviewing the patch.

On 11/7/2025 7:36 PM, Ankit Soni wrote:
> Hello Dheeraj,
> 
> On Wed, Nov 05, 2025 at 01:33:42PM +0530, Dheeraj Kumar Srivastava wrote:
>> Current IOMMU driver prints "Completion-wait Time-out" error message with
>> insufficient information to further debug the issue.
>>
>> Enhancing the error message as following:
>> 1. Log IOMMU PCI device ID in the error message.
>> 2. With "amd_iommu_dump=1" kernel command line option, dump entire
>>     command buffer entries including Head and Tail offset.
>>
>> Dump the entire command buffer only on the first 'Completion-wait Time-out'
>> to avoid dmesg spam.
>>
>> Signed-off-by: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
>> ---
>> Changes since v3:
>> -> Dump the entire command buffer only on the first 'Completion-wait Time-out'
>>     when amd_iommu_dump=1, instead of dumping it on every occurrence.
>>
>>   drivers/iommu/amd/amd_iommu_types.h |  4 ++++
>>   drivers/iommu/amd/iommu.c           | 28 +++++++++++++++++++++++++++-
>>   2 files changed, 31 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
>> index 95f63c5f6159..7576814f944d 100644
>> --- a/drivers/iommu/amd/amd_iommu_types.h
>> +++ b/drivers/iommu/amd/amd_iommu_types.h
>> @@ -247,6 +247,10 @@
>>   #define CMD_BUFFER_ENTRIES 512
>>   #define MMIO_CMD_SIZE_SHIFT 56
>>   #define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
> 
> It's worth adding comment for MASK 4-18.
> 

Sure.

>> +#define MMIO_CMD_HEAD_MASK	GENMASK_ULL(18, 4)
>> +#define MMIO_CMD_BUFFER_HEAD(x) FIELD_GET(MMIO_CMD_HEAD_MASK, (x))
>> +#define MMIO_CMD_TAIL_MASK	GENMASK_ULL(18, 4)
> 
> Since HEAD and TAIL masks are same, may be, you can use something like
> MMIO_CMD_HEAD_TAIL_MASK().
> 

Agree, but I’d prefer to keep them as separate definitions for clarity 
and potential future changes. Let me know if you have any further comments.

>> +#define MMIO_CMD_BUFFER_TAIL(x) FIELD_GET(MMIO_CMD_TAIL_MASK, (x))
>>   
>>   /* constants for event buffer handling */
>>   #define EVT_BUFFER_SIZE		8192 /* 512 entries */
>> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
>> index eb348c63a8d0..abce078d2323 100644
>> --- a/drivers/iommu/amd/iommu.c
>> +++ b/drivers/iommu/amd/iommu.c
>> @@ -1156,6 +1156,25 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
>>    *
>>    ****************************************************************************/
>>   
>> +static void dump_command_buffer(struct amd_iommu *iommu)
>> +{
>> +	struct iommu_cmd *cmd;
>> +	int head, tail, i;
> 
> Since readl returns u32, prefer u32 for head/tail to avoid any surprises
> or sign-ext issues.

Sure.

> and similarly use %u in pr_err() below.

I’d prefer dumping the head and tail values as integers, so they can be 
easily mapped to the command buffer entries indexed by integer offsets. 
Let me know if you have any further comments?

> 
>> +
>> +	head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
>> +	tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
>> +
>> +	pr_err("CMD Buffer head=%d tail=%d\n", (int)(MMIO_CMD_BUFFER_HEAD(head)),
>> +	       (int)(MMIO_CMD_BUFFER_TAIL(tail)));
>> +
>> +	for (i = 0; i < CMD_BUFFER_ENTRIES; i++) {
>> +		cmd = (struct iommu_cmd *)(iommu->cmd_buf + i * sizeof(*cmd));
>> +		pr_err("%3d: %08x %08x %08x %08x\n", i, cmd->data[0], cmd->data[1], cmd->data[2],
>> +		       cmd->data[3]);
>> +	}
>> +}
>> +
>> +
> 
> Remove extra line.
> 

Sure.

Thanks
Dheeraj

> Best,
> Ankit
> 
>>   static int wait_on_sem(struct amd_iommu *iommu, u64 data)
>>   {
>>   	int i = 0;
>> @@ -1166,7 +1185,14 @@ static int wait_on_sem(struct amd_iommu *iommu, u64 data)
>>   	}
>>   
>>   	if (i == LOOP_TIMEOUT) {
>> -		pr_alert("Completion-Wait loop timed out\n");
>> +
>> +		pr_alert("IOMMU %04x:%02x:%02x.%01x: Completion-Wait loop timed out\n",
>> +			 iommu->pci_seg->id, PCI_BUS_NUM(iommu->devid),
>> +			 PCI_SLOT(iommu->devid), PCI_FUNC(iommu->devid));
>> +
>> +		if (amd_iommu_dump)
>> +			DO_ONCE_LITE(dump_command_buffer, iommu);
>> +
>>   		return -EIO;
>>   	}
>>   
>> -- 
>> 2.25.1
>>

Re: [PATCH v4] iommu/amd: Enhance "Completion-wait Time-out" error message
Posted by Ankit Soni 2 months, 4 weeks ago
On Tue, Nov 11, 2025 at 04:54:40PM +0530, Dheeraj Kumar Srivastava wrote:
> Hi Ankit,
> 
> Thanks for reviewing the patch.
> 
> On 11/7/2025 7:36 PM, Ankit Soni wrote:
> > Hello Dheeraj,
> > 
> > On Wed, Nov 05, 2025 at 01:33:42PM +0530, Dheeraj Kumar Srivastava wrote:
> > > Current IOMMU driver prints "Completion-wait Time-out" error message with
> > > insufficient information to further debug the issue.
> > > 
> > > Enhancing the error message as following:
> > > 1. Log IOMMU PCI device ID in the error message.
> > > 2. With "amd_iommu_dump=1" kernel command line option, dump entire
> > >     command buffer entries including Head and Tail offset.
> > > 
> > > Dump the entire command buffer only on the first 'Completion-wait Time-out'
> > > to avoid dmesg spam.
> > > 
> > > Signed-off-by: Dheeraj Kumar Srivastava <dheerajkumar.srivastava@amd.com>
> > > ---
> > > Changes since v3:
> > > -> Dump the entire command buffer only on the first 'Completion-wait Time-out'
> > >     when amd_iommu_dump=1, instead of dumping it on every occurrence.
> > > 
> > >   drivers/iommu/amd/amd_iommu_types.h |  4 ++++
> > >   drivers/iommu/amd/iommu.c           | 28 +++++++++++++++++++++++++++-
> > >   2 files changed, 31 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
> > > index 95f63c5f6159..7576814f944d 100644
> > > --- a/drivers/iommu/amd/amd_iommu_types.h
> > > +++ b/drivers/iommu/amd/amd_iommu_types.h
> > > @@ -247,6 +247,10 @@
> > >   #define CMD_BUFFER_ENTRIES 512
> > >   #define MMIO_CMD_SIZE_SHIFT 56
> > >   #define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT)
> > 
> > It's worth adding comment for MASK 4-18.
> > 
> 
> Sure.
> 
> > > +#define MMIO_CMD_HEAD_MASK	GENMASK_ULL(18, 4)
> > > +#define MMIO_CMD_BUFFER_HEAD(x) FIELD_GET(MMIO_CMD_HEAD_MASK, (x))
> > > +#define MMIO_CMD_TAIL_MASK	GENMASK_ULL(18, 4)
> > 
> > Since HEAD and TAIL masks are same, may be, you can use something like
> > MMIO_CMD_HEAD_TAIL_MASK().
> > 
> 
> Agree, but I’d prefer to keep them as separate definitions for clarity and
> potential future changes. Let me know if you have any further comments.
> 

Okay, that's fine.

> > > +#define MMIO_CMD_BUFFER_TAIL(x) FIELD_GET(MMIO_CMD_TAIL_MASK, (x))
> > >   /* constants for event buffer handling */
> > >   #define EVT_BUFFER_SIZE		8192 /* 512 entries */
> > > diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> > > index eb348c63a8d0..abce078d2323 100644
> > > --- a/drivers/iommu/amd/iommu.c
> > > +++ b/drivers/iommu/amd/iommu.c
> > > @@ -1156,6 +1156,25 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
> > >    *
> > >    ****************************************************************************/
> > > +static void dump_command_buffer(struct amd_iommu *iommu)
> > > +{
> > > +	struct iommu_cmd *cmd;
> > > +	int head, tail, i;
> > 
> > Since readl returns u32, prefer u32 for head/tail to avoid any surprises
> > or sign-ext issues.
> 
> Sure.
> 
> > and similarly use %u in pr_err() below.
> 
> I’d prefer dumping the head and tail values as integers, so they can be
> easily mapped to the command buffer entries indexed by integer offsets. Let
> me know if you have any further comments?
> 

%u and %d will print same value for mask above. Since you were seeking
non negative value, it will be good to keep %u.

Otherwise patch looks good to me.

Reviewed-by: Ankit Soni <Ankit.Soni@amd.com>

- Ankit
> > 
> > > +
> > > +	head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
> > > +	tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
> > > +
> > > +	pr_err("CMD Buffer head=%d tail=%d\n", (int)(MMIO_CMD_BUFFER_HEAD(head)),
> > > +	       (int)(MMIO_CMD_BUFFER_TAIL(tail)));
> > > +
> > > +	for (i = 0; i < CMD_BUFFER_ENTRIES; i++) {
> > > +		cmd = (struct iommu_cmd *)(iommu->cmd_buf + i * sizeof(*cmd));
> > > +		pr_err("%3d: %08x %08x %08x %08x\n", i, cmd->data[0], cmd->data[1], cmd->data[2],
> > > +		       cmd->data[3]);
> > > +	}
> > > +}
> > > +
> > > +
> > 
> > Remove extra line.
> > 
> 
> Sure.
> 
> Thanks
> Dheeraj
> 
> > Best,
> > Ankit
> > 
> > >   static int wait_on_sem(struct amd_iommu *iommu, u64 data)
> > >   {
> > >   	int i = 0;
> > > @@ -1166,7 +1185,14 @@ static int wait_on_sem(struct amd_iommu *iommu, u64 data)
> > >   	}
> > >   	if (i == LOOP_TIMEOUT) {
> > > -		pr_alert("Completion-Wait loop timed out\n");
> > > +
> > > +		pr_alert("IOMMU %04x:%02x:%02x.%01x: Completion-Wait loop timed out\n",
> > > +			 iommu->pci_seg->id, PCI_BUS_NUM(iommu->devid),
> > > +			 PCI_SLOT(iommu->devid), PCI_FUNC(iommu->devid));
> > > +
> > > +		if (amd_iommu_dump)
> > > +			DO_ONCE_LITE(dump_command_buffer, iommu);
> > > +
> > >   		return -EIO;
> > >   	}
> > > -- 
> > > 2.25.1
> > > 
>