Interrupt-remapping-related fields are in the top 128 bits of the Device
Table Entry (DTE), which should be updated using a 128-bit write according to
the AMD I/O Virtualization Technology (IOMMU) Specification.
Therefore, modify set_dte_irq_entry() to use 128-bit cmpxchg. Also, use
struct dev_data->dte_sem to synchronize DTE access.
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
drivers/iommu/amd/iommu.c | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index a24986c2478b..4eb53bd40487 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -3020,17 +3020,24 @@ static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid)
static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,
struct irq_remap_table *table)
{
- u64 dte;
- struct dev_table_entry *dev_table = get_dev_table(iommu);
+ u128 new, old;
+ struct dev_table_entry *dte = &get_dev_table(iommu)[devid];
+ struct iommu_dev_data *dev_data = search_dev_data(iommu, devid);
+
+ if (dev_data)
+ down_write(&dev_data->dte_sem);
+
+ old = new = dte->data128[1];
+ new &= ~DTE_IRQ_PHYS_ADDR_MASK;
+ new |= iommu_virt_to_phys(table->table);
+ new |= DTE_IRQ_REMAP_INTCTL;
+ new |= DTE_INTTABLEN;
+ new |= DTE_IRQ_REMAP_ENABLE;
- dte = dev_table[devid].data[2];
- dte &= ~DTE_IRQ_PHYS_ADDR_MASK;
- dte |= iommu_virt_to_phys(table->table);
- dte |= DTE_IRQ_REMAP_INTCTL;
- dte |= DTE_INTTABLEN;
- dte |= DTE_IRQ_REMAP_ENABLE;
+ WARN_ON(!try_cmpxchg128(&dte->data128[1], &old, new));
- dev_table[devid].data[2] = dte;
+ if (dev_data)
+ up_write(&dev_data->dte_sem);
}
static struct irq_remap_table *get_irq_table(struct amd_iommu *iommu, u16 devid)
--
2.34.1
On Thu, Aug 29, 2024 at 06:07:26PM +0000, Suravee Suthikulpanit wrote:
> Interrupt-remapping-related fields are in the top 128-bit of the Device
> Table Entry (DTE), which should be updated using 128-bit write based on the
> AMD I/O Virtualization Technology (IOMMU) Specification.
>
> Therefore, modify set_dte_irq_entry() to use 128-bit cmpxchg. Also, use
> struct dev_data->dte_sem to synchronize DTE access.
>
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> ---
> drivers/iommu/amd/iommu.c | 25 ++++++++++++++++---------
> 1 file changed, 16 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> index a24986c2478b..4eb53bd40487 100644
> --- a/drivers/iommu/amd/iommu.c
> +++ b/drivers/iommu/amd/iommu.c
> @@ -3020,17 +3020,24 @@ static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid)
> static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,
> struct irq_remap_table *table)
> {
> - u64 dte;
> - struct dev_table_entry *dev_table = get_dev_table(iommu);
> + u128 new, old;
> + struct dev_table_entry *dte = &get_dev_table(iommu)[devid];
> + struct iommu_dev_data *dev_data = search_dev_data(iommu, devid);
> +
> + if (dev_data)
> + down_write(&dev_data->dte_sem);
> +
> + old = new = dte->data128[1];
> + new &= ~DTE_IRQ_PHYS_ADDR_MASK;
> + new |= iommu_virt_to_phys(table->table);
> + new |= DTE_IRQ_REMAP_INTCTL;
> + new |= DTE_INTTABLEN;
> + new |= DTE_IRQ_REMAP_ENABLE;
>
> - dte = dev_table[devid].data[2];
> - dte &= ~DTE_IRQ_PHYS_ADDR_MASK;
> - dte |= iommu_virt_to_phys(table->table);
> - dte |= DTE_IRQ_REMAP_INTCTL;
> - dte |= DTE_INTTABLEN;
> - dte |= DTE_IRQ_REMAP_ENABLE;
> + WARN_ON(!try_cmpxchg128(&dte->data128[1], &old, new));
This probably doesn't need cmpxchg because it is only touching 64 bit
quanta [2], just a lock is good enough and avoids the "what to do if
cmpxchg fails" question.
> - dev_table[devid].data[2] = dte;
But this should be
WRITE_ONCE(dev_table[devid].data[2], dte);
Because it is writing to memory read back by HW.
Required for all the DTE touches everywhere.
Jason
Hi
On 8/30/2024 2:40 AM, Jason Gunthorpe wrote:
> On Thu, Aug 29, 2024 at 06:07:26PM +0000, Suravee Suthikulpanit wrote:
>> Interrupt-remapping-related fields are in the top 128-bit of the Device
>> Table Entry (DTE), which should be updated using 128-bit write based on the
>> AMD I/O Virtualization Technology (IOMMU) Specification.
>>
>> Therefore, modify set_dte_irq_entry() to use 128-bit cmpxchg. Also, use
>> struct dev_data->dte_sem to synchronize DTE access.
>>
>> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
>> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>> ---
>> drivers/iommu/amd/iommu.c | 25 ++++++++++++++++---------
>> 1 file changed, 16 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
>> index a24986c2478b..4eb53bd40487 100644
>> --- a/drivers/iommu/amd/iommu.c
>> +++ b/drivers/iommu/amd/iommu.c
>> @@ -3020,17 +3020,24 @@ static void iommu_flush_irt_and_complete(struct amd_iommu *iommu, u16 devid)
>> static void set_dte_irq_entry(struct amd_iommu *iommu, u16 devid,
>> struct irq_remap_table *table)
>> {
>> - u64 dte;
>> - struct dev_table_entry *dev_table = get_dev_table(iommu);
>> + u128 new, old;
>> + struct dev_table_entry *dte = &get_dev_table(iommu)[devid];
>> + struct iommu_dev_data *dev_data = search_dev_data(iommu, devid);
>> +
>> + if (dev_data)
>> + down_write(&dev_data->dte_sem);
>> +
>> + old = new = dte->data128[1];
>> + new &= ~DTE_IRQ_PHYS_ADDR_MASK;
>> + new |= iommu_virt_to_phys(table->table);
>> + new |= DTE_IRQ_REMAP_INTCTL;
>> + new |= DTE_INTTABLEN;
>> + new |= DTE_IRQ_REMAP_ENABLE;
>>
>> - dte = dev_table[devid].data[2];
>> - dte &= ~DTE_IRQ_PHYS_ADDR_MASK;
>> - dte |= iommu_virt_to_phys(table->table);
>> - dte |= DTE_IRQ_REMAP_INTCTL;
>> - dte |= DTE_INTTABLEN;
>> - dte |= DTE_IRQ_REMAP_ENABLE;
>> + WARN_ON(!try_cmpxchg128(&dte->data128[1], &old, new));
>
> This probably doesn't need cmpxchg because it is only touching 64 bit
> quanta [2], just a lock is good enough and avoids the "what to do if
> cmpxchg fails" question.
Okay
>> - dev_table[devid].data[2] = dte;
>
> But this should be
>
> WRITE_ONCE(dev_table[devid].data[2], dte);
>
> Because it is writing to memory read back by HW.
>
> Required for all the DTE touches everywhere.
Good point.
Thanks,
Suravee
© 2016 - 2026 Red Hat, Inc.