It appears unhelpful to me for flush_command_buffer() to block all
progress elsewhere for the given IOMMU by holding its lock while
waiting for command completion. Unless the lock is already held,
acquire it in send_iommu_command(). Release it in all cases in
flush_command_buffer(), before actually starting the wait loop.
Some of the involved functions did/do get called with the lock already
held: For amd_iommu_flush_intremap() we can simply move the locking
inside. For amd_iommu_flush_device() and amd_iommu_flush_all_caches()
the lock now gets dropped in the course of the function's operation.
Where touching function headers anyway, also adjust types used to be
better in line with our coding style and, where applicable, the
functions' callers.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Paul Durrant <paul@xen.org>
---
v2: Add comments. Adjust parameter types when function headers get
touched anyway.
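
For reference, a minimal sketch of the intended lock handoff as seen by a
typical caller (illustrative only, simplified from the hunks below; the
function names match the patched code, the comments are mine):

    unsigned long flags;

    /* Acquires iommu->lock via its call to send_iommu_command(..., &flags). */
    flags = invalidate_iommu_pages(iommu, daddr, d->domain_id, order);

    /*
     * Emits the completion-wait command while still holding the lock, then
     * drops it (restoring the saved eflags) before spin-waiting for the
     * command to complete.
     */
    flush_command_buffer(iommu, 0, flags);
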
--- a/xen/drivers/passthrough/amd/iommu.h
+++ b/xen/drivers/passthrough/amd/iommu.h
@@ -253,9 +253,10 @@ void amd_iommu_flush_pages(struct domain
unsigned int order);
void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev,
uint64_t gaddr, unsigned int order);
-void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf);
+void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf,
+ unsigned long flags);
void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf);
-void amd_iommu_flush_all_caches(struct amd_iommu *iommu);
+void amd_iommu_flush_all_caches(struct amd_iommu *iommu, unsigned long flags);
/* find iommu for bdf */
struct amd_iommu *find_iommu_for_device(int seg, int bdf);
--- a/xen/drivers/passthrough/amd/iommu_cmd.c
+++ b/xen/drivers/passthrough/amd/iommu_cmd.c
@@ -23,11 +23,20 @@
#define CMD_COMPLETION_INIT 0
#define CMD_COMPLETION_DONE 1
+/*
+ * When @flags is non-NULL, the function will acquire the IOMMU lock,
+ * transferring lock ownership to the caller. When @flags is NULL,
+ * the lock is assumed to be already held.
+ */
static void send_iommu_command(struct amd_iommu *iommu,
- const uint32_t cmd[4])
+ const uint32_t cmd[4],
+ unsigned long *flags)
{
uint32_t tail;
+ if ( flags )
+ spin_lock_irqsave(&iommu->lock, *flags);
+
tail = iommu->cmd_buffer.tail + sizeof(cmd_entry_t);
if ( tail == iommu->cmd_buffer.size )
tail = 0;
@@ -49,8 +58,13 @@ static void send_iommu_command(struct am
writel(tail, iommu->mmio_base + IOMMU_CMD_BUFFER_TAIL_OFFSET);
}
+/*
+ * Callers need to hold the IOMMU lock, which will be released here before
+ * entering the loop to await command completion.
+ */
static void flush_command_buffer(struct amd_iommu *iommu,
- unsigned int timeout_base)
+ unsigned int timeout_base,
+ unsigned long flags)
{
static DEFINE_PER_CPU(uint64_t, poll_slot);
uint64_t *this_poll_slot = &this_cpu(poll_slot);
@@ -72,7 +86,9 @@ static void flush_command_buffer(struct
IOMMU_CMD_OPCODE_SHIFT, &cmd[1]);
cmd[2] = CMD_COMPLETION_DONE;
cmd[3] = 0;
- send_iommu_command(iommu, cmd);
+ send_iommu_command(iommu, cmd, NULL);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
start = NOW();
timeout = start + (timeout_base ?: 100) * MILLISECS(threshold);
@@ -99,12 +115,19 @@ static void flush_command_buffer(struct
}
/* Build low level iommu command messages */
-static void invalidate_iommu_pages(struct amd_iommu *iommu,
- u64 io_addr, u16 domain_id, u16 order)
+
+/*
+ * The function will acquire the IOMMU lock, via its call to
+ * send_iommu_command(), and then transfer lock ownership to the caller.
+ */
+static unsigned long invalidate_iommu_pages(struct amd_iommu *iommu,
+ daddr_t io_addr, domid_t domain_id,
+ unsigned int order)
{
u64 addr_lo, addr_hi;
u32 cmd[4], entry;
int sflag = 0, pde = 0;
+ unsigned long flags;
ASSERT ( order == 0 || order == 9 || order == 18 );
@@ -152,16 +175,27 @@ static void invalidate_iommu_pages(struc
cmd[3] = entry;
cmd[0] = 0;
- send_iommu_command(iommu, cmd);
+ send_iommu_command(iommu, cmd, &flags);
+
+ return flags;
}
-static void invalidate_iotlb_pages(struct amd_iommu *iommu,
- u16 maxpend, u32 pasid, u16 queueid,
- u64 io_addr, u16 dev_id, u16 order)
+/*
+ * The function will acquire the IOMMU lock, via its call to
+ * send_iommu_command(), and then transfer lock ownership to the caller.
+ */
+static unsigned long invalidate_iotlb_pages(struct amd_iommu *iommu,
+ unsigned int maxpend,
+ unsigned int pasid,
+ unsigned int queueid,
+ daddr_t io_addr,
+ unsigned int dev_id,
+ unsigned int order)
{
u64 addr_lo, addr_hi;
u32 cmd[4], entry;
int sflag = 0;
+ unsigned long flags;
ASSERT ( order == 0 || order == 9 || order == 18 );
@@ -222,9 +256,12 @@ static void invalidate_iotlb_pages(struc
IOMMU_INV_IOTLB_PAGES_ADDR_HIGH_SHIFT, &entry);
cmd[3] = entry;
- send_iommu_command(iommu, cmd);
+ send_iommu_command(iommu, cmd, &flags);
+
+ return flags;
}
+/* Callers need to hold the IOMMU lock. */
static void invalidate_dev_table_entry(struct amd_iommu *iommu,
u16 device_id)
{
@@ -241,12 +278,18 @@ static void invalidate_dev_table_entry(s
&entry);
cmd[1] = entry;
- send_iommu_command(iommu, cmd);
+ send_iommu_command(iommu, cmd, NULL);
}
-static void invalidate_interrupt_table(struct amd_iommu *iommu, u16 device_id)
+/*
+ * The function will acquire the IOMMU lock, via its call to
+ * send_iommu_command(), and then transfer lock ownership to the caller.
+ */
+static unsigned long invalidate_interrupt_table(struct amd_iommu *iommu,
+ uint16_t device_id)
{
u32 cmd[4], entry;
+ unsigned long flags;
cmd[3] = cmd[2] = 0;
set_field_in_reg_u32(device_id, 0,
@@ -257,9 +300,12 @@ static void invalidate_interrupt_table(s
IOMMU_CMD_OPCODE_MASK, IOMMU_CMD_OPCODE_SHIFT,
&entry);
cmd[1] = entry;
- send_iommu_command(iommu, cmd);
+ send_iommu_command(iommu, cmd, &flags);
+
+ return flags;
}
+/* Callers need to hold the IOMMU lock. */
static void invalidate_iommu_all(struct amd_iommu *iommu)
{
u32 cmd[4], entry;
@@ -271,7 +317,7 @@ static void invalidate_iommu_all(struct
&entry);
cmd[1] = entry;
- send_iommu_command(iommu, cmd);
+ send_iommu_command(iommu, cmd, NULL);
}
void amd_iommu_flush_iotlb(u8 devfn, const struct pci_dev *pdev,
@@ -304,10 +350,9 @@ void amd_iommu_flush_iotlb(u8 devfn, con
maxpend = pdev->ats.queue_depth & 0xff;
/* send INVALIDATE_IOTLB_PAGES command */
- spin_lock_irqsave(&iommu->lock, flags);
- invalidate_iotlb_pages(iommu, maxpend, 0, queueid, daddr, req_id, order);
- flush_command_buffer(iommu, iommu_dev_iotlb_timeout);
- spin_unlock_irqrestore(&iommu->lock, flags);
+ flags = invalidate_iotlb_pages(iommu, maxpend, 0, queueid, daddr,
+ req_id, order);
+ flush_command_buffer(iommu, iommu_dev_iotlb_timeout, flags);
}
static void amd_iommu_flush_all_iotlbs(struct domain *d, daddr_t daddr,
@@ -336,15 +381,12 @@ static void _amd_iommu_flush_pages(struc
{
unsigned long flags;
struct amd_iommu *iommu;
- unsigned int dom_id = d->domain_id;
/* send INVALIDATE_IOMMU_PAGES command */
for_each_amd_iommu ( iommu )
{
- spin_lock_irqsave(&iommu->lock, flags);
- invalidate_iommu_pages(iommu, daddr, dom_id, order);
- flush_command_buffer(iommu, 0);
- spin_unlock_irqrestore(&iommu->lock, flags);
+ flags = invalidate_iommu_pages(iommu, daddr, d->domain_id, order);
+ flush_command_buffer(iommu, 0, flags);
}
if ( ats_enabled )
@@ -362,39 +404,44 @@ void amd_iommu_flush_pages(struct domain
_amd_iommu_flush_pages(d, __dfn_to_daddr(dfn), order);
}
-void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf)
+/*
+ * Callers need to hold the IOMMU lock, which will be released here by
+ * calling flush_command_buffer().
+ */
+void amd_iommu_flush_device(struct amd_iommu *iommu, uint16_t bdf,
+ unsigned long flags)
{
ASSERT( spin_is_locked(&iommu->lock) );
invalidate_dev_table_entry(iommu, bdf);
- flush_command_buffer(iommu, 0);
+ flush_command_buffer(iommu, 0, flags);
}
void amd_iommu_flush_intremap(struct amd_iommu *iommu, uint16_t bdf)
{
- ASSERT( spin_is_locked(&iommu->lock) );
+ unsigned long flags;
- invalidate_interrupt_table(iommu, bdf);
- flush_command_buffer(iommu, 0);
+ flags = invalidate_interrupt_table(iommu, bdf);
+ flush_command_buffer(iommu, 0, flags);
}
-void amd_iommu_flush_all_caches(struct amd_iommu *iommu)
+/*
+ * Callers need to hold the IOMMU lock, which will be released here by
+ * calling flush_command_buffer().
+ */
+void amd_iommu_flush_all_caches(struct amd_iommu *iommu, unsigned long flags)
{
ASSERT( spin_is_locked(&iommu->lock) );
invalidate_iommu_all(iommu);
- flush_command_buffer(iommu, 0);
+ flush_command_buffer(iommu, 0, flags);
}
void amd_iommu_send_guest_cmd(struct amd_iommu *iommu, u32 cmd[])
{
unsigned long flags;
- spin_lock_irqsave(&iommu->lock, flags);
-
- send_iommu_command(iommu, cmd);
+ send_iommu_command(iommu, cmd, &flags);
/* TBD: Timeout selection may require peeking into cmd[]. */
- flush_command_buffer(iommu, 0);
-
- spin_unlock_irqrestore(&iommu->lock, flags);
+ flush_command_buffer(iommu, 0, flags);
}
--- a/xen/drivers/passthrough/amd/iommu_guest.c
+++ b/xen/drivers/passthrough/amd/iommu_guest.c
@@ -449,8 +449,7 @@ static int do_invalidate_dte(struct doma
spin_lock_irqsave(&iommu->lock, flags);
dte_set_gcr3_table(mdte, hdom_id, gcr3_mfn << PAGE_SHIFT, gv, glx);
- amd_iommu_flush_device(iommu, req_id);
- spin_unlock_irqrestore(&iommu->lock, flags);
+ amd_iommu_flush_device(iommu, req_id, flags);
return 0;
}
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -921,13 +921,13 @@ static void enable_iommu(struct amd_iomm
set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED);
- if ( iommu->features.flds.ia_sup )
- amd_iommu_flush_all_caches(iommu);
-
iommu->enabled = 1;
+ if ( iommu->features.flds.ia_sup )
+ amd_iommu_flush_all_caches(iommu, flags);
+ else
out:
- spin_unlock_irqrestore(&iommu->lock, flags);
+ spin_unlock_irqrestore(&iommu->lock, flags);
}
static void disable_iommu(struct amd_iommu *iommu)
@@ -1554,9 +1554,8 @@ static int _invalidate_all_devices(
if ( iommu )
{
spin_lock_irqsave(&iommu->lock, flags);
- amd_iommu_flush_device(iommu, req_id);
+ amd_iommu_flush_device(iommu, req_id, flags);
amd_iommu_flush_intremap(iommu, req_id);
- spin_unlock_irqrestore(&iommu->lock, flags);
}
}
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -310,9 +310,7 @@ static int update_intremap_entry_from_io
entry.ptr32->flds.remap_en = false;
spin_unlock(lock);
- spin_lock(&iommu->lock);
amd_iommu_flush_intremap(iommu, req_id);
- spin_unlock(&iommu->lock);
spin_lock(lock);
}
@@ -527,11 +525,9 @@ static int update_intremap_entry_from_ms
if ( iommu->enabled )
{
- spin_lock_irqsave(&iommu->lock, flags);
amd_iommu_flush_intremap(iommu, req_id);
if ( alias_id != req_id )
amd_iommu_flush_intremap(iommu, alias_id);
- spin_unlock_irqrestore(&iommu->lock, flags);
}
return 0;
@@ -567,11 +563,9 @@ static int update_intremap_entry_from_ms
entry.ptr32->flds.remap_en = false;
spin_unlock(lock);
- spin_lock(&iommu->lock);
amd_iommu_flush_intremap(iommu, req_id);
if ( alias_id != req_id )
amd_iommu_flush_intremap(iommu, alias_id);
- spin_unlock(&iommu->lock);
spin_lock(lock);
}
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -129,7 +129,7 @@ static void amd_iommu_setup_domain_devic
iommu_has_cap(iommu, PCI_CAP_IOTLB_SHIFT) )
dte->i = ats_enabled;
- amd_iommu_flush_device(iommu, req_id);
+ amd_iommu_flush_device(iommu, req_id, flags);
AMD_IOMMU_DEBUG("Setup I/O page table: device id = %#x, type = %#x, "
"root table = %#"PRIx64", "
@@ -138,8 +138,8 @@ static void amd_iommu_setup_domain_devic
page_to_maddr(hd->arch.amd.root_table),
domain->domain_id, hd->arch.amd.paging_mode);
}
-
- spin_unlock_irqrestore(&iommu->lock, flags);
+ else
+ spin_unlock_irqrestore(&iommu->lock, flags);
ASSERT(pcidevs_locked());
@@ -307,14 +307,15 @@ static void amd_iommu_disable_domain_dev
smp_wmb();
dte->v = true;
- amd_iommu_flush_device(iommu, req_id);
+ amd_iommu_flush_device(iommu, req_id, flags);
AMD_IOMMU_DEBUG("Disable: device id = %#x, "
"domain = %d, paging mode = %d\n",
req_id, domain->domain_id,
dom_iommu(domain)->arch.amd.paging_mode);
}
- spin_unlock_irqrestore(&iommu->lock, flags);
+ else
+ spin_unlock_irqrestore(&iommu->lock, flags);
ASSERT(pcidevs_locked());
@@ -455,9 +456,7 @@ static int amd_iommu_add_device(u8 devfn
iommu->dev_table.buffer + (bdf * IOMMU_DEV_TABLE_ENTRY_SIZE),
ivrs_mappings[bdf].intremap_table, iommu, iommu_intremap);
- amd_iommu_flush_device(iommu, bdf);
-
- spin_unlock_irqrestore(&iommu->lock, flags);
+ amd_iommu_flush_device(iommu, bdf, flags);
}
amd_iommu_setup_domain_device(pdev->domain, iommu, devfn, pdev);

On 09/06/2021 10:27, Jan Beulich wrote:
> It appears unhelpful to me for flush_command_buffer() to block all
> progress elsewhere for the given IOMMU by holding its lock while
> waiting for command completion. Unless the lock is already held,
> acquire it in send_iommu_command(). Release it in all cases in
> flush_command_buffer(), before actually starting the wait loop.
>
> Some of the involved functions did/do get called with the lock already
> held: For amd_iommu_flush_intremap() we can simply move the locking
> inside. For amd_iommu_flush_device() and amd_iommu_flush_all_caches()
> the lock now gets dropped in the course of the function's operation.
>
> Where touching function headers anyway, also adjust types used to be
> better in line with our coding style and, where applicable, the
> functions' callers.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
> Reviewed-by: Paul Durrant <paul@xen.org>

Honestly, I'm -2 to this. It is horrible obfuscation of the logic.

I agree with the premise of not holding the lock when we don't need to,
but moving the lock/unlocks into different functions makes it impossible
to follow. (Also, the static analysers are going to scream at this
patch, and rightfully so IMO.)

send_iommu_command() is static, as is flush_command_buffer(), so there
is no need to split the locking like this AFAICT.

Instead, each amd_iommu_flush_* external accessor knows exactly what it
is doing, and whether a wait descriptor is wanted.
flush_command_buffer() wants merging into send_iommu_command() as a
"bool wait" parameter, at which point the locking and unlocking moves
entirely into send_iommu_command() with no pointer games.

~Andrew
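
(One possible shape of this suggestion - purely illustrative, not code
from the thread; queue_command(), queue_completion_wait() and
poll_completion() are placeholder helpers standing in for logic that
today lives in send_iommu_command() and flush_command_buffer():)

    static void send_iommu_command(struct amd_iommu *iommu,
                                   const uint32_t cmd[4], bool wait)
    {
        unsigned long flags;

        spin_lock_irqsave(&iommu->lock, flags);

        queue_command(iommu, cmd);          /* write the entry, bump the tail */
        if ( wait )
            queue_completion_wait(iommu);   /* the COMPLETION_WAIT entry */

        spin_unlock_irqrestore(&iommu->lock, flags);

        if ( wait )
            poll_completion(iommu);         /* spin-wait outside the lock */
    }
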

On 09.06.2021 12:53, Andrew Cooper wrote:
> On 09/06/2021 10:27, Jan Beulich wrote:
>> It appears unhelpful to me for flush_command_buffer() to block all
>> progress elsewhere for the given IOMMU by holding its lock while
>> waiting for command completion. Unless the lock is already held,
>> acquire it in send_iommu_command(). Release it in all cases in
>> flush_command_buffer(), before actually starting the wait loop.
>>
>> Some of the involved functions did/do get called with the lock already
>> held: For amd_iommu_flush_intremap() we can simply move the locking
>> inside. For amd_iommu_flush_device() and amd_iommu_flush_all_caches()
>> the lock now gets dropped in the course of the function's operation.
>>
>> Where touching function headers anyway, also adjust types used to be
>> better in line with our coding style and, where applicable, the
>> functions' callers.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> Reviewed-by: Paul Durrant <paul@xen.org>
>
> Honestly, I'm -2 to this. It is horrible obfuscation of the logic.
>
> I agree with the premise of not holding the lock when we don't need to,
> but moving the lock/unlocks into different functions makes it impossible
> to follow. (Also, the static analysers are going to scream at this
> patch, and rightfully so IMO.)

Just to make it explicit - the immediate goal isn't so much to shrink
the locked regions as much as possible, but first of all to avoid
spin-waiting in flush_command_buffer() while holding the lock (and
having IRQs off).

> send_iommu_command() is static, as is flush_command_buffer(), so there
> is no need to split the locking like this AFAICT.
>
> Instead, each amd_iommu_flush_* external accessor knows exactly what it
> is doing, and whether a wait descriptor is wanted.
> flush_command_buffer() wants merging into send_iommu_command() as a
> "bool wait" parameter, at which point the locking and unlocking moves
> entirely into send_iommu_command() with no pointer games.

Then I can only guess you didn't look closely at the pci_amd_iommu.c
part of the change? You may rest assured that I wouldn't have taken
the chosen route if there was a reasonable alternative (within the
current overall code structure). In fact I had tried first with what
you suggest, and had to make it the way it was posted because of the
requirements of these callers. I'm also pretty certain Paul wouldn't
have given his R-b if there was this simple an alternative.

Jan

On 09.06.2021 12:53, Andrew Cooper wrote:
> On 09/06/2021 10:27, Jan Beulich wrote:
>> It appears unhelpful to me for flush_command_buffer() to block all
>> progress elsewhere for the given IOMMU by holding its lock while
>> waiting for command completion. Unless the lock is already held,
>> acquire it in send_iommu_command(). Release it in all cases in
>> flush_command_buffer(), before actually starting the wait loop.
>>
>> Some of the involved functions did/do get called with the lock already
>> held: For amd_iommu_flush_intremap() we can simply move the locking
>> inside. For amd_iommu_flush_device() and amd_iommu_flush_all_caches()
>> the lock now gets dropped in the course of the function's operation.
>>
>> Where touching function headers anyway, also adjust types used to be
>> better in line with our coding style and, where applicable, the
>> functions' callers.
>>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>> Reviewed-by: Paul Durrant <paul@xen.org>
>
> Honestly, I'm -2 to this. It is horrible obfuscation of the logic.
>
> I agree with the premise of not holding the lock when we don't need to,
> but moving the lock/unlocks into different functions makes it impossible
> to follow. (Also, the static analysers are going to scream at this
> patch, and rightfully so IMO.)
>
> send_iommu_command() is static, as is flush_command_buffer(), so there
> is no need to split the locking like this AFAICT.
>
> Instead, each amd_iommu_flush_* external accessor knows exactly what it
> is doing, and whether a wait descriptor is wanted.
> flush_command_buffer() wants merging into send_iommu_command() as a
> "bool wait" parameter,

A further remark on this particular suggestion: While this is likely
doable, the result will presumably look a little odd: Besides the
various code paths calling send_iommu_command() and then
flush_command_buffer(), the former is also called _by_ the latter.
I can give this a try, but I'd like to be halfway certain I won't
be asked to undo that later on.

And of course this won't help with the split locking, only with some
of the passing around of the saved / to-be-restored eflags.

As an aside, the suggested "bool wait" parameter would (right now)
only ever get passed a "true" argument, so I'm not convinced it's
useful to have at this point, as then we'd also need to deal with the
"false" case (requiring a completion interrupt to be arranged for,
which we have no handler for) despite that code path being unused
(and hence also un-testable).

Jan

On 10.06.2021 13:58, Jan Beulich wrote:
> On 09.06.2021 12:53, Andrew Cooper wrote:
>> On 09/06/2021 10:27, Jan Beulich wrote:
>>> It appears unhelpful to me for flush_command_buffer() to block all
>>> progress elsewhere for the given IOMMU by holding its lock while
>>> waiting for command completion. Unless the lock is already held,
>>> acquire it in send_iommu_command(). Release it in all cases in
>>> flush_command_buffer(), before actually starting the wait loop.
>>>
>>> Some of the involved functions did/do get called with the lock already
>>> held: For amd_iommu_flush_intremap() we can simply move the locking
>>> inside. For amd_iommu_flush_device() and amd_iommu_flush_all_caches()
>>> the lock now gets dropped in the course of the function's operation.
>>>
>>> Where touching function headers anyway, also adjust types used to be
>>> better in line with our coding style and, where applicable, the
>>> functions' callers.
>>>
>>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>>> Reviewed-by: Paul Durrant <paul@xen.org>
>>
>> Honestly, I'm -2 to this. It is horrible obfuscation of the logic.
>>
>> I agree with the premise of not holding the lock when we don't need to,
>> but moving the lock/unlocks into different functions makes it impossible
>> to follow. (Also, the static analysers are going to scream at this
>> patch, and rightfully so IMO.)
>>
>> send_iommu_command() is static, as is flush_command_buffer(), so there
>> is no need to split the locking like this AFAICT.
>>
>> Instead, each amd_iommu_flush_* external accessor knows exactly what it
>> is doing, and whether a wait descriptor is wanted.
>> flush_command_buffer() wants merging into send_iommu_command() as a
>> "bool wait" parameter,
>
> A further remark on this particular suggestion: While this is likely
> doable, the result will presumably look a little odd: Besides the
> various code paths calling send_iommu_command() and then
> flush_command_buffer(), the former is also called _by_ the latter.
> I can give this a try, but I'd like to be halfway certain I won't
> be asked to undo that later on.
>
> And of course this won't help with the split locking, only with some
> of the passing around of the saved / to-be-restored eflags.

Actually, different observation: I don't think there really is a need
for either amd_iommu_flush_device() or amd_iommu_flush_all_caches() to
be called with the lock held. The callers can drop the lock, and then
all locking in iommu_cmd.c can likely be contained to
send_iommu_command() alone, without any need to fold in
flush_command_buffer().

Jan
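
(Again illustrative only - a sketch of what that direction might look
like for one of the pci_amd_iommu.c callers, assuming the flags
parameter added by this version of the patch gets dropped again:)

    /* Hypothetical caller, e.g. in amd_iommu_setup_domain_device(): */
    spin_lock_irqsave(&iommu->lock, flags);
    /* ... update the DTE ... */
    spin_unlock_irqrestore(&iommu->lock, flags);

    /*
     * No lock held here; send_iommu_command() would acquire and release
     * iommu->lock itself around queueing the command.
     */
    amd_iommu_flush_device(iommu, req_id);
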