Attempt to reduce the number of MSI entry writes, and the associated checks
of whether memory decoding and MSI-X are enabled for the PCI device, when
the MSI data hasn't changed.
When using Interrupt Remapping the MSI entry will contain an index into
the remapping table, and it's in that remapping table where the MSI vector
and destination CPU are stored. As such, when using interrupt remapping,
changes to the interrupt affinity shouldn't result in changes to the MSI
entry, and the MSI entry update can be avoided.
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Cc: Ross Lagerwall <ross.lagerwall@citrix.com>
---
Changes since v1:
- Add more comments.
- Simplify dma_msi_set_affinity().
---
xen/arch/x86/include/asm/msi.h | 5 +++-
xen/arch/x86/msi.c | 45 ++++++++++++++++++++---------
xen/drivers/passthrough/vtd/iommu.c | 11 +++----
3 files changed, 39 insertions(+), 22 deletions(-)
diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h
index 378b85ee947b..4301c58c7c4d 100644
--- a/xen/arch/x86/include/asm/msi.h
+++ b/xen/arch/x86/include/asm/msi.h
@@ -124,7 +124,8 @@ struct msi_desc {
int irq;
int remap_index; /* index in interrupt remapping table */
- struct msi_msg msg; /* Last set MSI message */
+ /* Last set MSI message in remappable format if applicable */
+ struct msi_msg msg;
};
/*
@@ -236,6 +237,8 @@ struct arch_msix {
};
void early_msi_init(void);
+
+/* If cpu_mask is NULL msg->dest32 is used as the destination APIC ID. */
void msi_compose_msg(unsigned vector, const cpumask_t *cpu_mask,
struct msi_msg *msg);
void __msi_set_enable(pci_sbdf_t sbdf, int pos, int enable);
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index 8efe41b1526c..3c9fdc9b7bf5 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -152,11 +152,13 @@ static bool msix_memory_decoded(const struct pci_dev *dev, unsigned int pos)
}
/*
- * MSI message composition
+ * MSI message composition.
+ * If cpu_mask is NULL msg->dest32 is used as the destination APIC ID.
*/
void msi_compose_msg(unsigned vector, const cpumask_t *cpu_mask, struct msi_msg *msg)
{
- memset(msg, 0, sizeof(*msg));
+ msg->address = 0;
+ msg->data = 0;
if ( vector < FIRST_DYNAMIC_VECTOR )
return;
@@ -186,8 +188,6 @@ void msi_compose_msg(unsigned vector, const cpumask_t *cpu_mask, struct msi_msg
static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
- entry->msg = *msg;
-
if ( iommu_intremap != iommu_intremap_off )
{
int rc;
@@ -198,6 +198,20 @@ static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
return rc;
}
+ /*
+ * Avoid updating the MSI entry if the address and data fields haven't
+ * changed. When using interrupt remapping changing the MSI affinity
+ * shouldn't change the interrupt remapping table index, and hence the MSI
+ * address and data fields should remain the same.
+ */
+ if ( entry->msg.address == msg->address && entry->msg.data == msg->data )
+ {
+ entry->msg.dest32 = msg->dest32;
+ return 0;
+ }
+
+ entry->msg = *msg;
+
switch ( entry->msi_attrib.type )
{
case PCI_CAP_ID_MSI:
@@ -243,21 +257,15 @@ static int write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
void cf_check set_msi_affinity(struct irq_desc *desc, const cpumask_t *mask)
{
struct msi_msg msg;
- unsigned int dest;
struct msi_desc *msi_desc = desc->msi_desc;
- dest = set_desc_affinity(desc, mask);
- if ( dest == BAD_APICID || !msi_desc )
+ msg.dest32 = set_desc_affinity(desc, mask);
+ if ( msg.dest32 == BAD_APICID || !msi_desc )
return;
ASSERT(spin_is_locked(&desc->lock));
- msg = msi_desc->msg;
- msg.data &= ~MSI_DATA_VECTOR_MASK;
- msg.data |= MSI_DATA_VECTOR(desc->arch.vector);
- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
- msg.address_lo |= MSI_ADDR_DEST_ID(dest);
- msg.dest32 = dest;
+ msi_compose_msg(desc->arch.vector, NULL, &msg);
write_msi_msg(msi_desc, &msg);
}
@@ -1402,7 +1410,16 @@ int pci_restore_msi_state(struct pci_dev *pdev)
}
type = entry->msi_attrib.type;
- msg = entry->msg;
+ /*
+ * Instead of re-writing the current MSI message, regenerate it and
+ * force a write, as that's simpler than adding support for writing
+ * already translated messages.
+ */
+ msg.dest32 = entry->msg.dest32;
+ /* Pass NULL as the cpu mask to use the value in msg.dest32. */
+ msi_compose_msg(desc->arch.vector, NULL, &msg);
+ /* Zero the cached message to unconditionally trigger a re-write. */
+ entry->msg = (typeof(entry->msg)){};
write_msi_msg(entry, &msg);
for ( i = 0; ; )
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index a1927d9f126d..e03e9dfbc595 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1183,22 +1183,19 @@ static void cf_check dma_msi_set_affinity(
struct irq_desc *desc, const cpumask_t *mask)
{
struct msi_msg msg;
- unsigned int dest;
unsigned long flags;
struct vtd_iommu *iommu = desc->action->dev_id;
- dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID){
+ msg.dest32 = set_desc_affinity(desc, mask);
+ if ( msg.dest32 == BAD_APICID )
+ {
dprintk(XENLOG_ERR VTDPREFIX, "Set iommu interrupt affinity error!\n");
return;
}
msi_compose_msg(desc->arch.vector, NULL, &msg);
- msg.dest32 = dest;
if (x2apic_enabled)
- msg.address_hi = dest & 0xFFFFFF00;
- ASSERT(!(msg.address_lo & MSI_ADDR_DEST_ID_MASK));
- msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+ msg.address_hi = msg.dest32 & 0xFFFFFF00;
iommu->msi.msg = msg;
spin_lock_irqsave(&iommu->register_lock, flags);
--
2.48.1
© 2016 - 2025 Red Hat, Inc.