[PATCH v3 4/7] vpci: allow queueing of mapping operations

Mykyta Poturai posted 7 patches 3 hours ago
[PATCH v3 4/7] vpci: allow queueing of mapping operations
Posted by Mykyta Poturai 3 hours ago
From: Stewart Hildebrand <stewart.hildebrand@amd.com>

Introduce a vPCI BAR mapping task queue. Store the information needed to
map/unmap BARs in struct vpci_map_task. Allow queueing of BAR map/unmap
operations in a list, thus making it possible to perform multiple p2m
operations associated with a single PCI device.

This is preparatory work for further changes that need to perform
multiple unmap/map operations before returning to the guest.

At the moment, only a single operation will be queued. However, once
multiple operations can be queued, the check in modify_bars() that
skips BARs already in the requested state will no longer be accurate.
Remove this check in preparation for the upcoming changes.

Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
Message-ID: <20260406191203.97662-3-stewart.hildebrand@amd.com>
---
v2->v3:
* new patch in this series, borrowed from [1]

[1]: https://patchew.org/Xen/20260406191203.97662-1-stewart.hildebrand@amd.com/20260406191203.97662-3-stewart.hildebrand@amd.com/
---
 xen/common/domain.c        |   5 +-
 xen/drivers/vpci/header.c  | 227 ++++++++++++++++++++++++++-----------
 xen/drivers/vpci/vpci.c    |  28 +----
 xen/include/xen/rangeset.h |   7 --
 xen/include/xen/vpci.h     |  21 ++--
 5 files changed, 179 insertions(+), 109 deletions(-)

diff --git a/xen/common/domain.c b/xen/common/domain.c
index 5ef7db8f09..b1931be987 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -455,8 +455,6 @@ static int vcpu_teardown(struct vcpu *v)
  */
 static void vcpu_destroy(struct vcpu *v)
 {
-    vpci_vcpu_destroy(v);
-
     free_vcpu_struct(v);
 }
 
@@ -514,8 +512,7 @@ struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
     if ( arch_vcpu_create(v) != 0 )
         goto fail_sched;
 
-    if ( vpci_vcpu_init(v) )
-        goto fail_sched;
+    vpci_vcpu_init(v);
 
     d->vcpu[vcpu_id] = v;
     if ( vcpu_id != 0 )
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index 5bfb541b6a..451cdd3a6f 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -23,6 +23,7 @@
 #include <xen/lib.h>
 #include <xen/sched.h>
 #include <xen/softirq.h>
+#include <xen/xvmalloc.h>
 
 #include <xsm/xsm.h>
 
@@ -35,7 +36,7 @@
 
 struct map_data {
     struct domain *d;
-    const struct vpci_bar *bar;
+    const struct vpci_bar_map *bar;
     bool map;
 };
 
@@ -174,32 +175,20 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd,
         ASSERT_UNREACHABLE();
 }
 
-bool vpci_process_pending(struct vcpu *v)
+static int vpci_process_map_task(const struct pci_dev *pdev,
+                                 struct vpci_map_task *task)
 {
-    const struct pci_dev *pdev = v->vpci.pdev;
-    struct vpci_header *header = NULL;
     unsigned int i;
 
-    if ( !pdev )
-        return false;
-
-    read_lock(&v->domain->pci_lock);
-
-    if ( !pdev->vpci || (v->domain != pdev->domain) )
-    {
-        v->vpci.pdev = NULL;
-        read_unlock(&v->domain->pci_lock);
-        return false;
-    }
+    ASSERT(rw_is_locked(&pdev->domain->pci_lock));
 
-    header = &pdev->vpci->header;
-    for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
+    for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
     {
-        struct vpci_bar *bar = &header->bars[i];
-        struct rangeset *mem = v->vpci.mem[i];
+        struct vpci_bar_map *bar = &task->bars[i];
+        struct rangeset *mem = bar->mem;
         struct map_data data = {
-            .d = v->domain,
-            .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
+            .d = pdev->domain,
+            .map = task->cmd & PCI_COMMAND_MEMORY,
             .bar = bar,
         };
         int rc;
@@ -210,58 +199,116 @@ bool vpci_process_pending(struct vcpu *v)
         rc = rangeset_consume_ranges(mem, map_range, &data);
 
         if ( rc == -ERESTART )
-        {
-            read_unlock(&v->domain->pci_lock);
-            return true;
-        }
+            return rc;
 
         if ( rc )
         {
             spin_lock(&pdev->vpci->lock);
             /* Disable memory decoding unconditionally on failure. */
-            modify_decoding(pdev, v->vpci.cmd & ~PCI_COMMAND_MEMORY,
-                            false);
+            modify_decoding(pdev, task->cmd & ~PCI_COMMAND_MEMORY, false);
             spin_unlock(&pdev->vpci->lock);
 
-            /* Clean all the rangesets */
-            for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
-                if ( !rangeset_is_empty(v->vpci.mem[i]) )
-                     rangeset_purge(v->vpci.mem[i]);
+            if ( !is_hardware_domain(pdev->domain) )
+                domain_crash(pdev->domain);
+
+            return rc;
+        }
+    }
+
+    spin_lock(&pdev->vpci->lock);
+    modify_decoding(pdev, task->cmd, task->rom_only);
+    spin_unlock(&pdev->vpci->lock);
+
+    return 0;
+}
+
+static void destroy_map_task(struct vpci_map_task *task)
+{
+    unsigned int i;
+
+    if ( !task )
+    {
+        ASSERT_UNREACHABLE();
+        return;
+    }
+
+    for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
+        rangeset_destroy(task->bars[i].mem);
+
+    xvfree(task);
+}
+
+static void clear_map_queue(struct vcpu *v)
+{
+    struct vpci_map_task *task;
+
+    while ( (task = list_first_entry_or_null(&v->vpci.task_queue,
+                                             struct vpci_map_task,
+                                             next)) != NULL )
+    {
+        list_del(&task->next);
+        destroy_map_task(task);
+    }
+}
+
+bool vpci_process_pending(struct vcpu *v)
+{
+    const struct pci_dev *pdev = v->vpci.pdev;
+    struct vpci_map_task *task;
 
-            v->vpci.pdev = NULL;
+    if ( !pdev )
+        return false;
 
+    read_lock(&v->domain->pci_lock);
+
+    if ( !pdev->vpci || (v->domain != pdev->domain) )
+    {
+        clear_map_queue(v);
+        v->vpci.pdev = NULL;
+        read_unlock(&v->domain->pci_lock);
+        return false;
+    }
+
+    while ( (task = list_first_entry_or_null(&v->vpci.task_queue,
+                                             struct vpci_map_task,
+                                             next)) != NULL )
+    {
+        int rc = vpci_process_map_task(pdev, task);
+
+        if ( rc == -ERESTART )
+        {
             read_unlock(&v->domain->pci_lock);
+            return true;
+        }
 
-            if ( !is_hardware_domain(v->domain) )
-                domain_crash(v->domain);
+        list_del(&task->next);
+        destroy_map_task(task);
 
-            return false;
+        if ( rc )
+        {
+            clear_map_queue(v);
+            break;
         }
     }
     v->vpci.pdev = NULL;
 
-    spin_lock(&pdev->vpci->lock);
-    modify_decoding(pdev, v->vpci.cmd, v->vpci.rom_only);
-    spin_unlock(&pdev->vpci->lock);
-
     read_unlock(&v->domain->pci_lock);
 
     return false;
 }
 
 static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
-                            uint16_t cmd)
+                            struct vpci_map_task *task)
 {
-    struct vpci_header *header = &pdev->vpci->header;
     int rc = 0;
     unsigned int i;
 
     ASSERT(rw_is_write_locked(&d->pci_lock));
 
-    for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
+    for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
     {
-        struct vpci_bar *bar = &header->bars[i];
-        struct rangeset *mem = current->vpci.mem[i];
+        struct vpci_bar_map *bar = &task->bars[i];
+        struct rangeset *mem = bar->mem;
         struct map_data data = { .d = d, .map = true, .bar = bar };
 
         if ( rangeset_is_empty(mem) )
@@ -281,15 +328,52 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
         }
     }
     if ( !rc )
-        modify_decoding(pdev, cmd, false);
+        modify_decoding(pdev, task->cmd, false);
 
     return rc;
 }
 
-static void defer_map(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
+static struct vpci_map_task *alloc_map_task(const struct pci_dev *pdev,
+                                            uint16_t cmd, bool rom_only)
+{
+    struct vpci_map_task *task;
+    unsigned int i;
+
+    task = xvzalloc(struct vpci_map_task);
+
+    if ( !task )
+        return NULL;
+
+    for ( i = 0; i < ARRAY_SIZE(task->bars); i++ )
+    {
+        if ( !MAPPABLE_BAR(&pdev->vpci->header.bars[i]) )
+            continue;
+
+        task->bars[i].mem = rangeset_new(pdev->domain, NULL,
+                                         RANGESETF_no_print);
+
+        if ( !task->bars[i].mem )
+        {
+            destroy_map_task(task);
+            return NULL;
+        }
+
+        task->bars[i].addr = pdev->vpci->header.bars[i].addr;
+        task->bars[i].guest_addr = pdev->vpci->header.bars[i].guest_addr;
+    }
+
+    task->cmd = cmd;
+    task->rom_only = rom_only;
+
+    return task;
+}
+
+static void defer_map(const struct pci_dev *pdev, struct vpci_map_task *task)
 {
     struct vcpu *curr = current;
 
+    ASSERT(!curr->vpci.pdev || curr->vpci.pdev == pdev);
+
     /*
      * FIXME: when deferring the {un}map the state of the device should not
      * be trusted. For example the enable bit is toggled after the device
@@ -297,8 +381,8 @@ static void defer_map(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
      * started for the same device if the domain is not well-behaved.
      */
     curr->vpci.pdev = pdev;
-    curr->vpci.cmd = cmd;
-    curr->vpci.rom_only = rom_only;
+    list_add_tail(&task->next, &curr->vpci.task_queue);
+
     /*
      * Raise a scheduler softirq in order to prevent the guest from resuming
      * execution with pending mapping operations, to trigger the invocation
@@ -313,11 +397,17 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
     struct pci_dev *tmp;
     const struct domain *d;
     const struct vpci_msix *msix = pdev->vpci->msix;
+    struct vpci_map_task *task;
     unsigned int i, j;
     int rc;
 
     ASSERT(rw_is_write_locked(&pdev->domain->pci_lock));
 
+    task = alloc_map_task(pdev, cmd, rom_only);
+
+    if ( !task )
+        return -ENOMEM;
+
     /*
      * Create a rangeset per BAR that represents the current device memory
      * region and compare it against all the currently active BAR memory
@@ -333,19 +423,18 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
     for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
     {
         struct vpci_bar *bar = &header->bars[i];
-        struct rangeset *mem = current->vpci.mem[i];
+        struct rangeset *mem = task->bars[i].mem;
         unsigned long start = PFN_DOWN(bar->addr);
         unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
         unsigned long start_guest = PFN_DOWN(bar->guest_addr);
         unsigned long end_guest = PFN_DOWN(bar->guest_addr + bar->size - 1);
 
-        ASSERT(mem);
+        if ( !mem )
+            continue;
 
         if ( !MAPPABLE_BAR(bar) ||
              (rom_only ? bar->type != VPCI_BAR_ROM
-                       : (bar->type == VPCI_BAR_ROM && !header->rom_enabled)) ||
-             /* Skip BARs already in the requested state. */
-             bar->enabled == !!(cmd & PCI_COMMAND_MEMORY) )
+                       : (bar->type == VPCI_BAR_ROM && !header->rom_enabled)) )
             continue;
 
         if ( !pci_check_bar(pdev, _mfn(start), _mfn(end)) )
@@ -368,7 +457,8 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
             gprintk(XENLOG_G_WARNING,
                     "%pp: can't map BAR%u - offset mismatch: %#lx vs %#lx\n",
                     &pdev->sbdf, i, bar->guest_addr, bar->addr);
-            return -EINVAL;
+            rc = -EINVAL;
+            goto fail;
         }
 
         rc = rangeset_add_range(mem, start_guest, end_guest);
@@ -376,13 +466,13 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
         {
             printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n",
                    start_guest, end_guest, rc);
-            return rc;
+            goto fail;
         }
 
         /* Check for overlap with the already setup BAR ranges. */
         for ( j = 0; j < i; j++ )
         {
-            struct rangeset *prev_mem = current->vpci.mem[j];
+            struct rangeset *prev_mem = task->bars[j].mem;
 
             if ( rangeset_is_empty(prev_mem) )
                 continue;
@@ -393,7 +483,7 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
                 gprintk(XENLOG_WARNING,
                        "%pp: failed to remove overlapping range [%lx, %lx]: %d\n",
                         &pdev->sbdf, start_guest, end_guest, rc);
-                return rc;
+                goto fail;
             }
         }
 
@@ -403,7 +493,7 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
             gprintk(XENLOG_WARNING,
                     "%pp: failed to sanitize BAR#%u memory: %d\n",
                     &pdev->sbdf, i, rc);
-            return rc;
+            goto fail;
         }
     }
 
@@ -414,9 +504,9 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
         unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
                                      vmsix_table_size(pdev->vpci, i) - 1);
 
-        for ( j = 0; j < ARRAY_SIZE(current->vpci.mem); j++ )
+        for ( j = 0; j < ARRAY_SIZE(task->bars); j++ )
         {
-            struct rangeset *mem = current->vpci.mem[j];
+            struct rangeset *mem = task->bars[j].mem;
 
             if ( rangeset_is_empty(mem) )
                 continue;
@@ -427,7 +517,7 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
                 gprintk(XENLOG_WARNING,
                        "%pp: failed to remove MSIX table [%lx, %lx]: %d\n",
                         &pdev->sbdf, start, end, rc);
-                return rc;
+                goto fail;
             }
         }
     }
@@ -471,7 +561,7 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
                 for ( j = 0; j < ARRAY_SIZE(header->bars); j++)
                 {
                     const struct vpci_bar *bar = &header->bars[j];
-                    struct rangeset *mem = current->vpci.mem[j];
+                    struct rangeset *mem = task->bars[j].mem;
 
                     if ( !rangeset_overlaps_range(mem, start, end) ||
                          /*
@@ -490,7 +580,7 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
                         gprintk(XENLOG_WARNING,
                                 "%pp: failed to remove [%lx, %lx]: %d\n",
                                 &pdev->sbdf, start, end, rc);
-                        return rc;
+                        goto fail;
                     }
                 }
             }
@@ -513,12 +603,19 @@ int vpci_modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
          * will always be to establish mappings and process all the BARs.
          */
         ASSERT((cmd & PCI_COMMAND_MEMORY) && !rom_only);
-        return apply_map(pdev->domain, pdev, cmd);
+        rc = apply_map(pdev->domain, pdev, task);
+        destroy_map_task(task);
+        return rc;
     }
 
-    defer_map(pdev, cmd, rom_only);
+    defer_map(pdev, task);
 
     return 0;
+
+ fail:
+    destroy_map_task(task);
+
+    return rc;
 }
 
 static void cf_check cmd_write(
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index d069ca6d9c..ce9fb5b357 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -24,33 +24,9 @@
 
 #ifdef __XEN__
 
-void vpci_vcpu_destroy(struct vcpu *v)
+void vpci_vcpu_init(struct vcpu *v)
 {
-    if ( !has_vpci(v->domain) && !is_idle_domain(v->domain) )
-        return;
-
-    for ( unsigned int i = 0; i < ARRAY_SIZE(v->vpci.mem); i++ )
-        RANGESET_DESTROY(v->vpci.mem[i]);
-}
-
-int vpci_vcpu_init(struct vcpu *v)
-{
-    unsigned int i;
-
-    if ( !has_vpci(v->domain) && !is_idle_domain(v->domain) )
-        return 0;
-
-    for ( i = 0; i < ARRAY_SIZE(v->vpci.mem); i++ )
-    {
-        char str[32];
-
-        snprintf(str, sizeof(str), "%pv:BAR%u", v, i);
-        v->vpci.mem[i] = rangeset_new(v->domain, str, RANGESETF_no_print);
-        if ( !v->vpci.mem[i] )
-            return -ENOMEM;
-    }
-
-    return 0;
+    INIT_LIST_HEAD(&v->vpci.task_queue);
 }
 
 #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
diff --git a/xen/include/xen/rangeset.h b/xen/include/xen/rangeset.h
index f01e00ec92..817505badf 100644
--- a/xen/include/xen/rangeset.h
+++ b/xen/include/xen/rangeset.h
@@ -40,13 +40,6 @@ struct rangeset *rangeset_new(
 void rangeset_destroy(
     struct rangeset *r);
 
-/* Destroy a rangeset, and zero the pointer to it. */
-#define RANGESET_DESTROY(r)  \
-    ({                       \
-        rangeset_destroy(r); \
-        (r) = NULL;          \
-    })
-
 /*
  * Set a limit on the number of ranges that may exist in set @r.
  * NOTE: This must be called while @r is empty.
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index b55bacbe6e..e34f7abe6d 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -19,8 +19,7 @@
  */
 #define VPCI_MAX_VIRT_DEV       (PCI_SLOT(~0) + 1)
 
-void vpci_vcpu_destroy(struct vcpu *v);
-int vpci_vcpu_init(struct vcpu *v);
+void vpci_vcpu_init(struct vcpu *v);
 
 /* Assign vPCI to device by adding handlers. */
 int __must_check vpci_assign_device(struct pci_dev *pdev);
@@ -155,14 +154,23 @@ struct vpci {
 };
 
 #ifdef __XEN__
-struct vpci_vcpu {
+struct vpci_map_task {
     /* Per-vcpu structure to store state while {un}mapping of PCI BARs. */
-    const struct pci_dev *pdev;
-    struct rangeset *mem[ARRAY_SIZE(((struct vpci_header *)NULL)->bars)];
+    struct list_head next;
+    struct vpci_bar_map {
+        uint64_t addr;
+        uint64_t guest_addr;
+        struct rangeset *mem;
+    } bars[ARRAY_SIZE(((struct vpci_header *)NULL)->bars)];
     uint16_t cmd;
     bool rom_only : 1;
 };
 
+struct vpci_vcpu {
+    const struct pci_dev *pdev;
+    struct list_head task_queue;
+};
+
 void vpci_dump_msi(void);
 
 /* Arch-specific vPCI MSI helpers. */
@@ -207,8 +215,7 @@ bool vpci_ecam_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int len,
 #else /* !CONFIG_HAS_VPCI */
 struct vpci_vcpu {};
 
-static inline void vpci_vcpu_destroy(struct vcpu *v) { }
-static inline int vpci_vcpu_init(struct vcpu *v) { return 0; }
+static inline void vpci_vcpu_init(struct vcpu *v) { }
 
 static inline int vpci_reinit_ext_capabilities(struct pci_dev *pdev)
 {
-- 
2.51.2