[PATCH v3 06/11] vpci/header: Handle p2m range sets per BAR

Oleksandr Andrushchenko posted 11 patches 3 years, 2 months ago
There is a newer version of this series
[PATCH v3 06/11] vpci/header: Handle p2m range sets per BAR
Posted by Oleksandr Andrushchenko 3 years, 2 months ago
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>

Instead of handling a single range set, that contains all the memory
regions of all the BARs and ROM, have them per BAR.

This is in preparation of making non-identity mappings in p2m for the
MMIOs/ROM.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
---
 xen/drivers/vpci/header.c | 172 ++++++++++++++++++++++++++------------
 xen/include/xen/vpci.h    |   3 +-
 2 files changed, 122 insertions(+), 53 deletions(-)

diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index ec4d215f36ff..9c603d26d302 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -131,49 +131,75 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd,
 
 bool vpci_process_pending(struct vcpu *v)
 {
-    if ( v->vpci.mem )
+    if ( v->vpci.num_mem_ranges )
     {
         struct map_data data = {
             .d = v->domain,
             .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
         };
-        int rc = rangeset_consume_ranges(v->vpci.mem, map_range, &data);
+        struct pci_dev *pdev = v->vpci.pdev;
+        struct vpci_header *header = &pdev->vpci->header;
+        unsigned int i;
 
-        if ( rc == -ERESTART )
-            return true;
+        for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
+        {
+            struct vpci_bar *bar = &header->bars[i];
+            int rc;
 
-        spin_lock(&v->vpci.pdev->vpci->lock);
-        /* Disable memory decoding unconditionally on failure. */
-        modify_decoding(v->vpci.pdev,
-                        rc ? v->vpci.cmd & ~PCI_COMMAND_MEMORY : v->vpci.cmd,
-                        !rc && v->vpci.rom_only);
-        spin_unlock(&v->vpci.pdev->vpci->lock);
+            if ( !bar->mem )
+                continue;
 
-        rangeset_destroy(v->vpci.mem);
-        v->vpci.mem = NULL;
-        if ( rc )
-            /*
-             * FIXME: in case of failure remove the device from the domain.
-             * Note that there might still be leftover mappings. While this is
-             * safe for Dom0, for DomUs the domain will likely need to be
-             * killed in order to avoid leaking stale p2m mappings on
-             * failure.
-             */
-            vpci_remove_device(v->vpci.pdev);
+            rc = rangeset_consume_ranges(bar->mem, map_range, &data);
+
+            if ( rc == -ERESTART )
+                return true;
+
+            spin_lock(&pdev->vpci->lock);
+            /* Disable memory decoding unconditionally on failure. */
+            modify_decoding(pdev,
+                            rc ? v->vpci.cmd & ~PCI_COMMAND_MEMORY : v->vpci.cmd,
+                            !rc && v->vpci.rom_only);
+            spin_unlock(&pdev->vpci->lock);
+
+            rangeset_destroy(bar->mem);
+            bar->mem = NULL;
+            v->vpci.num_mem_ranges--;
+            if ( rc )
+                /*
+                 * FIXME: in case of failure remove the device from the domain.
+                 * Note that there might still be leftover mappings. While this is
+                 * safe for Dom0, for DomUs the domain will likely need to be
+                 * killed in order to avoid leaking stale p2m mappings on
+                 * failure.
+                 */
+                vpci_remove_device(pdev);
+        }
     }
 
     return false;
 }
 
 static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
-                            struct rangeset *mem, uint16_t cmd)
+                            uint16_t cmd)
 {
     struct map_data data = { .d = d, .map = true };
-    int rc;
+    struct vpci_header *header = &pdev->vpci->header;
+    int rc = 0;
+    unsigned int i;
+
+    for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
+    {
+        struct vpci_bar *bar = &header->bars[i];
 
-    while ( (rc = rangeset_consume_ranges(mem, map_range, &data)) == -ERESTART )
-        process_pending_softirqs();
-    rangeset_destroy(mem);
+        if ( !bar->mem )
+            continue;
+
+        while ( (rc = rangeset_consume_ranges(bar->mem, map_range,
+                                              &data)) == -ERESTART )
+            process_pending_softirqs();
+        rangeset_destroy(bar->mem);
+        bar->mem = NULL;
+    }
     if ( !rc )
         modify_decoding(pdev, cmd, false);
 
@@ -181,7 +207,7 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
 }
 
 static void defer_map(struct domain *d, struct pci_dev *pdev,
-                      struct rangeset *mem, uint16_t cmd, bool rom_only)
+                      uint16_t cmd, bool rom_only, uint8_t num_mem_ranges)
 {
     struct vcpu *curr = current;
 
@@ -192,9 +218,9 @@ static void defer_map(struct domain *d, struct pci_dev *pdev,
      * started for the same device if the domain is not well-behaved.
      */
     curr->vpci.pdev = pdev;
-    curr->vpci.mem = mem;
     curr->vpci.cmd = cmd;
     curr->vpci.rom_only = rom_only;
+    curr->vpci.num_mem_ranges = num_mem_ranges;
     /*
      * Raise a scheduler softirq in order to prevent the guest from resuming
      * execution with pending mapping operations, to trigger the invocation
@@ -206,42 +232,47 @@ static void defer_map(struct domain *d, struct pci_dev *pdev,
 static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
 {
     struct vpci_header *header = &pdev->vpci->header;
-    struct rangeset *mem = rangeset_new(NULL, NULL, 0);
     struct pci_dev *tmp, *dev = NULL;
     const struct vpci_msix *msix = pdev->vpci->msix;
-    unsigned int i;
+    unsigned int i, j;
     int rc;
-
-    if ( !mem )
-        return -ENOMEM;
+    uint8_t num_mem_ranges;
 
     /*
-     * Create a rangeset that represents the current device BARs memory region
+     * Create a rangeset per BAR that represents the current device memory region
      * and compare it against all the currently active BAR memory regions. If
      * an overlap is found, subtract it from the region to be mapped/unmapped.
      *
-     * First fill the rangeset with all the BARs of this device or with the ROM
+     * First fill the rangesets with all the BARs of this device or with the ROM
      * BAR only, depending on whether the guest is toggling the memory decode
      * bit of the command register, or the enable bit of the ROM BAR register.
      */
     for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
     {
-        const struct vpci_bar *bar = &header->bars[i];
+        struct vpci_bar *bar = &header->bars[i];
         unsigned long start = PFN_DOWN(bar->addr);
         unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
 
+        bar->mem = NULL;
+
         if ( !MAPPABLE_BAR(bar) ||
              (rom_only ? bar->type != VPCI_BAR_ROM
                        : (bar->type == VPCI_BAR_ROM && !header->rom_enabled)) )
             continue;
 
-        rc = rangeset_add_range(mem, start, end);
+        bar->mem = rangeset_new(NULL, NULL, 0);
+        if ( !bar->mem )
+        {
+            rc = -ENOMEM;
+            goto fail;
+        }
+
+        rc = rangeset_add_range(bar->mem, start, end);
         if ( rc )
         {
             printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n",
                    start, end, rc);
-            rangeset_destroy(mem);
-            return rc;
+            goto fail;
         }
     }
 
@@ -252,14 +283,21 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
         unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
                                      vmsix_table_size(pdev->vpci, i) - 1);
 
-        rc = rangeset_remove_range(mem, start, end);
-        if ( rc )
+        for ( j = 0; j < ARRAY_SIZE(header->bars); j++ )
         {
-            printk(XENLOG_G_WARNING
-                   "Failed to remove MSIX table [%lx, %lx]: %d\n",
-                   start, end, rc);
-            rangeset_destroy(mem);
-            return rc;
+            const struct vpci_bar *bar = &header->bars[j];
+
+            if ( !bar->mem )
+                continue;
+
+            rc = rangeset_remove_range(bar->mem, start, end);
+            if ( rc )
+            {
+                printk(XENLOG_G_WARNING
+                       "Failed to remove MSIX table [%lx, %lx]: %d\n",
+                       start, end, rc);
+                goto fail;
+            }
         }
     }
 
@@ -291,7 +329,8 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
             unsigned long start = PFN_DOWN(bar->addr);
             unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
 
-            if ( !bar->enabled || !rangeset_overlaps_range(mem, start, end) ||
+            if ( !bar->enabled ||
+                 !rangeset_overlaps_range(bar->mem, start, end) ||
                  /*
                   * If only the ROM enable bit is toggled check against other
                   * BARs in the same device for overlaps, but not against the
@@ -300,13 +339,12 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
                  (rom_only && tmp == pdev && bar->type == VPCI_BAR_ROM) )
                 continue;
 
-            rc = rangeset_remove_range(mem, start, end);
+            rc = rangeset_remove_range(bar->mem, start, end);
             if ( rc )
             {
                 printk(XENLOG_G_WARNING "Failed to remove [%lx, %lx]: %d\n",
                        start, end, rc);
-                rangeset_destroy(mem);
-                return rc;
+                goto fail;
             }
         }
     }
@@ -324,12 +362,42 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
          * will always be to establish mappings and process all the BARs.
          */
         ASSERT((cmd & PCI_COMMAND_MEMORY) && !rom_only);
-        return apply_map(pdev->domain, pdev, mem, cmd);
+        return apply_map(pdev->domain, pdev, cmd);
     }
 
-    defer_map(dev->domain, dev, mem, cmd, rom_only);
+    /* Find out how many memory ranges has left after MSI and overlaps. */
+    num_mem_ranges = 0;
+    for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
+    {
+        struct vpci_bar *bar = &header->bars[i];
+
+        if ( !rangeset_is_empty(bar->mem) )
+            num_mem_ranges++;
+    }
+
+    /*
+     * There are cases when PCI device, root port for example, has neither
+     * memory space nor IO. In this case PCI command register write is
+     * missed resulting in the underlying PCI device not functional, so:
+     *   - if there are no regions write the command register now
+     *   - if there are regions then defer work and write later on
+     */
+    if ( !num_mem_ranges )
+        pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd);
+    else
+        defer_map(dev->domain, dev, cmd, rom_only, num_mem_ranges);
 
     return 0;
+
+fail:
+    for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
+    {
+        struct vpci_bar *bar = &header->bars[i];
+
+        rangeset_destroy(bar->mem);
+        bar->mem = NULL;
+    }
+    return rc;
 }
 
 static void cmd_write(const struct pci_dev *pdev, unsigned int reg,
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index a0320b22cb36..352e02d0106d 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -80,6 +80,7 @@ struct vpci {
             /* Guest view of the BAR. */
             uint64_t guest_addr;
             uint64_t size;
+            struct rangeset *mem;
             enum {
                 VPCI_BAR_EMPTY,
                 VPCI_BAR_IO,
@@ -154,9 +155,9 @@ struct vpci {
 
 struct vpci_vcpu {
     /* Per-vcpu structure to store state while {un}mapping of PCI BARs. */
-    struct rangeset *mem;
     struct pci_dev *pdev;
     uint16_t cmd;
+    uint8_t num_mem_ranges;
     bool rom_only : 1;
 };
 
-- 
2.25.1


Re: [PATCH v3 06/11] vpci/header: Handle p2m range sets per BAR
Posted by Oleksandr Andrushchenko 3 years, 1 month ago
Hi, Roger!
Could you please take a look at the below?
Jan was questioning the per BAR range set approach, so it
is crucial for the maintainer (you) to answer here.

Thank you in advance,
Oleksandr

On 30.09.21 10:52, Oleksandr Andrushchenko wrote:
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>
> Instead of handling a single range set, that contains all the memory
> regions of all the BARs and ROM, have them per BAR.
>
> This is in preparation of making non-identity mappings in p2m for the
> MMIOs/ROM.
>
> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
> ---
>   xen/drivers/vpci/header.c | 172 ++++++++++++++++++++++++++------------
>   xen/include/xen/vpci.h    |   3 +-
>   2 files changed, 122 insertions(+), 53 deletions(-)
>
> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
> index ec4d215f36ff..9c603d26d302 100644
> --- a/xen/drivers/vpci/header.c
> +++ b/xen/drivers/vpci/header.c
> @@ -131,49 +131,75 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd,
>   
>   bool vpci_process_pending(struct vcpu *v)
>   {
> -    if ( v->vpci.mem )
> +    if ( v->vpci.num_mem_ranges )
>       {
>           struct map_data data = {
>               .d = v->domain,
>               .map = v->vpci.cmd & PCI_COMMAND_MEMORY,
>           };
> -        int rc = rangeset_consume_ranges(v->vpci.mem, map_range, &data);
> +        struct pci_dev *pdev = v->vpci.pdev;
> +        struct vpci_header *header = &pdev->vpci->header;
> +        unsigned int i;
>   
> -        if ( rc == -ERESTART )
> -            return true;
> +        for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> +        {
> +            struct vpci_bar *bar = &header->bars[i];
> +            int rc;
>   
> -        spin_lock(&v->vpci.pdev->vpci->lock);
> -        /* Disable memory decoding unconditionally on failure. */
> -        modify_decoding(v->vpci.pdev,
> -                        rc ? v->vpci.cmd & ~PCI_COMMAND_MEMORY : v->vpci.cmd,
> -                        !rc && v->vpci.rom_only);
> -        spin_unlock(&v->vpci.pdev->vpci->lock);
> +            if ( !bar->mem )
> +                continue;
>   
> -        rangeset_destroy(v->vpci.mem);
> -        v->vpci.mem = NULL;
> -        if ( rc )
> -            /*
> -             * FIXME: in case of failure remove the device from the domain.
> -             * Note that there might still be leftover mappings. While this is
> -             * safe for Dom0, for DomUs the domain will likely need to be
> -             * killed in order to avoid leaking stale p2m mappings on
> -             * failure.
> -             */
> -            vpci_remove_device(v->vpci.pdev);
> +            rc = rangeset_consume_ranges(bar->mem, map_range, &data);
> +
> +            if ( rc == -ERESTART )
> +                return true;
> +
> +            spin_lock(&pdev->vpci->lock);
> +            /* Disable memory decoding unconditionally on failure. */
> +            modify_decoding(pdev,
> +                            rc ? v->vpci.cmd & ~PCI_COMMAND_MEMORY : v->vpci.cmd,
> +                            !rc && v->vpci.rom_only);
> +            spin_unlock(&pdev->vpci->lock);
> +
> +            rangeset_destroy(bar->mem);
> +            bar->mem = NULL;
> +            v->vpci.num_mem_ranges--;
> +            if ( rc )
> +                /*
> +                 * FIXME: in case of failure remove the device from the domain.
> +                 * Note that there might still be leftover mappings. While this is
> +                 * safe for Dom0, for DomUs the domain will likely need to be
> +                 * killed in order to avoid leaking stale p2m mappings on
> +                 * failure.
> +                 */
> +                vpci_remove_device(pdev);
> +        }
>       }
>   
>       return false;
>   }
>   
>   static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
> -                            struct rangeset *mem, uint16_t cmd)
> +                            uint16_t cmd)
>   {
>       struct map_data data = { .d = d, .map = true };
> -    int rc;
> +    struct vpci_header *header = &pdev->vpci->header;
> +    int rc = 0;
> +    unsigned int i;
> +
> +    for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> +    {
> +        struct vpci_bar *bar = &header->bars[i];
>   
> -    while ( (rc = rangeset_consume_ranges(mem, map_range, &data)) == -ERESTART )
> -        process_pending_softirqs();
> -    rangeset_destroy(mem);
> +        if ( !bar->mem )
> +            continue;
> +
> +        while ( (rc = rangeset_consume_ranges(bar->mem, map_range,
> +                                              &data)) == -ERESTART )
> +            process_pending_softirqs();
> +        rangeset_destroy(bar->mem);
> +        bar->mem = NULL;
> +    }
>       if ( !rc )
>           modify_decoding(pdev, cmd, false);
>   
> @@ -181,7 +207,7 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
>   }
>   
>   static void defer_map(struct domain *d, struct pci_dev *pdev,
> -                      struct rangeset *mem, uint16_t cmd, bool rom_only)
> +                      uint16_t cmd, bool rom_only, uint8_t num_mem_ranges)
>   {
>       struct vcpu *curr = current;
>   
> @@ -192,9 +218,9 @@ static void defer_map(struct domain *d, struct pci_dev *pdev,
>        * started for the same device if the domain is not well-behaved.
>        */
>       curr->vpci.pdev = pdev;
> -    curr->vpci.mem = mem;
>       curr->vpci.cmd = cmd;
>       curr->vpci.rom_only = rom_only;
> +    curr->vpci.num_mem_ranges = num_mem_ranges;
>       /*
>        * Raise a scheduler softirq in order to prevent the guest from resuming
>        * execution with pending mapping operations, to trigger the invocation
> @@ -206,42 +232,47 @@ static void defer_map(struct domain *d, struct pci_dev *pdev,
>   static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
>   {
>       struct vpci_header *header = &pdev->vpci->header;
> -    struct rangeset *mem = rangeset_new(NULL, NULL, 0);
>       struct pci_dev *tmp, *dev = NULL;
>       const struct vpci_msix *msix = pdev->vpci->msix;
> -    unsigned int i;
> +    unsigned int i, j;
>       int rc;
> -
> -    if ( !mem )
> -        return -ENOMEM;
> +    uint8_t num_mem_ranges;
>   
>       /*
> -     * Create a rangeset that represents the current device BARs memory region
> +     * Create a rangeset per BAR that represents the current device memory region
>        * and compare it against all the currently active BAR memory regions. If
>        * an overlap is found, subtract it from the region to be mapped/unmapped.
>        *
> -     * First fill the rangeset with all the BARs of this device or with the ROM
> +     * First fill the rangesets with all the BARs of this device or with the ROM
>        * BAR only, depending on whether the guest is toggling the memory decode
>        * bit of the command register, or the enable bit of the ROM BAR register.
>        */
>       for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
>       {
> -        const struct vpci_bar *bar = &header->bars[i];
> +        struct vpci_bar *bar = &header->bars[i];
>           unsigned long start = PFN_DOWN(bar->addr);
>           unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
>   
> +        bar->mem = NULL;
> +
>           if ( !MAPPABLE_BAR(bar) ||
>                (rom_only ? bar->type != VPCI_BAR_ROM
>                          : (bar->type == VPCI_BAR_ROM && !header->rom_enabled)) )
>               continue;
>   
> -        rc = rangeset_add_range(mem, start, end);
> +        bar->mem = rangeset_new(NULL, NULL, 0);
> +        if ( !bar->mem )
> +        {
> +            rc = -ENOMEM;
> +            goto fail;
> +        }
> +
> +        rc = rangeset_add_range(bar->mem, start, end);
>           if ( rc )
>           {
>               printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n",
>                      start, end, rc);
> -            rangeset_destroy(mem);
> -            return rc;
> +            goto fail;
>           }
>       }
>   
> @@ -252,14 +283,21 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
>           unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) +
>                                        vmsix_table_size(pdev->vpci, i) - 1);
>   
> -        rc = rangeset_remove_range(mem, start, end);
> -        if ( rc )
> +        for ( j = 0; j < ARRAY_SIZE(header->bars); j++ )
>           {
> -            printk(XENLOG_G_WARNING
> -                   "Failed to remove MSIX table [%lx, %lx]: %d\n",
> -                   start, end, rc);
> -            rangeset_destroy(mem);
> -            return rc;
> +            const struct vpci_bar *bar = &header->bars[j];
> +
> +            if ( !bar->mem )
> +                continue;
> +
> +            rc = rangeset_remove_range(bar->mem, start, end);
> +            if ( rc )
> +            {
> +                printk(XENLOG_G_WARNING
> +                       "Failed to remove MSIX table [%lx, %lx]: %d\n",
> +                       start, end, rc);
> +                goto fail;
> +            }
>           }
>       }
>   
> @@ -291,7 +329,8 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
>               unsigned long start = PFN_DOWN(bar->addr);
>               unsigned long end = PFN_DOWN(bar->addr + bar->size - 1);
>   
> -            if ( !bar->enabled || !rangeset_overlaps_range(mem, start, end) ||
> +            if ( !bar->enabled ||
> +                 !rangeset_overlaps_range(bar->mem, start, end) ||
>                    /*
>                     * If only the ROM enable bit is toggled check against other
>                     * BARs in the same device for overlaps, but not against the
> @@ -300,13 +339,12 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
>                    (rom_only && tmp == pdev && bar->type == VPCI_BAR_ROM) )
>                   continue;
>   
> -            rc = rangeset_remove_range(mem, start, end);
> +            rc = rangeset_remove_range(bar->mem, start, end);
>               if ( rc )
>               {
>                   printk(XENLOG_G_WARNING "Failed to remove [%lx, %lx]: %d\n",
>                          start, end, rc);
> -                rangeset_destroy(mem);
> -                return rc;
> +                goto fail;
>               }
>           }
>       }
> @@ -324,12 +362,42 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
>            * will always be to establish mappings and process all the BARs.
>            */
>           ASSERT((cmd & PCI_COMMAND_MEMORY) && !rom_only);
> -        return apply_map(pdev->domain, pdev, mem, cmd);
> +        return apply_map(pdev->domain, pdev, cmd);
>       }
>   
> -    defer_map(dev->domain, dev, mem, cmd, rom_only);
> +    /* Find out how many memory ranges has left after MSI and overlaps. */
> +    num_mem_ranges = 0;
> +    for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> +    {
> +        struct vpci_bar *bar = &header->bars[i];
> +
> +        if ( !rangeset_is_empty(bar->mem) )
> +            num_mem_ranges++;
> +    }
> +
> +    /*
> +     * There are cases when PCI device, root port for example, has neither
> +     * memory space nor IO. In this case PCI command register write is
> +     * missed resulting in the underlying PCI device not functional, so:
> +     *   - if there are no regions write the command register now
> +     *   - if there are regions then defer work and write later on
> +     */
> +    if ( !num_mem_ranges )
> +        pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd);
> +    else
> +        defer_map(dev->domain, dev, cmd, rom_only, num_mem_ranges);
>   
>       return 0;
> +
> +fail:
> +    for ( i = 0; i < ARRAY_SIZE(header->bars); i++ )
> +    {
> +        struct vpci_bar *bar = &header->bars[i];
> +
> +        rangeset_destroy(bar->mem);
> +        bar->mem = NULL;
> +    }
> +    return rc;
>   }
>   
>   static void cmd_write(const struct pci_dev *pdev, unsigned int reg,
> diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
> index a0320b22cb36..352e02d0106d 100644
> --- a/xen/include/xen/vpci.h
> +++ b/xen/include/xen/vpci.h
> @@ -80,6 +80,7 @@ struct vpci {
>               /* Guest view of the BAR. */
>               uint64_t guest_addr;
>               uint64_t size;
> +            struct rangeset *mem;
>               enum {
>                   VPCI_BAR_EMPTY,
>                   VPCI_BAR_IO,
> @@ -154,9 +155,9 @@ struct vpci {
>   
>   struct vpci_vcpu {
>       /* Per-vcpu structure to store state while {un}mapping of PCI BARs. */
> -    struct rangeset *mem;
>       struct pci_dev *pdev;
>       uint16_t cmd;
> +    uint8_t num_mem_ranges;
>       bool rom_only : 1;
>   };
>   
Re: [PATCH v3 06/11] vpci/header: Handle p2m range sets per BAR
Posted by Roger Pau Monné 3 years, 1 month ago
On Mon, Oct 25, 2021 at 11:51:57AM +0000, Oleksandr Andrushchenko wrote:
> Hi, Roger!
> Could you please take a look at the below?
> Jan was questioning the per BAR range set approach, so it
> is crucial for the maintainer (you) to answer here.

I'm open to suggestions to using something different than a rangeset
per BAR, but lacking any concrete proposal I think using rangesets is
fine.

One possible way might be to extend rangesets so that private data
could be stored for each rangeset range, but that would then make
merging operations impossible, likewise splitting ranges would be
troublesome.

We could then store the physical BAR address in that private data and
use the rangeset addresses as guest physical address space. It's
unclear however that this approach would be any better than just using
a rangeset per BAR.

Thanks, Roger.

Re: [PATCH v3 06/11] vpci/header: Handle p2m range sets per BAR
Posted by Jan Beulich 3 years ago
On 26.10.2021 11:40, Roger Pau Monné wrote:
> On Mon, Oct 25, 2021 at 11:51:57AM +0000, Oleksandr Andrushchenko wrote:
>> Hi, Roger!
>> Could you please take a look at the below?
>> Jan was questioning the per BAR range set approach, so it
>> is crucial for the maintainer (you) to answer here.
> 
> I'm open to suggestions to using something different than a rangeset
> per BAR, but lacking any concrete proposal I think using rangesets is
> fine.

The main reason for my objection is that for the average BAR the
rangeset will hold exactly one range. That's not an efficient way
to express a single range.

> One possible way might be to extend rangesets so that private data
> could be stored for each rangeset range, but that would then make
> merging operations impossible, likewise splitting ranges would be
> troublesome.

Indeed, so I don't view this as an option.

Jan