Currently, Xen vPCI refuses BAR writes if the BAR is mapped in p2m. If
firmware initializes a 32-bit BAR to a bad address, Linux may try to
write a new address to the BAR without disabling memory decoding. Since
Xen refuses such writes, the BAR (and thus PCI device) will be
non-functional.
Currently the deferred mapping machinery supports only map or unmap
operations. Rework the deferred mapping machinery to support
unmap-then-map (VPCI_MOVE) operations.
Allow the hardware domain to issue 32-bit BAR writes with memory
decoding enabled, using the VPCI_MOVE operation to remap the BAR in p2m.
Take the opportunity to remove a stray newline in bar_write().
Resolves: https://gitlab.com/xen-project/xen/-/issues/197
Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
---
RFC->v1:
* keep memory decoding enabled in hardware
* allow write while memory decoding is enabled for 32-bit BARs only
* rework BAR mapping machinery to support unmap-then-map operation
---
 xen/drivers/vpci/header.c | 86 +++++++++++++++++++++++++++------------
 xen/include/xen/vpci.h    |  5 +++
 2 files changed, 66 insertions(+), 25 deletions(-)
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index c9519c804d97..f2ffad2ace32 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -214,7 +214,6 @@ bool vpci_process_pending(struct vcpu *v)
     const struct pci_dev *pdev = v->vpci.pdev;
     struct vpci_header *header = NULL;
     unsigned int i;
-    int rc;
 
     if ( !pdev )
         return false;
@@ -229,16 +228,34 @@ bool vpci_process_pending(struct vcpu *v)
     }
 
     header = &pdev->vpci->header;
-    rc = map_bars(header, v->domain, v->vpci.cmd & PCI_COMMAND_MEMORY);
 
-    if ( rc == -ERESTART )
+    if ( v->vpci.map_op == VPCI_UNMAP || v->vpci.map_op == VPCI_MOVE )
     {
-        read_unlock(&v->domain->pci_lock);
-        return true;
+        int rc = map_bars(header, v->domain, false);
+
+        if ( rc == -ERESTART )
+        {
+            read_unlock(&v->domain->pci_lock);
+            return true;
+        }
+
+        if ( rc )
+            goto fail;
     }
 
-    if ( rc )
-        goto fail;
+    if ( v->vpci.map_op == VPCI_MAP || v->vpci.map_op == VPCI_MOVE )
+    {
+        int rc = map_bars(header, v->domain, true);
+
+        if ( rc == -ERESTART )
+        {
+            read_unlock(&v->domain->pci_lock);
+            return true;
+        }
+
+        if ( rc )
+            goto fail;
+    }
 
     v->vpci.pdev = NULL;
 
@@ -312,7 +329,8 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
     return rc;
 }
 
-static void defer_map(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
+static void defer_map(const struct pci_dev *pdev, uint16_t cmd,
+                      enum vpci_map_op map_op, bool rom_only)
 {
     struct vcpu *curr = current;
 
@@ -324,6 +342,7 @@ static void defer_map(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
      */
     curr->vpci.pdev = pdev;
     curr->vpci.cmd = cmd;
+    curr->vpci.map_op = map_op;
     curr->vpci.rom_only = rom_only;
     /*
      * Raise a scheduler softirq in order to prevent the guest from resuming
@@ -333,7 +352,8 @@ static void defer_map(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
     raise_softirq(SCHEDULE_SOFTIRQ);
 }
 
-static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
+static int modify_bars(const struct pci_dev *pdev, uint16_t cmd,
+                       enum vpci_map_op map_op, bool rom_only)
 {
     struct vpci_header *header = &pdev->vpci->header;
     struct pci_dev *tmp;
@@ -344,9 +364,9 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
 
     ASSERT(rw_is_write_locked(&pdev->domain->pci_lock));
 
-    if ( !(cmd & PCI_COMMAND_MEMORY) )
+    if ( map_op == VPCI_UNMAP )
     {
-        defer_map(pdev, cmd, rom_only);
+        defer_map(pdev, cmd, map_op, rom_only);
 
         return 0;
     }
@@ -378,7 +398,8 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
              (rom_only ? bar->type != VPCI_BAR_ROM
                        : (bar->type == VPCI_BAR_ROM && !header->rom_enabled)) ||
              /* Skip BARs already in the requested state. */
-             bar->enabled == !!(cmd & PCI_COMMAND_MEMORY) )
+             (bar->enabled == !!(cmd & PCI_COMMAND_MEMORY) &&
+              map_op != VPCI_MOVE) )
             continue;
 
         if ( !pci_check_bar(pdev, _mfn(start), _mfn(end)) )
@@ -551,7 +572,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only)
         return apply_map(pdev->domain, pdev, cmd);
     }
 
-    defer_map(pdev, cmd, rom_only);
+    defer_map(pdev, cmd, map_op, rom_only);
 
     return 0;
 }
@@ -584,7 +605,8 @@ static void cf_check cmd_write(
          * memory decoding bit has not been changed, so leave everything as-is,
          * hoping the guest will realize and try again.
          */
-        modify_bars(pdev, cmd, false);
+        modify_bars(pdev, cmd, cmd & PCI_COMMAND_MEMORY ? VPCI_MAP : VPCI_UNMAP,
+                    false);
     else
         pci_conf_write16(pdev->sbdf, reg, cmd);
 }
@@ -615,20 +637,27 @@ static void cf_check bar_write(
         val &= PCI_BASE_ADDRESS_MEM_MASK;
 
     /*
-     * Xen only cares whether the BAR is mapped into the p2m, so allow BAR
-     * writes as long as the BAR is not mapped into the p2m.
+     * Allow 64-bit BAR writes only when the BAR is not mapped in p2m. Always
+     * allow 32-bit BAR writes, but skip unnecessary p2m operations when mapped.
      */
     if ( bar->enabled )
     {
-        /* If the value written is the current one avoid printing a warning. */
-        if ( val != (uint32_t)(bar->addr >> (hi ? 32 : 0)) )
-            gprintk(XENLOG_WARNING,
-                    "%pp: ignored BAR %zu write while mapped\n",
-                    &pdev->sbdf, bar - pdev->vpci->header.bars + hi);
-        return;
+        if ( bar->type == VPCI_BAR_MEM32 )
+        {
+            if ( val == bar->addr )
+                return;
+        }
+        else
+        {
+            /* If the value written is the same avoid printing a warning. */
+            if ( val != (uint32_t)(bar->addr >> (hi ? 32 : 0)) )
+                gprintk(XENLOG_WARNING,
+                        "%pp: ignored BAR %zu write while mapped\n",
+                        &pdev->sbdf, bar - pdev->vpci->header.bars + hi);
+            return;
+        }
     }
 
-
     /*
      * Update the cached address, so that when memory decoding is enabled
      * Xen can map the BAR into the guest p2m.
@@ -647,6 +676,10 @@ static void cf_check bar_write(
     }
 
     pci_conf_write32(pdev->sbdf, reg, val);
+
+    if ( bar->enabled )
+        modify_bars(pdev, pci_conf_read16(pdev->sbdf, PCI_COMMAND), VPCI_MOVE,
+                    false);
 }
 
 static void cf_check guest_mem_bar_write(const struct pci_dev *pdev,
@@ -752,7 +785,8 @@ static void cf_check rom_write(
      * Pass PCI_COMMAND_MEMORY or 0 to signal a map/unmap request, note that
      * this fabricated command is never going to be written to the register.
      */
-    else if ( modify_bars(pdev, new_enabled ? PCI_COMMAND_MEMORY : 0, true) )
+    else if ( modify_bars(pdev, new_enabled ? PCI_COMMAND_MEMORY : 0,
+                          new_enabled ? VPCI_MAP : VPCI_UNMAP, true) )
         /*
          * No memory has been added or removed from the p2m (because the actual
          * p2m changes are deferred in defer_map) and the ROM enable bit has
@@ -1054,7 +1088,9 @@ static int cf_check init_header(struct pci_dev *pdev)
             goto fail;
     }
 
-    return (cmd & PCI_COMMAND_MEMORY) ? modify_bars(pdev, cmd, false) : 0;
+    return (cmd & PCI_COMMAND_MEMORY)
+           ? modify_bars(pdev, cmd, VPCI_MAP, false)
+           : 0;
 
  fail:
     pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd);
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index e74359848440..2ddfb147e7b7 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -197,6 +197,11 @@ struct vpci_vcpu {
     /* Per-vcpu structure to store state while {un}mapping of PCI BARs. */
     const struct pci_dev *pdev;
     uint16_t cmd;
+    enum vpci_map_op {
+        VPCI_MAP,
+        VPCI_UNMAP,
+        VPCI_MOVE,
+    } map_op;
     bool rom_only : 1;
 };
 
-- 
2.49.0On 31.05.2025 14:54, Stewart Hildebrand wrote: > Currently, Xen vPCI refuses BAR writes if the BAR is mapped in p2m. If > firmware initializes a 32-bit BAR to a bad address, Linux may try to > write a new address to the BAR without disabling memory decoding. Since > Xen refuses such writes, the BAR (and thus PCI device) will be > non-functional. Doing this for 32-bit BARs only, with not even an outline what to do about the same issue with 64-bit ones, seems like it won't buy us very much. Jan
On Thu, Jun 05, 2025 at 12:41:06PM +0200, Jan Beulich wrote: > On 31.05.2025 14:54, Stewart Hildebrand wrote: > > Currently, Xen vPCI refuses BAR writes if the BAR is mapped in p2m. If > > firmware initializes a 32-bit BAR to a bad address, Linux may try to > > write a new address to the BAR without disabling memory decoding. Since > > Xen refuses such writes, the BAR (and thus PCI device) will be > > non-functional. > > Doing this for 32-bit BARs only, with not even an outline what to do about > the same issue with 64-bit ones, seems like it won't buy us very much. IIRC Linux will disable decoding in the common case when updating the position of a 64bit BAR. However it won't disable decoding for 32bit BARs. I think that's why Stewart cares more about the 32bit case than the 64bit one. Regards, Roger.
On 6/5/25 06:41, Jan Beulich wrote: > On 31.05.2025 14:54, Stewart Hildebrand wrote: >> Currently, Xen vPCI refuses BAR writes if the BAR is mapped in p2m. If >> firmware initializes a 32-bit BAR to a bad address, Linux may try to >> write a new address to the BAR without disabling memory decoding. Since >> Xen refuses such writes, the BAR (and thus PCI device) will be >> non-functional. > > Doing this for 32-bit BARs only, with not even an outline what to do about > the same issue with 64-bit ones, seems like it won't buy us very much. It buys us quite a lot: it means the difference between booting vs. booting with degraded functionality or not booting at all with PVH dom0 on some platforms with certain PCI devices plugged in. The plan for 64-bit BARs for now is to continue to refuse the write(s) when the 64-bit BAR is mapped to avoid mapping half-updated BARs in p2m. I'll add something to this effect to the commit message. Also see https://gitlab.com/xen-project/xen/-/issues/197
On 11.06.2025 22:22, Stewart Hildebrand wrote: > On 6/5/25 06:41, Jan Beulich wrote: >> On 31.05.2025 14:54, Stewart Hildebrand wrote: >>> Currently, Xen vPCI refuses BAR writes if the BAR is mapped in p2m. If >>> firmware initializes a 32-bit BAR to a bad address, Linux may try to >>> write a new address to the BAR without disabling memory decoding. Since >>> Xen refuses such writes, the BAR (and thus PCI device) will be >>> non-functional. >> >> Doing this for 32-bit BARs only, with not even an outline what to do about >> the same issue with 64-bit ones, seems like it won't buy us very much. > > It buys us quite a lot: it means the difference between booting vs. > booting with degraded functionality or not booting at all with PVH dom0 > on some platforms with certain PCI devices plugged in. > > The plan for 64-bit BARs for now is to continue to refuse the write(s) > when the 64-bit BAR is mapped to avoid mapping half-updated BARs in p2m. > > I'll add something to this effect to the commit message. Yes please, in particular ... > Also see https://gitlab.com/xen-project/xen/-/issues/197 ... to make clear that Linux indeed aims at disabling memory decode when fiddling with 64-bit BARs. That reduces the set of remaining cases quite a bit. Jan
© 2016 - 2025 Red Hat, Inc.