[PATCH v2] xen/pci: detect when BARs are not suitably positioned

Roger Pau Monne posted 1 patch 2 years, 3 months ago
Test gitlab-ci passed
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/20220127082218.57902-1-roger.pau@citrix.com
There is a newer version of this series
xen/arch/arm/mm.c             | 11 ++++++
xen/arch/x86/mm.c             | 17 +++++++++
xen/drivers/passthrough/pci.c | 69 ++++++++++++++++++++++++++++++++++-
xen/include/xen/mm.h          |  2 +
xen/include/xen/pci_regs.h    |  2 +
xen/include/xen/vpci.h        |  2 -
6 files changed, 100 insertions(+), 3 deletions(-)
[PATCH v2] xen/pci: detect when BARs are not suitably positioned
Posted by Roger Pau Monne 2 years, 3 months ago
One of the boxes where I was attempting to boot Xen in PVH dom0 mode
has quirky firmware, as it will handover with a PCI device with memory
decoding enabled and a BAR of size 4K at address 0. Such BAR overlaps
with a RAM range on the e820.

This interacts badly with the dom0 PVH build, as BARs will be setup on
the p2m before RAM, so if there's a BAR positioned over a RAM region
it will trigger a domain crash when the dom0 builder attempts to
populate that region with a regular RAM page.

It's in general a very bad idea to have a BAR overlapping with a RAM
region, so add some sanity checks for devices that are added with
memory decoding enabled in order to assure that BARs are not placed on
top of memory regions defined in the memory map. If overlaps are
detected just disable the memory decoding bit for the device and
expect the hardware domain to properly position the BAR.

Note apply_quirks must be called before check_pdev so that ignore_bars
is set when calling the later. PCI_HEADER_{NORMAL,BRIDGE}_NR_BARS
needs to be moved into pci_regs.h so it's defined even in the absence
of vPCI.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v1:
 - Add comment regarding pci_size_mem_bar failure.
 - Make e820entry const.
 - Move is_iomem_range after is_iomem_page.
 - Reword error message.
 - Make is_iomem_range paddr_t
 - Expand commit message.
 - Move PCI_HEADER_{NORMAL,BRIDGE}_NR_BARS.
 - Only attempt to read ROM BAR if rom_pos != 0.
---
 xen/arch/arm/mm.c             | 11 ++++++
 xen/arch/x86/mm.c             | 17 +++++++++
 xen/drivers/passthrough/pci.c | 69 ++++++++++++++++++++++++++++++++++-
 xen/include/xen/mm.h          |  2 +
 xen/include/xen/pci_regs.h    |  2 +
 xen/include/xen/vpci.h        |  2 -
 6 files changed, 100 insertions(+), 3 deletions(-)

diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index eea926d823..4626e9eb8b 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -1625,6 +1625,17 @@ bool is_iomem_page(mfn_t mfn)
     return !mfn_valid(mfn);
 }
 
+bool is_iomem_range(paddr_t start, uint64_t size)
+{
+    unsigned int i;
+
+    for ( i = 0; i < PFN_UP(size); i++ )
+        if ( !is_iomem_page(_mfn(PFN_DOWN(start) + i)) )
+            return false;
+
+    return true;
+}
+
 void clear_and_clean_page(struct page_info *page)
 {
     void *p = __map_domain_page(page);
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 1397f83e41..3f451134c6 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -783,6 +783,23 @@ bool is_iomem_page(mfn_t mfn)
     return (page_get_owner(page) == dom_io);
 }
 
+bool is_iomem_range(paddr_t start, uint64_t size)
+{
+    unsigned int i;
+
+    for ( i = 0; i < e820.nr_map; i++ )
+    {
+        const struct e820entry *entry = &e820.map[i];
+
+        /* Do not allow overlaps with any memory range. */
+        if ( start < entry->addr + entry->size &&
+             entry->addr < start + size )
+            return false;
+    }
+
+    return true;
+}
+
 static int update_xen_mappings(unsigned long mfn, unsigned int cacheattr)
 {
     int err = 0;
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 0d8ab2e716..07753109e0 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -233,6 +233,7 @@ static void check_pdev(const struct pci_dev *pdev)
      PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_REC_MASTER_ABORT | \
      PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_DETECTED_PARITY)
     u16 val;
+    unsigned int nbars = 0, rom_pos = 0, i;
 
     if ( command_mask )
     {
@@ -251,6 +252,8 @@ static void check_pdev(const struct pci_dev *pdev)
     switch ( pci_conf_read8(pdev->sbdf, PCI_HEADER_TYPE) & 0x7f )
     {
     case PCI_HEADER_TYPE_BRIDGE:
+        nbars = PCI_HEADER_BRIDGE_NR_BARS;
+        rom_pos = PCI_ROM_ADDRESS1;
         if ( !bridge_ctl_mask )
             break;
         val = pci_conf_read16(pdev->sbdf, PCI_BRIDGE_CONTROL);
@@ -267,11 +270,75 @@ static void check_pdev(const struct pci_dev *pdev)
         }
         break;
 
+    case PCI_HEADER_TYPE_NORMAL:
+        nbars = PCI_HEADER_NORMAL_NR_BARS;
+        rom_pos = PCI_ROM_ADDRESS;
+        break;
+
     case PCI_HEADER_TYPE_CARDBUS:
         /* TODO */
         break;
     }
 #undef PCI_STATUS_CHECK
+
+    /* Check if BARs overlap with other memory regions. */
+    val = pci_conf_read16(pdev->sbdf, PCI_COMMAND);
+    if ( !(val & PCI_COMMAND_MEMORY) || pdev->ignore_bars )
+        return;
+
+    pci_conf_write16(pdev->sbdf, PCI_COMMAND, val & ~PCI_COMMAND_MEMORY);
+    for ( i = 0; i < nbars; )
+    {
+        uint64_t addr, size;
+        unsigned int reg = PCI_BASE_ADDRESS_0 + i * 4;
+        int rc = 1;
+
+        if ( (pci_conf_read32(pdev->sbdf, reg) & PCI_BASE_ADDRESS_SPACE) !=
+             PCI_BASE_ADDRESS_SPACE_MEMORY )
+            goto next;
+
+        rc = pci_size_mem_bar(pdev->sbdf, reg, &addr, &size,
+                              (i == nbars - 1) ? PCI_BAR_LAST : 0);
+        if ( rc < 0 )
+            /* Unable to size, better leave memory decoding disabled. */
+            return;
+        if ( size && !is_iomem_range(addr, size) )
+        {
+            /*
+             * Return without enabling memory decoding if BAR position is not
+             * in IO suitable memory. Let the hardware domain re-position the
+             * BAR.
+             */
+            printk(XENLOG_WARNING
+"%pp disabled: BAR%u [%" PRIx64 ", %" PRIx64 ") overlaps with memory map\n",
+                   &pdev->sbdf, i, addr, addr + size);
+            return;
+        }
+
+ next:
+        ASSERT(rc > 0);
+        i += rc;
+    }
+
+    if ( rom_pos &&
+         (pci_conf_read32(pdev->sbdf, rom_pos) & PCI_ROM_ADDRESS_ENABLE) )
+    {
+        uint64_t addr, size;
+        int rc = pci_size_mem_bar(pdev->sbdf, rom_pos, &addr, &size,
+                                  PCI_BAR_ROM);
+
+        if ( rc < 0 )
+            return;
+        if ( size && !is_iomem_range(addr, size) )
+        {
+            printk(XENLOG_WARNING
+"%pp disabled: ROM BAR [%" PRIx64 ", %" PRIx64 ") overlaps with memory map\n",
+                   &pdev->sbdf, addr, addr + size);
+            return;
+        }
+    }
+
+    pci_conf_write16(pdev->sbdf, PCI_COMMAND, val);
 }
 
 static void apply_quirks(struct pci_dev *pdev)
@@ -399,8 +466,8 @@ static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
             break;
     }
 
-    check_pdev(pdev);
     apply_quirks(pdev);
+    check_pdev(pdev);
 
     return pdev;
 }
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 5db26ed477..4801811bb5 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -554,6 +554,8 @@ int __must_check steal_page(struct domain *d, struct page_info *page,
 int page_is_ram_type(unsigned long mfn, unsigned long mem_type);
 /* Returns the page type(s). */
 unsigned int page_get_ram_type(mfn_t mfn);
+/* Check if a range is in IO suitable memory. */
+bool is_iomem_range(paddr_t start, uint64_t size);
 
 /* Prepare/destroy a ring for a dom0 helper. Helper with talk
  * with Xen on behalf of this domain. */
diff --git a/xen/include/xen/pci_regs.h b/xen/include/xen/pci_regs.h
index cc4ee3b83e..ee8e82be36 100644
--- a/xen/include/xen/pci_regs.h
+++ b/xen/include/xen/pci_regs.h
@@ -88,6 +88,8 @@
  * 0xffffffff to the register, and reading it back.  Only
  * 1 bits are decoded.
  */
+#define PCI_HEADER_NORMAL_NR_BARS	6
+#define PCI_HEADER_BRIDGE_NR_BARS	2
 #define PCI_BASE_ADDRESS_0	0x10	/* 32 bits */
 #define PCI_BASE_ADDRESS_1	0x14	/* 32 bits [htype 0,1 only] */
 #define PCI_BASE_ADDRESS_2	0x18	/* 32 bits [htype 0 only] */
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index 3f32de9d7e..e8ac1eb395 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -80,8 +80,6 @@ struct vpci {
             bool prefetchable : 1;
             /* Store whether the BAR is mapped into guest p2m. */
             bool enabled      : 1;
-#define PCI_HEADER_NORMAL_NR_BARS        6
-#define PCI_HEADER_BRIDGE_NR_BARS        2
         } bars[PCI_HEADER_NORMAL_NR_BARS + 1];
         /* At most 6 BARS + 1 expansion ROM BAR. */
 
-- 
2.34.1


Re: [PATCH v2] xen/pci: detect when BARs are not suitably positioned
Posted by Jan Beulich 2 years, 3 months ago
On 27.01.2022 09:22, Roger Pau Monne wrote:
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -1625,6 +1625,17 @@ bool is_iomem_page(mfn_t mfn)
>      return !mfn_valid(mfn);
>  }
>  
> +bool is_iomem_range(paddr_t start, uint64_t size)
> +{
> +    unsigned int i;
> +
> +    for ( i = 0; i < PFN_UP(size); i++ )
> +        if ( !is_iomem_page(_mfn(PFN_DOWN(start) + i)) )
> +            return false;
> +
> +    return true;
> +}

I'm afraid you can't very well call this non-RFC as long as this doesn't
scale. Or if you're building on this still being dead code on Arm, then
some kind of "fixme" annotation would be needed here (or the function be
omitted altogether, for Arm folks to sort out before they actually start
selecting the HAS_PCI Kconfig setting).

> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -783,6 +783,23 @@ bool is_iomem_page(mfn_t mfn)
>      return (page_get_owner(page) == dom_io);
>  }
>  
> +bool is_iomem_range(paddr_t start, uint64_t size)
> +{
> +    unsigned int i;
> +
> +    for ( i = 0; i < e820.nr_map; i++ )
> +    {
> +        const struct e820entry *entry = &e820.map[i];
> +
> +        /* Do not allow overlaps with any memory range. */
> +        if ( start < entry->addr + entry->size &&
> +             entry->addr < start + size )
> +            return false;
> +    }
> +
> +    return true;
> +}

I should have realized (and pointed out) earlier that with the type
check dropped the function name no longer represents what the
function does. It now really is unsuitable for about anything other
that the checking of BARs.

> @@ -267,11 +270,75 @@ static void check_pdev(const struct pci_dev *pdev)
>          }
>          break;
>  
> +    case PCI_HEADER_TYPE_NORMAL:
> +        nbars = PCI_HEADER_NORMAL_NR_BARS;
> +        rom_pos = PCI_ROM_ADDRESS;
> +        break;
> +
>      case PCI_HEADER_TYPE_CARDBUS:
>          /* TODO */
>          break;
>      }
>  #undef PCI_STATUS_CHECK
> +
> +    /* Check if BARs overlap with other memory regions. */
> +    val = pci_conf_read16(pdev->sbdf, PCI_COMMAND);
> +    if ( !(val & PCI_COMMAND_MEMORY) || pdev->ignore_bars )
> +        return;
> +
> +    pci_conf_write16(pdev->sbdf, PCI_COMMAND, val & ~PCI_COMMAND_MEMORY);
> +    for ( i = 0; i < nbars; )
> +    {
> +        uint64_t addr, size;
> +        unsigned int reg = PCI_BASE_ADDRESS_0 + i * 4;
> +        int rc = 1;
> +
> +        if ( (pci_conf_read32(pdev->sbdf, reg) & PCI_BASE_ADDRESS_SPACE) !=
> +             PCI_BASE_ADDRESS_SPACE_MEMORY )
> +            goto next;
> +
> +        rc = pci_size_mem_bar(pdev->sbdf, reg, &addr, &size,
> +                              (i == nbars - 1) ? PCI_BAR_LAST : 0);
> +        if ( rc < 0 )
> +            /* Unable to size, better leave memory decoding disabled. */
> +            return;
> +        if ( size && !is_iomem_range(addr, size) )
> +        {
> +            /*
> +             * Return without enabling memory decoding if BAR position is not
> +             * in IO suitable memory. Let the hardware domain re-position the
> +             * BAR.
> +             */
> +            printk(XENLOG_WARNING
> +"%pp disabled: BAR%u [%" PRIx64 ", %" PRIx64 ") overlaps with memory map\n",

I guess despite its length this still wants indenting properly. Maybe
you could fold this and ...

> +                   &pdev->sbdf, i, addr, addr + size);
> +            return;
> +        }
> +
> + next:
> +        ASSERT(rc > 0);
> +        i += rc;
> +    }
> +
> +    if ( rom_pos &&
> +         (pci_conf_read32(pdev->sbdf, rom_pos) & PCI_ROM_ADDRESS_ENABLE) )
> +    {
> +        uint64_t addr, size;
> +        int rc = pci_size_mem_bar(pdev->sbdf, rom_pos, &addr, &size,
> +                                  PCI_BAR_ROM);
> +
> +        if ( rc < 0 )
> +            return;
> +        if ( size && !is_iomem_range(addr, size) )
> +        {
> +            printk(XENLOG_WARNING
> +"%pp disabled: ROM BAR [%" PRIx64 ", %" PRIx64 ") overlaps with memory map\n",

.. this into

    static const char warn[] =
        "%pp disabled: %sBAR [%" PRIx64 ", %" PRIx64 ") overlaps with memory map\n";

to limit indentation and redundancy at the same time? Could then also
be re-used later for the SR-IOV BAR check.

Jan