[PATCH] hw/pci-bridge: Fix release ordering by embedding PCIBridgeWindows within PCIBridge

Jonathan Cameron via posted 1 patch 1 year ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20230420145937.17152-1-Jonathan.Cameron@huawei.com
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
There is a newer version of this series
hw/pci/pci_bridge.c         | 20 ++++++++------------
include/hw/pci/pci_bridge.h |  3 ++-
2 files changed, 10 insertions(+), 13 deletions(-)
[PATCH] hw/pci-bridge: Fix release ordering by embedding PCIBridgeWindows within PCIBridge
Posted by Jonathan Cameron via 1 year ago
The lifetime of the PCIBridgeWindows instance accessed via the windows pointer
in struct PCIBridge is managed separately from the PCIBridge itself.

Triggered by ./qemu-system-x86_64 -M x-remote -display none -monitor stdio
QEMU monitor: device_add cxl-downstream

In some error handling paths (such as the above, caused by attaching a
cxl-downstream port to anything other than a cxl-upstream port) the g_free()
of the PCIBridge windows in pci_bridge_region_cleanup() is called before the
final call of flatview_unref() in address_space_set_flatview(), ultimately
from drain_call_rcu().

At one stage this resulted in a crash; currently it can still be observed
using valgrind, which records a use-after-free.

When present, only one instance is allocated. pci_bridge_update_mappings()
can operate directly on an instance rather than creating a new one and
swapping it in.  Thus there appears to be no reason to not directly
couple the lifetimes of the two structures by embedding the PCIBridgeWindows
within the PCIBridge removing the need for the problematic separate free.

Patch is same as was posted deep in the discussion.
https://lore.kernel.org/qemu-devel/20230403171232.000020bb@huawei.com/

Posted as an RFC as only lightly tested and I'm not sure what the reasoning
behind the separation of lifetimes originally was. As such perhaps this is
not the best route to fixing the issue.

Reported-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 hw/pci/pci_bridge.c         | 20 ++++++++------------
 include/hw/pci/pci_bridge.h |  3 ++-
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index dd5af508f9..698fd01ae6 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -184,11 +184,11 @@ static void pci_bridge_init_vga_aliases(PCIBridge *br, PCIBus *parent,
     }
 }
 
-static PCIBridgeWindows *pci_bridge_region_init(PCIBridge *br)
+static void pci_bridge_region_init(PCIBridge *br)
 {
     PCIDevice *pd = PCI_DEVICE(br);
     PCIBus *parent = pci_get_bus(pd);
-    PCIBridgeWindows *w = g_new(PCIBridgeWindows, 1);
+    PCIBridgeWindows *w = &br->windows;
     uint16_t cmd = pci_get_word(pd->config + PCI_COMMAND);
 
     pci_bridge_init_alias(br, &w->alias_pref_mem,
@@ -211,8 +211,6 @@ static PCIBridgeWindows *pci_bridge_region_init(PCIBridge *br)
                           cmd & PCI_COMMAND_IO);
 
     pci_bridge_init_vga_aliases(br, parent, w->alias_vga);
-
-    return w;
 }
 
 static void pci_bridge_region_del(PCIBridge *br, PCIBridgeWindows *w)
@@ -234,19 +232,17 @@ static void pci_bridge_region_cleanup(PCIBridge *br, PCIBridgeWindows *w)
     object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_IO_LO]));
     object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_IO_HI]));
     object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_MEM]));
-    g_free(w);
 }
 
 void pci_bridge_update_mappings(PCIBridge *br)
 {
-    PCIBridgeWindows *w = br->windows;
-
+    PCIBridgeWindows *w = &br->windows;
     /* Make updates atomic to: handle the case of one VCPU updating the bridge
      * while another accesses an unaffected region. */
     memory_region_transaction_begin();
-    pci_bridge_region_del(br, br->windows);
+    pci_bridge_region_del(br, w);
     pci_bridge_region_cleanup(br, w);
-    br->windows = pci_bridge_region_init(br);
+    pci_bridge_region_init(br);
     memory_region_transaction_commit();
 }
 
@@ -385,7 +381,7 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename)
     sec_bus->address_space_io = &br->address_space_io;
     memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io",
                        4 * GiB);
-    br->windows = pci_bridge_region_init(br);
+    pci_bridge_region_init(br);
     QLIST_INIT(&sec_bus->child);
     QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
 }
@@ -396,8 +392,8 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
     PCIBridge *s = PCI_BRIDGE(pci_dev);
     assert(QLIST_EMPTY(&s->sec_bus.child));
     QLIST_REMOVE(&s->sec_bus, sibling);
-    pci_bridge_region_del(s, s->windows);
-    pci_bridge_region_cleanup(s, s->windows);
+    pci_bridge_region_del(s, &s->windows);
+    pci_bridge_region_cleanup(s, &s->windows);
     /* object_unparent() is called automatically during device deletion */
 }
 
diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
index 01670e9e65..ac75ec0c1b 100644
--- a/include/hw/pci/pci_bridge.h
+++ b/include/hw/pci/pci_bridge.h
@@ -30,6 +30,7 @@
 #include "hw/pci/pci_bus.h"
 #include "hw/cxl/cxl.h"
 #include "qom/object.h"
+#include "qemu/rcu.h"
 
 typedef struct PCIBridgeWindows PCIBridgeWindows;
 
@@ -73,7 +74,7 @@ struct PCIBridge {
     MemoryRegion address_space_mem;
     MemoryRegion address_space_io;
 
-    PCIBridgeWindows *windows;
+    PCIBridgeWindows windows;
 
     pci_map_irq_fn map_irq;
     const char *bus_name;
-- 
2.37.2
Re: [RFC PATCH] hw/pci-bridge: Fix release ordering by embedding PCIBridgeWindows within PCIBridge
Posted by Philippe Mathieu-Daudé 1 year ago
Hi Jonathan,

On 20/4/23 16:59, Jonathan Cameron via wrote:
> The lifetime of the PCIBridgeWindows instance accessed via the windows pointer
> in struct PCIBridge is managed separately from the PCIBridge itself.
> 
> Triggered by ./qemu-system-x86_64 -M x-remote -display none -monitor stdio
> QEMU monitor: device_add cxl-downstream
> 
> In some error handling paths (such as the above due to attaching a cxl-downstream
> port anything other than a cxl-upstream port) the g_free() of the PCIBridge
> windows in pci_bridge_region_cleanup() is called before the final call of
> flatview_uref() in address_space_set_flatview() ultimately from
> drain_call_rcu()
> 
> At one stage this resulted in a crash, currently can still be observed using
> valgrind which records a use after free.
> 
> When present, only one instance is allocated. pci_bridge_update_mappings()
> can operate directly on an instance rather than creating a new one and
> swapping it in.  Thus there appears to be no reason to not directly
> couple the lifetimes of the two structures by embedding the PCIBridgeWindows
> within the PCIBridge removing the need for the problematic separate free.
> 
> Patch is same as was posted deep in the discussion.
> https://lore.kernel.org/qemu-devel/20230403171232.000020bb@huawei.com/
> 
> Posted as an RFC as only lightly tested and I'm not sure what the reasoning
> behind the separation of lifetimes originally was. As such perhaps this is
> not the best route to fixing the issue.
> 
> Reported-by: Thomas Huth <thuth@redhat.com>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> ---
>   hw/pci/pci_bridge.c         | 20 ++++++++------------
>   include/hw/pci/pci_bridge.h |  3 ++-
>   2 files changed, 10 insertions(+), 13 deletions(-)


> diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
> index 01670e9e65..ac75ec0c1b 100644
> --- a/include/hw/pci/pci_bridge.h
> +++ b/include/hw/pci/pci_bridge.h
> @@ -30,6 +30,7 @@
>   #include "hw/pci/pci_bus.h"
>   #include "hw/cxl/cxl.h"
>   #include "qom/object.h"
> +#include "qemu/rcu.h"

Where is this header used?

>   typedef struct PCIBridgeWindows PCIBridgeWindows;
>   
> @@ -73,7 +74,7 @@ struct PCIBridge {
>       MemoryRegion address_space_mem;
>       MemoryRegion address_space_io;
>   
> -    PCIBridgeWindows *windows;
> +    PCIBridgeWindows windows;
>   
>       pci_map_irq_fn map_irq;
>       const char *bus_name;
Re: [RFC PATCH] hw/pci-bridge: Fix release ordering by embedding PCIBridgeWindows within PCIBridge
Posted by Jonathan Cameron via 1 year ago
On Fri, 21 Apr 2023 08:26:43 +0200
Philippe Mathieu-Daudé <philmd@linaro.org> wrote:

> Hi Jonathan,
> 
> On 20/4/23 16:59, Jonathan Cameron via wrote:
> > The lifetime of the PCIBridgeWindows instance accessed via the windows pointer
> > in struct PCIBridge is managed separately from the PCIBridge itself.
> > 
> > Triggered by ./qemu-system-x86_64 -M x-remote -display none -monitor stdio
> > QEMU monitor: device_add cxl-downstream
> > 
> > In some error handling paths (such as the above due to attaching a cxl-downstream
> > port anything other than a cxl-upstream port) the g_free() of the PCIBridge
> > windows in pci_bridge_region_cleanup() is called before the final call of
> > flatview_uref() in address_space_set_flatview() ultimately from
> > drain_call_rcu()
> > 
> > At one stage this resulted in a crash, currently can still be observed using
> > valgrind which records a use after free.
> > 
> > When present, only one instance is allocated. pci_bridge_update_mappings()
> > can operate directly on an instance rather than creating a new one and
> > swapping it in.  Thus there appears to be no reason to not directly
> > couple the lifetimes of the two structures by embedding the PCIBridgeWindows
> > within the PCIBridge removing the need for the problematic separate free.
> > 
> > Patch is same as was posted deep in the discussion.
> > https://lore.kernel.org/qemu-devel/20230403171232.000020bb@huawei.com/
> > 
> > Posted as an RFC as only lightly tested and I'm not sure what the reasoning
> > behind the separation of lifetimes originally was. As such perhaps this is
> > not the best route to fixing the issue.
> > 
> > Reported-by: Thomas Huth <thuth@redhat.com>
> > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > ---
> >   hw/pci/pci_bridge.c         | 20 ++++++++------------
> >   include/hw/pci/pci_bridge.h |  3 ++-
> >   2 files changed, 10 insertions(+), 13 deletions(-)  
> 
> 
> > diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
> > index 01670e9e65..ac75ec0c1b 100644
> > --- a/include/hw/pci/pci_bridge.h
> > +++ b/include/hw/pci/pci_bridge.h
> > @@ -30,6 +30,7 @@
> >   #include "hw/pci/pci_bus.h"
> >   #include "hw/cxl/cxl.h"
> >   #include "qom/object.h"
> > +#include "qemu/rcu.h"  
> 
> Where is this header is used

Leftover garbage from a previous attempt at a fix.  Good spot.
I'll clean that out and resend shortly.

Thanks,

Jonathan
 
> 
> >   typedef struct PCIBridgeWindows PCIBridgeWindows;
> >   
> > @@ -73,7 +74,7 @@ struct PCIBridge {
> >       MemoryRegion address_space_mem;
> >       MemoryRegion address_space_io;
> >   
> > -    PCIBridgeWindows *windows;
> > +    PCIBridgeWindows windows;
> >   
> >       pci_map_irq_fn map_irq;
> >       const char *bus_name;  
> 
Re: [PATCH] hw/pci-bridge: Fix release ordering by embedding PCIBridgeWindows within PCIBridge
Posted by Jonathan Cameron via 1 year ago
This was intended to be [RFC] for reasons given below.
+ I failed to CC Thomas who reported the issue.  Not my finest hour.

On Thu, 20 Apr 2023 15:59:37 +0100
Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote:

> The lifetime of the PCIBridgeWindows instance accessed via the windows pointer
> in struct PCIBridge is managed separately from the PCIBridge itself.
> 
> Triggered by ./qemu-system-x86_64 -M x-remote -display none -monitor stdio
> QEMU monitor: device_add cxl-downstream
> 
> In some error handling paths (such as the above due to attaching a cxl-downstream
> port anything other than a cxl-upstream port) the g_free() of the PCIBridge
> windows in pci_bridge_region_cleanup() is called before the final call of
> flatview_uref() in address_space_set_flatview() ultimately from
> drain_call_rcu()
> 
> At one stage this resulted in a crash, currently can still be observed using
> valgrind which records a use after free.
> 
> When present, only one instance is allocated. pci_bridge_update_mappings()
> can operate directly on an instance rather than creating a new one and
> swapping it in.  Thus there appears to be no reason to not directly
> couple the lifetimes of the two structures by embedding the PCIBridgeWindows
> within the PCIBridge removing the need for the problematic separate free.
> 
> Patch is same as was posted deep in the discussion.
> https://lore.kernel.org/qemu-devel/20230403171232.000020bb@huawei.com/
> 
> Posted as an RFC as only lightly tested and I'm not sure what the reasoning
> behind the separation of lifetimes originally was. As such perhaps this is
> not the best route to fixing the issue.
> 
> Reported-by: Thomas Huth <thuth@redhat.com>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> ---
>  hw/pci/pci_bridge.c         | 20 ++++++++------------
>  include/hw/pci/pci_bridge.h |  3 ++-
>  2 files changed, 10 insertions(+), 13 deletions(-)
> 
> diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
> index dd5af508f9..698fd01ae6 100644
> --- a/hw/pci/pci_bridge.c
> +++ b/hw/pci/pci_bridge.c
> @@ -184,11 +184,11 @@ static void pci_bridge_init_vga_aliases(PCIBridge *br, PCIBus *parent,
>      }
>  }
>  
> -static PCIBridgeWindows *pci_bridge_region_init(PCIBridge *br)
> +static void pci_bridge_region_init(PCIBridge *br)
>  {
>      PCIDevice *pd = PCI_DEVICE(br);
>      PCIBus *parent = pci_get_bus(pd);
> -    PCIBridgeWindows *w = g_new(PCIBridgeWindows, 1);
> +    PCIBridgeWindows *w = &br->windows;
>      uint16_t cmd = pci_get_word(pd->config + PCI_COMMAND);
>  
>      pci_bridge_init_alias(br, &w->alias_pref_mem,
> @@ -211,8 +211,6 @@ static PCIBridgeWindows *pci_bridge_region_init(PCIBridge *br)
>                            cmd & PCI_COMMAND_IO);
>  
>      pci_bridge_init_vga_aliases(br, parent, w->alias_vga);
> -
> -    return w;
>  }
>  
>  static void pci_bridge_region_del(PCIBridge *br, PCIBridgeWindows *w)
> @@ -234,19 +232,17 @@ static void pci_bridge_region_cleanup(PCIBridge *br, PCIBridgeWindows *w)
>      object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_IO_LO]));
>      object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_IO_HI]));
>      object_unparent(OBJECT(&w->alias_vga[QEMU_PCI_VGA_MEM]));
> -    g_free(w);
>  }
>  
>  void pci_bridge_update_mappings(PCIBridge *br)
>  {
> -    PCIBridgeWindows *w = br->windows;
> -
> +    PCIBridgeWindows *w = &br->windows;
>      /* Make updates atomic to: handle the case of one VCPU updating the bridge
>       * while another accesses an unaffected region. */
>      memory_region_transaction_begin();
> -    pci_bridge_region_del(br, br->windows);
> +    pci_bridge_region_del(br, w);
>      pci_bridge_region_cleanup(br, w);
> -    br->windows = pci_bridge_region_init(br);
> +    pci_bridge_region_init(br);
>      memory_region_transaction_commit();
>  }
>  
> @@ -385,7 +381,7 @@ void pci_bridge_initfn(PCIDevice *dev, const char *typename)
>      sec_bus->address_space_io = &br->address_space_io;
>      memory_region_init(&br->address_space_io, OBJECT(br), "pci_bridge_io",
>                         4 * GiB);
> -    br->windows = pci_bridge_region_init(br);
> +    pci_bridge_region_init(br);
>      QLIST_INIT(&sec_bus->child);
>      QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
>  }
> @@ -396,8 +392,8 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
>      PCIBridge *s = PCI_BRIDGE(pci_dev);
>      assert(QLIST_EMPTY(&s->sec_bus.child));
>      QLIST_REMOVE(&s->sec_bus, sibling);
> -    pci_bridge_region_del(s, s->windows);
> -    pci_bridge_region_cleanup(s, s->windows);
> +    pci_bridge_region_del(s, &s->windows);
> +    pci_bridge_region_cleanup(s, &s->windows);
>      /* object_unparent() is called automatically during device deletion */
>  }
>  
> diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
> index 01670e9e65..ac75ec0c1b 100644
> --- a/include/hw/pci/pci_bridge.h
> +++ b/include/hw/pci/pci_bridge.h
> @@ -30,6 +30,7 @@
>  #include "hw/pci/pci_bus.h"
>  #include "hw/cxl/cxl.h"
>  #include "qom/object.h"
> +#include "qemu/rcu.h"
>  
>  typedef struct PCIBridgeWindows PCIBridgeWindows;
>  
> @@ -73,7 +74,7 @@ struct PCIBridge {
>      MemoryRegion address_space_mem;
>      MemoryRegion address_space_io;
>  
> -    PCIBridgeWindows *windows;
> +    PCIBridgeWindows windows;
>  
>      pci_map_irq_fn map_irq;
>      const char *bus_name;