hw/s390x/s390-pci-bus.c | 28 ++++++++++++++++++++++++++++ hw/s390x/s390-pci-vfio.c | 2 ++ include/hw/s390x/s390-pci-bus.h | 5 +++++ 3 files changed, 35 insertions(+)
ISM device firmware stores unique state information that can
can cause a wholesale unmap of the associated IOMMU (e.g. when
we get a termination signal for QEMU) to trigger firmware errors
because firmware believes we are attempting to invalidate entries
that are still in-use by the guest OS (when in fact that guest is
in the process of being terminated or rebooted).
To alleviate this, register both a shutdown notifier (for unexpected
termination cases e.g. virsh destroy) as well as a reset callback
(for cases like guest OS reboot). For each of these scenarios, trigger
PCI device reset; this is enough to indicate to firmware that the IOMMU
is no longer in-use by the guest OS, making it safe to invalidate any
associated IOMMU entries.
Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices without MSI-X")
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
hw/s390x/s390-pci-bus.c | 28 ++++++++++++++++++++++++++++
hw/s390x/s390-pci-vfio.c | 2 ++
include/hw/s390x/s390-pci-bus.h | 5 +++++
3 files changed, 35 insertions(+)
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 977e7daa15..02751f3597 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -24,6 +24,8 @@
#include "hw/pci/msi.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
+#include "sysemu/reset.h"
+#include "sysemu/runstate.h"
#ifndef DEBUG_S390PCI_BUS
#define DEBUG_S390PCI_BUS 0
@@ -150,10 +152,30 @@ out:
psccb->header.response_code = cpu_to_be16(rc);
}
+static void s390_pci_shutdown_notifier(Notifier *n, void *opaque)
+{
+ S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice,
+ shutdown_notifier);
+
+ pci_device_reset(pbdev->pdev);
+}
+
+static void s390_pci_reset_cb(void *opaque)
+{
+ S390PCIBusDevice *pbdev = opaque;
+
+ pci_device_reset(pbdev->pdev);
+}
+
static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev)
{
HotplugHandler *hotplug_ctrl;
+ if (pbdev->pft == ZPCI_PFT_ISM) {
+ notifier_remove(&pbdev->shutdown_notifier);
+ qemu_unregister_reset(s390_pci_reset_cb, pbdev);
+ }
+
/* Unplug the PCI device */
if (pbdev->pdev) {
DeviceState *pdev = DEVICE(pbdev->pdev);
@@ -1111,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
pbdev->fh |= FH_SHM_VFIO;
pbdev->forwarding_assist = false;
}
+ /* Register shutdown notifier and reset callback for ISM devices */
+ if (pbdev->pft == ZPCI_PFT_ISM) {
+ pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier;
+ qemu_register_shutdown_notifier(&pbdev->shutdown_notifier);
+ qemu_register_reset(s390_pci_reset_cb, pbdev);
+ }
} else {
pbdev->fh |= FH_SHM_EMUL;
/* Always intercept emulated devices */
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index 5f0adb0b4a..419763f829 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -122,6 +122,8 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev,
/* The following values remain 0 until we support other FMB formats */
pbdev->zpci_fn.fmbl = 0;
pbdev->zpci_fn.pft = 0;
+ /* Store function type separately for type-specific behavior */
+ pbdev->pft = cap->pft;
}
static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info,
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
index 0605fcea24..4c812c65db 100644
--- a/include/hw/s390x/s390-pci-bus.h
+++ b/include/hw/s390x/s390-pci-bus.h
@@ -39,6 +39,9 @@
#define UID_CHECKING_ENABLED 0x01
#define ZPCI_DTSM 0x40
+/* zPCI Function Types */
+#define ZPCI_PFT_ISM 5
+
OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE)
OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS)
OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBusDevice, S390_PCI_DEVICE)
@@ -343,6 +346,7 @@ struct S390PCIBusDevice {
uint16_t noi;
uint16_t maxstbl;
uint8_t sum;
+ uint8_t pft;
S390PCIGroup *pci_group;
ClpRspQueryPci zpci_fn;
S390MsixInfo msix;
@@ -351,6 +355,7 @@ struct S390PCIBusDevice {
MemoryRegion msix_notify_mr;
IndAddr *summary_ind;
IndAddr *indicator;
+ Notifier shutdown_notifier;
bool pci_unplug_request_processed;
bool unplug_requested;
bool interp;
--
2.38.1
On Fri, 2022-12-09 at 14:57 -0500, Matthew Rosato wrote: > ISM device firmware stores unique state information that can > can cause a wholesale unmap of the associated IOMMU (e.g. when > we get a termination signal for QEMU) to trigger firmware errors > because firmware believes we are attempting to invalidate entries > that are still in-use by the guest OS (when in fact that guest is > in the process of being terminated or rebooted). > To alleviate this, register both a shutdown notifier (for unexpected > termination cases e.g. virsh destroy) as well as a reset callback > (for cases like guest OS reboot). For each of these scenarios, > trigger > PCI device reset; this is enough to indicate to firmware that the > IOMMU > is no longer in-use by the guest OS, making it safe to invalidate any > associated IOMMU entries. > > Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices > without MSI-X") > Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> I realize Thomas already queued this, but for the record: Reviewed-by: Eric Farman <farman@linux.ibm.com> > --- > hw/s390x/s390-pci-bus.c | 28 ++++++++++++++++++++++++++++ > hw/s390x/s390-pci-vfio.c | 2 ++ > include/hw/s390x/s390-pci-bus.h | 5 +++++ > 3 files changed, 35 insertions(+) > > diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c > index 977e7daa15..02751f3597 100644 > --- a/hw/s390x/s390-pci-bus.c > +++ b/hw/s390x/s390-pci-bus.c > @@ -24,6 +24,8 @@ > #include "hw/pci/msi.h" > #include "qemu/error-report.h" > #include "qemu/module.h" > +#include "sysemu/reset.h" > +#include "sysemu/runstate.h" > > #ifndef DEBUG_S390PCI_BUS > #define DEBUG_S390PCI_BUS 0 > @@ -150,10 +152,30 @@ out: > psccb->header.response_code = cpu_to_be16(rc); > } > > +static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) > +{ > + S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice, > + shutdown_notifier); > + > + pci_device_reset(pbdev->pdev); > +} > + > +static void s390_pci_reset_cb(void *opaque) > +{ > + S390PCIBusDevice *pbdev = opaque; > + > + pci_device_reset(pbdev->pdev); > +} > + > static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) > { > HotplugHandler *hotplug_ctrl; > > + if (pbdev->pft == ZPCI_PFT_ISM) { > + notifier_remove(&pbdev->shutdown_notifier); > + qemu_unregister_reset(s390_pci_reset_cb, pbdev); > + } > + > /* Unplug the PCI device */ > if (pbdev->pdev) { > DeviceState *pdev = DEVICE(pbdev->pdev); > @@ -1111,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler > *hotplug_dev, DeviceState *dev, > pbdev->fh |= FH_SHM_VFIO; > pbdev->forwarding_assist = false; > } > + /* Register shutdown notifier and reset callback for ISM > devices */ > + if (pbdev->pft == ZPCI_PFT_ISM) { > + pbdev->shutdown_notifier.notify = > s390_pci_shutdown_notifier; > + qemu_register_shutdown_notifier(&pbdev- > >shutdown_notifier); > + qemu_register_reset(s390_pci_reset_cb, pbdev); > + } > } else { > pbdev->fh |= FH_SHM_EMUL; > /* Always intercept emulated devices */ > diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c > index 5f0adb0b4a..419763f829 100644 > --- a/hw/s390x/s390-pci-vfio.c > +++ b/hw/s390x/s390-pci-vfio.c > @@ -122,6 +122,8 @@ static void s390_pci_read_base(S390PCIBusDevice > *pbdev, > /* The following values remain 0 until we support other FMB > formats */ > pbdev->zpci_fn.fmbl = 0; > pbdev->zpci_fn.pft = 0; > + /* Store function type separately for type-specific behavior */ > + pbdev->pft = cap->pft; > } > > static bool get_host_fh(S390PCIBusDevice *pbdev, struct > vfio_device_info *info, > diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390- > pci-bus.h > index 0605fcea24..4c812c65db 100644 > --- a/include/hw/s390x/s390-pci-bus.h > +++ b/include/hw/s390x/s390-pci-bus.h > @@ -39,6 +39,9 @@ > #define UID_CHECKING_ENABLED 0x01 > #define ZPCI_DTSM 0x40 > > +/* zPCI Function Types */ > +#define ZPCI_PFT_ISM 5 > + > OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE) > OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS) > OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBusDevice, S390_PCI_DEVICE) > @@ -343,6 +346,7 @@ struct S390PCIBusDevice { > uint16_t noi; > uint16_t maxstbl; > uint8_t sum; > + uint8_t pft; > S390PCIGroup *pci_group; > ClpRspQueryPci zpci_fn; > S390MsixInfo msix; > @@ -351,6 +355,7 @@ struct S390PCIBusDevice { > MemoryRegion msix_notify_mr; > IndAddr *summary_ind; > IndAddr *indicator; > + Notifier shutdown_notifier; > bool pci_unplug_request_processed; > bool unplug_requested; > bool interp;
On 09/12/2022 20.57, Matthew Rosato wrote: > ISM device firmware stores unique state information that can > can cause a wholesale unmap of the associated IOMMU (e.g. when > we get a termination signal for QEMU) to trigger firmware errors > because firmware believes we are attempting to invalidate entries > that are still in-use by the guest OS (when in fact that guest is > in the process of being terminated or rebooted). > To alleviate this, register both a shutdown notifier (for unexpected > termination cases e.g. virsh destroy) as well as a reset callback > (for cases like guest OS reboot). For each of these scenarios, trigger > PCI device reset; this is enough to indicate to firmware that the IOMMU > is no longer in-use by the guest OS, making it safe to invalidate any > associated IOMMU entries. > > Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices without MSI-X") > Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> > --- > hw/s390x/s390-pci-bus.c | 28 ++++++++++++++++++++++++++++ > hw/s390x/s390-pci-vfio.c | 2 ++ > include/hw/s390x/s390-pci-bus.h | 5 +++++ > 3 files changed, 35 insertions(+) > > diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c > index 977e7daa15..02751f3597 100644 > --- a/hw/s390x/s390-pci-bus.c > +++ b/hw/s390x/s390-pci-bus.c > @@ -24,6 +24,8 @@ > #include "hw/pci/msi.h" > #include "qemu/error-report.h" > #include "qemu/module.h" > +#include "sysemu/reset.h" > +#include "sysemu/runstate.h" > > #ifndef DEBUG_S390PCI_BUS > #define DEBUG_S390PCI_BUS 0 > @@ -150,10 +152,30 @@ out: > psccb->header.response_code = cpu_to_be16(rc); > } > > +static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) > +{ > + S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice, > + shutdown_notifier); > + > + pci_device_reset(pbdev->pdev); > +} > + > +static void s390_pci_reset_cb(void *opaque) > +{ > + S390PCIBusDevice *pbdev = opaque; > + > + pci_device_reset(pbdev->pdev); > +} > + > static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) > { > HotplugHandler *hotplug_ctrl; > > + if (pbdev->pft == ZPCI_PFT_ISM) { > + notifier_remove(&pbdev->shutdown_notifier); > + qemu_unregister_reset(s390_pci_reset_cb, pbdev); > + } > + > /* Unplug the PCI device */ > if (pbdev->pdev) { > DeviceState *pdev = DEVICE(pbdev->pdev); > @@ -1111,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, > pbdev->fh |= FH_SHM_VFIO; > pbdev->forwarding_assist = false; > } > + /* Register shutdown notifier and reset callback for ISM devices */ > + if (pbdev->pft == ZPCI_PFT_ISM) { > + pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier; > + qemu_register_shutdown_notifier(&pbdev->shutdown_notifier); > + qemu_register_reset(s390_pci_reset_cb, pbdev); > + } > } else { > pbdev->fh |= FH_SHM_EMUL; > /* Always intercept emulated devices */ > diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c > index 5f0adb0b4a..419763f829 100644 > --- a/hw/s390x/s390-pci-vfio.c > +++ b/hw/s390x/s390-pci-vfio.c > @@ -122,6 +122,8 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, > /* The following values remain 0 until we support other FMB formats */ > pbdev->zpci_fn.fmbl = 0; > pbdev->zpci_fn.pft = 0; > + /* Store function type separately for type-specific behavior */ > + pbdev->pft = cap->pft; > } Thanks, queued: https://gitlab.com/thuth/qemu/-/commits/s390x-next/ I had to adjust the hunk in s390_pci_read_base() due to a conflict with your earlier patch, please check whether it looks sane to you. Thomas
On 12/12/22 6:34 AM, Thomas Huth wrote: > On 09/12/2022 20.57, Matthew Rosato wrote: >> ISM device firmware stores unique state information that can >> can cause a wholesale unmap of the associated IOMMU (e.g. when >> we get a termination signal for QEMU) to trigger firmware errors >> because firmware believes we are attempting to invalidate entries >> that are still in-use by the guest OS (when in fact that guest is >> in the process of being terminated or rebooted). >> To alleviate this, register both a shutdown notifier (for unexpected >> termination cases e.g. virsh destroy) as well as a reset callback >> (for cases like guest OS reboot). For each of these scenarios, trigger >> PCI device reset; this is enough to indicate to firmware that the IOMMU >> is no longer in-use by the guest OS, making it safe to invalidate any >> associated IOMMU entries. >> >> Fixes: 15d0e7942d3b ("s390x/pci: don't fence interpreted devices without MSI-X") >> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> >> --- >> hw/s390x/s390-pci-bus.c | 28 ++++++++++++++++++++++++++++ >> hw/s390x/s390-pci-vfio.c | 2 ++ >> include/hw/s390x/s390-pci-bus.h | 5 +++++ >> 3 files changed, 35 insertions(+) >> >> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c >> index 977e7daa15..02751f3597 100644 >> --- a/hw/s390x/s390-pci-bus.c >> +++ b/hw/s390x/s390-pci-bus.c >> @@ -24,6 +24,8 @@ >> #include "hw/pci/msi.h" >> #include "qemu/error-report.h" >> #include "qemu/module.h" >> +#include "sysemu/reset.h" >> +#include "sysemu/runstate.h" >> #ifndef DEBUG_S390PCI_BUS >> #define DEBUG_S390PCI_BUS 0 >> @@ -150,10 +152,30 @@ out: >> psccb->header.response_code = cpu_to_be16(rc); >> } >> +static void s390_pci_shutdown_notifier(Notifier *n, void *opaque) >> +{ >> + S390PCIBusDevice *pbdev = container_of(n, S390PCIBusDevice, >> + shutdown_notifier); >> + >> + pci_device_reset(pbdev->pdev); >> +} >> + >> +static void s390_pci_reset_cb(void *opaque) >> +{ >> + S390PCIBusDevice *pbdev = opaque; >> + >> + pci_device_reset(pbdev->pdev); >> +} >> + >> static void s390_pci_perform_unplug(S390PCIBusDevice *pbdev) >> { >> HotplugHandler *hotplug_ctrl; >> + if (pbdev->pft == ZPCI_PFT_ISM) { >> + notifier_remove(&pbdev->shutdown_notifier); >> + qemu_unregister_reset(s390_pci_reset_cb, pbdev); >> + } >> + >> /* Unplug the PCI device */ >> if (pbdev->pdev) { >> DeviceState *pdev = DEVICE(pbdev->pdev); >> @@ -1111,6 +1133,12 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, >> pbdev->fh |= FH_SHM_VFIO; >> pbdev->forwarding_assist = false; >> } >> + /* Register shutdown notifier and reset callback for ISM devices */ >> + if (pbdev->pft == ZPCI_PFT_ISM) { >> + pbdev->shutdown_notifier.notify = s390_pci_shutdown_notifier; >> + qemu_register_shutdown_notifier(&pbdev->shutdown_notifier); >> + qemu_register_reset(s390_pci_reset_cb, pbdev); >> + } >> } else { >> pbdev->fh |= FH_SHM_EMUL; >> /* Always intercept emulated devices */ >> diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c >> index 5f0adb0b4a..419763f829 100644 >> --- a/hw/s390x/s390-pci-vfio.c >> +++ b/hw/s390x/s390-pci-vfio.c >> @@ -122,6 +122,8 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, >> /* The following values remain 0 until we support other FMB formats */ >> pbdev->zpci_fn.fmbl = 0; >> pbdev->zpci_fn.pft = 0; >> + /* Store function type separately for type-specific behavior */ >> + pbdev->pft = cap->pft; >> } > > Thanks, queued: > > https://gitlab.com/thuth/qemu/-/commits/s390x-next/ > > I had to adjust the hunk in s390_pci_read_base() due to a conflict with your earlier patch, please check whether it looks sane to you. Yep, that adjustment is good (and FWIW, was the same on my local branch). Thanks! Matt
© 2016 - 2024 Red Hat, Inc.