Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
This flag is set/checked prior to calling a device's MemoryRegion
handlers, and set when device code initiates DMA. The purpose of this
flag is to prevent two types of DMA-based reentrancy issues:
1.) mmio -> dma -> mmio case
2.) bh -> dma write -> mmio case
These issues have led to problems such as stack-exhaustion and
use-after-frees.
Summary of the problem from Peter Maydell:
https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
Acked-by: Peter Xu <peterx@redhat.com>
---
include/hw/qdev-core.h | 7 +++++++
softmmu/memory.c | 17 +++++++++++++++++
softmmu/trace-events | 1 +
3 files changed, 25 insertions(+)
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index bd50ad5ee1..7623703943 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -162,6 +162,10 @@ struct NamedClockList {
QLIST_ENTRY(NamedClockList) node;
};
+typedef struct {
+ bool engaged_in_io;
+} MemReentrancyGuard;
+
/**
* DeviceState:
* @realized: Indicates whether the device has been fully constructed.
@@ -194,6 +198,9 @@ struct DeviceState {
int alias_required_for_version;
ResettableState reset;
GSList *unplug_blockers;
+
+ /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */
+ MemReentrancyGuard mem_reentrancy_guard;
};
struct DeviceListener {
diff --git a/softmmu/memory.c b/softmmu/memory.c
index 4699ba55ec..57bf18a257 100644
--- a/softmmu/memory.c
+++ b/softmmu/memory.c
@@ -533,6 +533,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
uint64_t access_mask;
unsigned access_size;
unsigned i;
+ DeviceState *dev = NULL;
MemTxResult r = MEMTX_OK;
if (!access_size_min) {
@@ -542,6 +543,19 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
access_size_max = 4;
}
+ /* Do not allow more than one simultanous access to a device's IO Regions */
+ if (mr->owner &&
+ !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
+ dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE);
+ if (dev) {
+ if (dev->mem_reentrancy_guard.engaged_in_io) {
+ trace_memory_region_reentrant_io(get_cpu_index(), mr, addr, size);
+ return MEMTX_ERROR;
+ }
+ dev->mem_reentrancy_guard.engaged_in_io = true;
+ }
+ }
+
/* FIXME: support unaligned access? */
access_size = MAX(MIN(size, access_size_max), access_size_min);
access_mask = MAKE_64BIT_MASK(0, access_size * 8);
@@ -556,6 +570,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
access_mask, attrs);
}
}
+ if (dev) {
+ dev->mem_reentrancy_guard.engaged_in_io = false;
+ }
return r;
}
diff --git a/softmmu/trace-events b/softmmu/trace-events
index 22606dc27b..62d04ea9a7 100644
--- a/softmmu/trace-events
+++ b/softmmu/trace-events
@@ -13,6 +13,7 @@ memory_region_ops_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, u
memory_region_ops_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size, const char *name) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u name '%s'"
memory_region_subpage_read(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
memory_region_subpage_write(int cpu_index, void *mr, uint64_t offset, uint64_t value, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" value 0x%"PRIx64" size %u"
+memory_region_reentrant_io(int cpu_index, void *mr, uint64_t offset, unsigned size) "cpu %d mr %p offset 0x%"PRIx64" size %u"
memory_region_ram_device_read(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
memory_region_ram_device_write(int cpu_index, void *mr, uint64_t addr, uint64_t value, unsigned size) "cpu %d mr %p addr 0x%"PRIx64" value 0x%"PRIx64" size %u"
memory_region_sync_dirty(const char *mr, const char *listener, int global) "mr '%s' listener '%s' synced (global=%d)"
--
2.39.0
On 13/3/23 09:24, Alexander Bulekov wrote:
> Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
> This flag is set/checked prior to calling a device's MemoryRegion
> handlers, and set when device code initiates DMA. The purpose of this
> flag is to prevent two types of DMA-based reentrancy issues:
>
> 1.) mmio -> dma -> mmio case
> 2.) bh -> dma write -> mmio case
>
> These issues have led to problems such as stack-exhaustion and
> use-after-frees.
>
> Summary of the problem from Peter Maydell:
> https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com
>
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282

BTW we need to commit these reproducers as tests/qtest/fuzz-*.

> Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
> Acked-by: Peter Xu <peterx@redhat.com>
> ---
> include/hw/qdev-core.h | 7 +++++++
> softmmu/memory.c | 17 +++++++++++++++++
> softmmu/trace-events | 1 +
> 3 files changed, 25 insertions(+)
Hi Alex,
Sorry for the late review, *sigh*.
On 13/3/23 09:24, Alexander Bulekov wrote:
> Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
> This flag is set/checked prior to calling a device's MemoryRegion
> handlers, and set when device code initiates DMA. The purpose of this
> flag is to prevent two types of DMA-based reentrancy issues:
>
> 1.) mmio -> dma -> mmio case
> 2.) bh -> dma write -> mmio case
>
> These issues have led to problems such as stack-exhaustion and
> use-after-frees.
>
> Summary of the problem from Peter Maydell:
> https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com
>
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282
>
> Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
> Acked-by: Peter Xu <peterx@redhat.com>
> ---
> include/hw/qdev-core.h | 7 +++++++
> softmmu/memory.c | 17 +++++++++++++++++
> softmmu/trace-events | 1 +
> 3 files changed, 25 insertions(+)
>
> diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
> index bd50ad5ee1..7623703943 100644
> --- a/include/hw/qdev-core.h
> +++ b/include/hw/qdev-core.h
> @@ -162,6 +162,10 @@ struct NamedClockList {
> QLIST_ENTRY(NamedClockList) node;
> };
>
> +typedef struct {
> + bool engaged_in_io;
Do you plan to add more fields?
> +} MemReentrancyGuard;
> +
> /**
> * DeviceState:
> * @realized: Indicates whether the device has been fully constructed.
> @@ -194,6 +198,9 @@ struct DeviceState {
> int alias_required_for_version;
> ResettableState reset;
> GSList *unplug_blockers;
> +
> + /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */
> + MemReentrancyGuard mem_reentrancy_guard;
At this point I'm not sure anymore this is a device or MR property.
> };
>
> struct DeviceListener {
> diff --git a/softmmu/memory.c b/softmmu/memory.c
> index 4699ba55ec..57bf18a257 100644
> --- a/softmmu/memory.c
> +++ b/softmmu/memory.c
> @@ -533,6 +533,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
> uint64_t access_mask;
> unsigned access_size;
> unsigned i;
> + DeviceState *dev = NULL;
> MemTxResult r = MEMTX_OK;
>
> if (!access_size_min) {
> @@ -542,6 +543,19 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
> access_size_max = 4;
> }
>
> + /* Do not allow more than one simultanous access to a device's IO Regions */
Typo "simultaneous".
1/ access_with_adjusted_size() is complex enough and we are having hard
time getting it right. I'd prefer we don't intermix size adjustment
and re-entrancy check in the same function. This check could belong
to the callers.
2/ I'm not keen on calling QOM object_dynamic_cast() in this hot path;
and mixing QDev API within MR one. At least, can we cache this value
once in memory_region_do_init() since we have access to @owner?
> + if (mr->owner &&
> + !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
> + dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE);
> + if (dev) {
> + if (dev->mem_reentrancy_guard.engaged_in_io) {
> + trace_memory_region_reentrant_io(get_cpu_index(), mr, addr, size);
> + return MEMTX_ERROR;
MEMTX_ERROR is device-specific, I'm not sure it is right to return it
from this generic path. Maybe you meant MEMTX_ACCESS_ERROR?
> + }
> + dev->mem_reentrancy_guard.engaged_in_io = true;
> + }
> + }
> +
> /* FIXME: support unaligned access? */
> access_size = MAX(MIN(size, access_size_max), access_size_min);
> access_mask = MAKE_64BIT_MASK(0, access_size * 8);
> @@ -556,6 +570,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
> access_mask, attrs);
> }
> }
> + if (dev) {
> + dev->mem_reentrancy_guard.engaged_in_io = false;
> + }
> return r;
> }
On 230313 0945, Philippe Mathieu-Daudé wrote:
> Hi Alex,
>
> Sorry for the late review, *sigh*.
>
> On 13/3/23 09:24, Alexander Bulekov wrote:
> > Add a flag to the DeviceState, when a device is engaged in PIO/MMIO/DMA.
> > This flag is set/checked prior to calling a device's MemoryRegion
> > handlers, and set when device code initiates DMA. The purpose of this
> > flag is to prevent two types of DMA-based reentrancy issues:
> >
> > 1.) mmio -> dma -> mmio case
> > 2.) bh -> dma write -> mmio case
> >
> > These issues have led to problems such as stack-exhaustion and
> > use-after-frees.
> >
> > Summary of the problem from Peter Maydell:
> > https://lore.kernel.org/qemu-devel/CAFEAcA_23vc7hE3iaM-JVA6W38LK4hJoWae5KcknhPRD5fPBZA@mail.gmail.com
> >
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/62
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/540
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/541
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/556
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/557
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/827
> > Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1282
> >
> > Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
> > Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> > Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
> > Acked-by: Peter Xu <peterx@redhat.com>
> > ---
> > include/hw/qdev-core.h | 7 +++++++
> > softmmu/memory.c | 17 +++++++++++++++++
> > softmmu/trace-events | 1 +
> > 3 files changed, 25 insertions(+)
> >
> > diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
> > index bd50ad5ee1..7623703943 100644
> > --- a/include/hw/qdev-core.h
> > +++ b/include/hw/qdev-core.h
> > @@ -162,6 +162,10 @@ struct NamedClockList {
> > QLIST_ENTRY(NamedClockList) node;
> > };
> > +typedef struct {
> > + bool engaged_in_io;
>
> Do you plan to add more fields?
Not right now, but maybe some need will come up.
> > +} MemReentrancyGuard;
> > +
> > /**
> > * DeviceState:
> > * @realized: Indicates whether the device has been fully constructed.
> > @@ -194,6 +198,9 @@ struct DeviceState {
> > int alias_required_for_version;
> > ResettableState reset;
> > GSList *unplug_blockers;
> > +
> > + /* Is the device currently in mmio/pio/dma? Used to prevent re-entrancy */
> > + MemReentrancyGuard mem_reentrancy_guard;
>
> At this point I'm not sure anymore this is a device or MR property.
It's designed to be an MR property. If it were MR specific, it wouldn't
handle the BH -> DMA case, or this one, where there are two MRs (doorbell
and oper) involved.
https://gitlab.com/qemu-project/qemu/-/issues/540
>
> > };
> > struct DeviceListener {
> > diff --git a/softmmu/memory.c b/softmmu/memory.c
> > index 4699ba55ec..57bf18a257 100644
> > --- a/softmmu/memory.c
> > +++ b/softmmu/memory.c
> > @@ -533,6 +533,7 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
> > uint64_t access_mask;
> > unsigned access_size;
> > unsigned i;
> > + DeviceState *dev = NULL;
> > MemTxResult r = MEMTX_OK;
> > if (!access_size_min) {
> > @@ -542,6 +543,19 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
> > access_size_max = 4;
> > }
> > + /* Do not allow more than one simultanous access to a device's IO Regions */
>
> Typo "simultaneous".
>
> 1/ access_with_adjusted_size() is complex enough and we are having hard
> time getting it right. I'd prefer we don't intermix size adjustment
> and re-entrancy check in the same function. This check could belong
> to the callers.
>
Would moving the code within this function to keep it separate from the
size adjustment be good enough? Otherwise we would end up with duplicate
code in the read/write callers.
The size-adjustment seems to be orthogonal (the MR won't change)?
> 2/ I'm not keen on calling QOM object_dynamic_cast() in this hot path;
> and mixing QDev API within MR one. At least, can we cache this value
> once in memory_region_do_init() since we have access to @owner?
>
Sounds like a good idea. Is it ever possible for the owner/owner's
address to change?
Thanks
-Alex
> > + if (mr->owner &&
> > + !mr->ram_device && !mr->ram && !mr->rom_device && !mr->readonly) {
> > + dev = (DeviceState *) object_dynamic_cast(mr->owner, TYPE_DEVICE);
> > + if (dev) {
> > + if (dev->mem_reentrancy_guard.engaged_in_io) {
> > + trace_memory_region_reentrant_io(get_cpu_index(), mr, addr, size);
> > + return MEMTX_ERROR;
>
> MEMTX_ERROR is device-specific, I'm not sure it is right to return it
> from this generic path. Maybe you meant MEMTX_ACCESS_ERROR?
>
> > + }
> > + dev->mem_reentrancy_guard.engaged_in_io = true;
> > + }
> > + }
> > +
> > /* FIXME: support unaligned access? */
> > access_size = MAX(MIN(size, access_size_max), access_size_min);
> > access_mask = MAKE_64BIT_MASK(0, access_size * 8);
> > @@ -556,6 +570,9 @@ static MemTxResult access_with_adjusted_size(hwaddr addr,
> > access_mask, attrs);
> > }
> > }
> > + if (dev) {
> > + dev->mem_reentrancy_guard.engaged_in_io = false;
> > + }
> > return r;
> > }
>
On 230313 0515, Alexander Bulekov wrote:
> >
> > At this point I'm not sure anymore this is a device or MR property.
>
> It's designed to be an MR property. If it were MR specific, it wouldn't

Should be "It's designed to be a Device property."
© 2016 - 2026 Red Hat, Inc.