From: Mattias Nissler <mnissler@rivosinc.com>
When DMA memory can't be directly accessed, as is the case when
running the device model in a separate process without shareable DMA
file descriptors, bounce buffering is used.
It is not uncommon for device models to request mapping of several DMA
regions at the same time. Examples include:
* net devices, e.g. when transmitting a packet that is split across
several TX descriptors (observed with igb)
* USB host controllers, when handling a packet with multiple data TRBs
(observed with xhci)
Previously, qemu only provided a single bounce buffer per AddressSpace
and would fail DMA map requests while the buffer was already in use. In
turn, this would cause DMA failures that ultimately manifest as hardware
errors from the guest perspective.
This change allocates DMA bounce buffers dynamically instead of
supporting only a single buffer. Thus, multiple DMA mappings also work
correctly when RAM can't be mmap()-ed.
The total bounce buffer allocation size is limited individually for each
AddressSpace. The default limit is 4096 bytes, matching the previous
maximum buffer size. A new x-max-bounce-buffer-size parameter is
provided to configure the limit for PCI devices.
Signed-off-by: Mattias Nissler <mnissler@rivosinc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Acked-by: Peter Xu <peterx@redhat.com>
Link: https://lore.kernel.org/r/20240819135455.2957406-1-mnissler@rivosinc.com
Signed-off-by: Peter Xu <peterx@redhat.com>
---
include/exec/memory.h | 14 +++----
include/hw/pci/pci_device.h | 3 ++
hw/pci/pci.c | 8 ++++
system/memory.c | 5 ++-
system/physmem.c | 82 ++++++++++++++++++++++++++-----------
5 files changed, 76 insertions(+), 36 deletions(-)
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 296fd068c0..e5e865d1a9 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -1084,13 +1084,7 @@ typedef struct AddressSpaceMapClient {
QLIST_ENTRY(AddressSpaceMapClient) link;
} AddressSpaceMapClient;
-typedef struct {
- MemoryRegion *mr;
- void *buffer;
- hwaddr addr;
- hwaddr len;
- bool in_use;
-} BounceBuffer;
+#define DEFAULT_MAX_BOUNCE_BUFFER_SIZE (4096)
/**
* struct AddressSpace: describes a mapping of addresses to #MemoryRegion objects
@@ -1110,8 +1104,10 @@ struct AddressSpace {
QTAILQ_HEAD(, MemoryListener) listeners;
QTAILQ_ENTRY(AddressSpace) address_spaces_link;
- /* Bounce buffer to use for this address space. */
- BounceBuffer bounce;
+ /* Maximum DMA bounce buffer size used for indirect memory map requests */
+ size_t max_bounce_buffer_size;
+ /* Total size of bounce buffers currently allocated, atomically accessed */
+ size_t bounce_buffer_size;
/* List of callbacks to invoke when buffers free up */
QemuMutex map_client_list_lock;
QLIST_HEAD(, AddressSpaceMapClient) map_client_list;
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index 15694f2489..91df40f989 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -167,6 +167,9 @@ struct PCIDevice {
/* ID of standby device in net_failover pair */
char *failover_pair_id;
uint32_t acpi_index;
+
+ /* Maximum DMA bounce buffer size used for indirect memory map requests */
+ uint32_t max_bounce_buffer_size;
};
static inline int pci_intx(PCIDevice *pci_dev)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index fab86d0567..d2caf3ee8b 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -85,6 +85,8 @@ static Property pci_props[] = {
QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
+ DEFINE_PROP_SIZE32("x-max-bounce-buffer-size", PCIDevice,
+ max_bounce_buffer_size, DEFAULT_MAX_BOUNCE_BUFFER_SIZE),
DEFINE_PROP_END_OF_LIST()
};
@@ -1204,6 +1206,8 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
"bus master container", UINT64_MAX);
address_space_init(&pci_dev->bus_master_as,
&pci_dev->bus_master_container_region, pci_dev->name);
+ pci_dev->bus_master_as.max_bounce_buffer_size =
+ pci_dev->max_bounce_buffer_size;
if (phase_check(PHASE_MACHINE_READY)) {
pci_init_bus_master(pci_dev);
@@ -2633,6 +2637,10 @@ static void pci_device_class_init(ObjectClass *klass, void *data)
k->unrealize = pci_qdev_unrealize;
k->bus_type = TYPE_PCI_BUS;
device_class_set_props(k, pci_props);
+ object_class_property_set_description(
+ klass, "x-max-bounce-buffer-size",
+ "Maximum buffer size allocated for bounce buffers used for mapped "
+ "access to indirect DMA memory");
}
static void pci_device_class_base_init(ObjectClass *klass, void *data)
diff --git a/system/memory.c b/system/memory.c
index 5e6eb459d5..f6f6fee6d8 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -3148,7 +3148,8 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
as->ioeventfds = NULL;
QTAILQ_INIT(&as->listeners);
QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
- as->bounce.in_use = false;
+ as->max_bounce_buffer_size = DEFAULT_MAX_BOUNCE_BUFFER_SIZE;
+ as->bounce_buffer_size = 0;
qemu_mutex_init(&as->map_client_list_lock);
QLIST_INIT(&as->map_client_list);
as->name = g_strdup(name ? name : "anonymous");
@@ -3158,7 +3159,7 @@ void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
static void do_address_space_destroy(AddressSpace *as)
{
- assert(!qatomic_read(&as->bounce.in_use));
+ assert(qatomic_read(&as->bounce_buffer_size) == 0);
assert(QLIST_EMPTY(&as->map_client_list));
qemu_mutex_destroy(&as->map_client_list_lock);
diff --git a/system/physmem.c b/system/physmem.c
index 94600a33ec..971bfa0855 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -3095,6 +3095,20 @@ void cpu_flush_icache_range(hwaddr start, hwaddr len)
NULL, len, FLUSH_CACHE);
}
+/*
+ * A magic value stored in the first 8 bytes of the bounce buffer struct. Used
+ * to detect illegal pointers passed to address_space_unmap.
+ */
+#define BOUNCE_BUFFER_MAGIC 0xb4017ceb4ffe12ed
+
+typedef struct {
+ uint64_t magic;
+ MemoryRegion *mr;
+ hwaddr addr;
+ size_t len;
+ uint8_t buffer[];
+} BounceBuffer;
+
static void
address_space_unregister_map_client_do(AddressSpaceMapClient *client)
{
@@ -3120,9 +3134,9 @@ void address_space_register_map_client(AddressSpace *as, QEMUBH *bh)
QEMU_LOCK_GUARD(&as->map_client_list_lock);
client->bh = bh;
QLIST_INSERT_HEAD(&as->map_client_list, client, link);
- /* Write map_client_list before reading in_use. */
+ /* Write map_client_list before reading bounce_buffer_size. */
smp_mb();
- if (!qatomic_read(&as->bounce.in_use)) {
+ if (qatomic_read(&as->bounce_buffer_size) < as->max_bounce_buffer_size) {
address_space_notify_map_clients_locked(as);
}
}
@@ -3251,28 +3265,40 @@ void *address_space_map(AddressSpace *as,
mr = flatview_translate(fv, addr, &xlat, &l, is_write, attrs);
if (!memory_access_is_direct(mr, is_write)) {
- if (qatomic_xchg(&as->bounce.in_use, true)) {
+ size_t used = qatomic_read(&as->bounce_buffer_size);
+ for (;;) {
+ hwaddr alloc = MIN(as->max_bounce_buffer_size - used, l);
+ size_t new_size = used + alloc;
+ size_t actual =
+ qatomic_cmpxchg(&as->bounce_buffer_size, used, new_size);
+ if (actual == used) {
+ l = alloc;
+ break;
+ }
+ used = actual;
+ }
+
+ if (l == 0) {
*plen = 0;
return NULL;
}
- /* Avoid unbounded allocations */
- l = MIN(l, TARGET_PAGE_SIZE);
- as->bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
- as->bounce.addr = addr;
- as->bounce.len = l;
+ BounceBuffer *bounce = g_malloc0(l + sizeof(BounceBuffer));
+ bounce->magic = BOUNCE_BUFFER_MAGIC;
memory_region_ref(mr);
- as->bounce.mr = mr;
+ bounce->mr = mr;
+ bounce->addr = addr;
+ bounce->len = l;
+
if (!is_write) {
flatview_read(fv, addr, MEMTXATTRS_UNSPECIFIED,
- as->bounce.buffer, l);
+ bounce->buffer, l);
}
*plen = l;
- return as->bounce.buffer;
+ return bounce->buffer;
}
-
memory_region_ref(mr);
*plen = flatview_extend_translation(fv, addr, len, mr, xlat,
l, is_write, attrs);
@@ -3287,12 +3313,11 @@ void *address_space_map(AddressSpace *as,
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
bool is_write, hwaddr access_len)
{
- if (buffer != as->bounce.buffer) {
- MemoryRegion *mr;
- ram_addr_t addr1;
+ MemoryRegion *mr;
+ ram_addr_t addr1;
- mr = memory_region_from_host(buffer, &addr1);
- assert(mr != NULL);
+ mr = memory_region_from_host(buffer, &addr1);
+ if (mr != NULL) {
if (is_write) {
invalidate_and_set_dirty(mr, addr1, access_len);
}
@@ -3302,15 +3327,22 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
memory_region_unref(mr);
return;
}
+
+
+ BounceBuffer *bounce = container_of(buffer, BounceBuffer, buffer);
+ assert(bounce->magic == BOUNCE_BUFFER_MAGIC);
+
if (is_write) {
- address_space_write(as, as->bounce.addr, MEMTXATTRS_UNSPECIFIED,
- as->bounce.buffer, access_len);
- }
- qemu_vfree(as->bounce.buffer);
- as->bounce.buffer = NULL;
- memory_region_unref(as->bounce.mr);
- /* Clear in_use before reading map_client_list. */
- qatomic_set_mb(&as->bounce.in_use, false);
+ address_space_write(as, bounce->addr, MEMTXATTRS_UNSPECIFIED,
+ bounce->buffer, access_len);
+ }
+
+ qatomic_sub(&as->bounce_buffer_size, bounce->len);
+ bounce->magic = ~BOUNCE_BUFFER_MAGIC;
+ memory_region_unref(bounce->mr);
+ g_free(bounce);
+ /* Write bounce_buffer_size before reading map_client_list. */
+ smp_mb();
address_space_notify_map_clients(as);
}
--
2.45.0
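
[Editorial note: as an illustration of the behavioral change, not part of the
patch. With the old single-bounce-buffer scheme, a second address_space_map()
of indirect memory failed while the first mapping was outstanding; after this
patch both requests succeed as long as the total stays within the
per-AddressSpace budget (4096 bytes by default, tunable per PCI device via
x-max-bounce-buffer-size). The sketch below is hypothetical device-model code
using the existing address_space_map()/address_space_unmap() API; the guest
addresses, lengths and helper name are made up.]

#include "qemu/osdep.h"
#include "exec/memory.h"    /* address_space_map()/address_space_unmap() */

/* Hypothetical helper, for illustration only. Assumes both ranges translate
 * to memory that is not directly accessible, so each mapping is served by a
 * dynamically allocated bounce buffer accounted against
 * as->bounce_buffer_size. */
static void map_two_dma_ranges(AddressSpace *as)
{
    hwaddr len1 = 1024, len2 = 1024;  /* together well under the 4096-byte default budget */

    void *buf1 = address_space_map(as, 0x1000, &len1, false,
                                   MEMTXATTRS_UNSPECIFIED);
    void *buf2 = address_space_map(as, 0x8000, &len2, false,
                                   MEMTXATTRS_UNSPECIFIED);

    /* Before this patch, buf2 would be NULL here because the single bounce
     * buffer was already in use; now each mapping gets its own BounceBuffer. */
    if (buf2) {
        address_space_unmap(as, buf2, len2, false, len2);
    }
    if (buf1) {
        address_space_unmap(as, buf1, len1, false, len1);
    }
}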
Hello,

+Mark (for the Mac devices)

On 9/9/24 22:11, Peter Xu wrote:
> From: Mattias Nissler <mnissler@rivosinc.com>
>
> [...]
>
>   system/physmem.c            | 82 ++++++++++++++++++++++++++-----------
>   5 files changed, 76 insertions(+), 36 deletions(-)

Here is a report of a segv of the ppc64 mac99+cpu970 machine booting debian.
See the stack trace below. Just wanted to let you know. I will dig further
next week.

Thanks,

C.


Thread 1 "qemu-system-ppc" received signal SIGSEGV, Segmentation fault.
address_space_unmap (len=<optimized out>, access_len=0, is_write=false, buffer=0x0,
    as=0x5555565d45c0 <address_space_memory>) at ../system/physmem.c:3333
3333        assert(bounce->magic == BOUNCE_BUFFER_MAGIC);
(gdb) bt
#0  address_space_unmap
    (len=<optimized out>, access_len=0, is_write=false, buffer=0x0, as=0x5555565d45c0 <address_space_memory>)
    at ../system/physmem.c:3333
#1  address_space_unmap
    (as=as@entry=0x5555565d45c0 <address_space_memory>, buffer=0x0, len=<optimized out>, is_write=<optimized out>, access_len=0) at ../system/physmem.c:3313
#2  0x000055555595ea48 in dma_memory_unmap
    (access_len=<optimized out>, dir=<optimized out>, len=<optimized out>, buffer=<optimized out>, as=<optimized out>) at /home/legoater/work/qemu/qemu.git/include/sysemu/dma.h:236
#3  pmac_ide_atapi_transfer_cb (opaque=0x555556c06470, ret=<optimized out>) at ../hw/ide/macio.c:122
#4  0x00005555559861f3 in DBDMA_run (s=0x555556c04c60) at ../hw/misc/macio/mac_dbdma.c:546
#5  DBDMA_run_bh (opaque=0x555556c04c60) at ../hw/misc/macio/mac_dbdma.c:556
#6  0x0000555555f19f33 in aio_bh_call (bh=bh@entry=0x555556ab5570) at ../util/async.c:171
#7  0x0000555555f1a0f5 in aio_bh_poll (ctx=ctx@entry=0x5555566af150) at ../util/async.c:218
#8  0x0000555555f0269e in aio_dispatch (ctx=0x5555566af150) at ../util/aio-posix.c:423
#9  0x0000555555f19d8e in aio_ctx_dispatch
    (source=<optimized out>, callback=<optimized out>, user_data=<optimized out>) at ../util/async.c:360
#10 0x00007ffff7315f4f in g_main_context_dispatch () at /lib64/libglib-2.0.so.0
#11 0x0000555555f1b488 in glib_pollfds_poll () at ../util/main-loop.c:287
#12 os_host_main_loop_wait (timeout=2143429) at ../util/main-loop.c:310
#13 main_loop_wait (nonblocking=nonblocking@entry=0) at ../util/main-loop.c:589
#14 0x0000555555abeba3 in qemu_main_loop () at ../system/runstate.c:826
#15 0x0000555555e63787 in qemu_default_main () at ../system/main.c:37
#16 0x00007ffff6e29590 in __libc_start_call_main () at /lib64/libc.so.6
#17 0x00007ffff6e29640 in __libc_start_main_impl () at /lib64/libc.so.6
#18 0x000055555588d4f5 in _start ()
On Fri, Sep 13, 2024 at 04:35:32PM +0200, Cédric Le Goater wrote:
> Hello,
>
> +Mark (for the Mac devices)
>
> [...]
>
> Here is a report of a segv of the ppc64 mac99+cpu970 machine booting debian.
> See the stack trace below. Just wanted to let you know. I will dig further
> next week.
>
> [...]
> #18 0x000055555588d4f5 in _start ()

Thanks for the report!

Mattias,

Would you have time to take a look?

Thanks,

-- 
Peter Xu
Thanks for the report, and my apologies for the breakage.

On Fri, Sep 13, 2024 at 4:47 PM Peter Xu <peterx@redhat.com> wrote:
>
> On Fri, Sep 13, 2024 at 04:35:32PM +0200, Cédric Le Goater wrote:
> > Here is a report of a segv of the ppc64 mac99+cpu970 machine booting debian.
> > See the stack trace below. Just wanted to let you know.
> >
> > [...]
>
> Thanks for the report!
>
> Mattias,
>
> Would you have time to take a look?

I noticed that the backtrace indicates address_space_unmap is called
with buffer=0x0, len=0. This wasn't really correct before my
concurrent bounce buffering change either, but it looks like the
previous code would have tolerated this to a certain extent (at least
no immediate crashes). Original code in question:

    if (is_write) {
        address_space_write(as, as->bounce.addr, MEMTXATTRS_UNSPECIFIED,
                            as->bounce.buffer, access_len);
    }
    qemu_vfree(as->bounce.buffer);
    as->bounce.buffer = NULL;
    memory_region_unref(as->bounce.mr);
    /* Clear in_use before reading map_client_list. */
    qatomic_set_mb(&as->bounce.in_use, false);
    address_space_notify_map_clients(as);

address_space_write and qemu_vfree are safe to call with NULL/0
parameters. as->bounce.buffer = NULL would leak the buffer if one is
allocated, and memory_region_unref(as->bounce.mr) is only OK if the
bounce buffer hasn't been used before, otherwise we'd erroneously drop
a memory region reference.

We have two options here: Either we fix the caller to not call
address_space_unmap with buffer=NULL. Or alternatively we make
address_space_unmap NULL-safe by putting a check to return immediately
when being passed a NULL buffer parameter.

Looking at the code, the dma_memory_unmap calls in hw/ide/macio.c seem
to be passing buffer=NULL unconditionally, since the dma_mem field in
struct DBDMA_io is never set to anything non-zero. In fact, I believe
after commit be1e343995ef81fc05d9a4e1ec263ca171d842e7 "macio: switch
over to new byte-aligned DMA helpers", the dma_memory_unmap calls in
hw/ide/macio.c aren't doing anything and should probably have been
removed together with the dma_mem, dma_len and dir fields in struct
DBDMA_io. Speculative patch:

diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index e84bf2c9f6..15dd40138e 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -119,9 +119,6 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
         return;

 done:
-    dma_memory_unmap(&address_space_memory, io->dma_mem, io->dma_len,
-                     io->dir, io->dma_len);
-
     if (ret < 0) {
         block_acct_failed(blk_get_stats(s->blk), &s->acct);
     } else {
@@ -202,9 +199,6 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
         return;

 done:
-    dma_memory_unmap(&address_space_memory, io->dma_mem, io->dma_len,
-                     io->dir, io->dma_len);
-
     if (s->dma_cmd == IDE_DMA_READ || s->dma_cmd == IDE_DMA_WRITE) {
         if (ret < 0) {
             block_acct_failed(blk_get_stats(s->blk), &s->acct);
diff --git a/include/hw/ppc/mac_dbdma.h b/include/hw/ppc/mac_dbdma.h
index 4a3f644516..c774f6bf84 100644
--- a/include/hw/ppc/mac_dbdma.h
+++ b/include/hw/ppc/mac_dbdma.h
@@ -44,10 +44,6 @@ struct DBDMA_io {
     DBDMA_end dma_end;
     /* DMA is in progress, don't start another one */
     bool processing;
-    /* DMA request */
-    void *dma_mem;
-    dma_addr_t dma_len;
-    DMADirection dir;
 };

 /*

Cédric, can you try with the above patch and/or share more details of
your setup so I can verify (I tried booting a ppc64el-pseries dqib
image but didn't see the issue)?

Thanks,
Mattias
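
[Editorial note: for concreteness, the second option mentioned above (making
address_space_unmap() tolerate a NULL buffer) could look roughly like the
sketch below. This is an assumption about how such a guard might be written,
not a patch that was posted in this thread.]

void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         bool is_write, hwaddr access_len)
{
    /* Hypothetical guard: a NULL buffer means nothing was ever mapped,
     * so there is no data to write back, no bounce buffer to free and
     * no bounce_buffer_size accounting to undo. */
    if (!buffer) {
        return;
    }

    /* ... existing direct-mapping and bounce-buffer handling ... */
}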
On 9/16/24 10:23, Mattias Nissler wrote:
> Thanks for the report, and my apologies for the breakage.
>
> [...]
>
> Cédric, can you try with the above patch and/or

crash seems gone.

> share more details of your setup so I can verify

You will need a Linux powerpc or powerpc64 image for mac machines,
which are not common nowadays, or MacOS images. My debian images
are big. I will try to build you a small one for more tests.

> (I tried booting a ppc64el-pseries dqib
> image but didn't see the issue)?

pseries is a very different type of machine, the equivalent of the
virt machine on ARM and RISCV. The HW is completely different.

Thanks,

C.
Mattias,

> Cédric, can you try with the above patch and/or
>
> crash seems gone.
>
>> share more details of your setup so I can verify
>
> You will need a Linux powerpc or powerpc64 image for mac machines,
> which are not common nowadays, or MacOS images. My debian images
> are big. I will try to build you a small one for more tests.

Grab :

  https://cdimage.debian.org/cdimage/ports/10.0/powerpc/iso-cd/debian-10.0-powerpc-NETINST-1.iso

and run :

  qemu-system-ppc -M mac99 -cpu g4 -cdrom debian-10.0.0-powerpc-NETINST-1.iso -nographic -boot d

Thanks,

C.
Thanks Cédric, I can reproduce now, and my proposed patch avoids the
crash as expected.

On Mon, Sep 16, 2024 at 2:28 PM Cédric Le Goater <clg@kaod.org> wrote:
>
> Mattias,
>
> [...]
>
> Grab :
>
>   https://cdimage.debian.org/cdimage/ports/10.0/powerpc/iso-cd/debian-10.0-powerpc-NETINST-1.iso
>
> and run :
>
>   qemu-system-ppc -M mac99 -cpu g4 -cdrom debian-10.0.0-powerpc-NETINST-1.iso -nographic -boot d
>
> Thanks,
>
> C.
On 9/16/24 14:41, Mattias Nissler wrote:
> Thanks Cédric, I can reproduce now, and my proposed patch avoids the
> crash as expected.

disk images for macos9 and macosx10 all boot.

C.
On Mon, Sep 16, 2024 at 3:06 PM Cédric Le Goater <clg@kaod.org> wrote:
>
> On 9/16/24 14:41, Mattias Nissler wrote:
> > Thanks Cédric, I can reproduce now, and my proposed patch avoids the
> > crash as expected.
>
> disk images for macos9 and macosx10 all boot.

Thanks for testing, happy to hear! I will go ahead and send the change
proposed earlier as a patch to the list then.

> C.
On 16/09/2024 09:23, Mattias Nissler wrote:
> Thanks for the report, and my apologies for the breakage.
>
> [...]
>
> Looking at the code, the dma_memory_unmap calls in hw/ide/macio.c seem
> to be passing buffer=NULL unconditionally, since the dma_mem field in
> struct DBDMA_io is never set to anything non-zero. In fact, I believe
> after commit be1e343995ef81fc05d9a4e1ec263ca171d842e7 "macio: switch
> over to new byte-aligned DMA helpers", the dma_memory_unmap calls in
> hw/ide/macio.c aren't doing anything and should probably have been
> removed together with the dma_mem, dma_len and dir fields in struct
> DBDMA_io. Speculative patch:
>
> [...]
>
> Cédric, can you try with the above patch and/or share more details of
> your setup so I can verify (I tried booting a ppc64el-pseries dqib
> image but didn't see the issue)?

I'm fairly sure that this patch would break MacOS 9 which was the reason that
dma_memory_unmap() was added here in the first place: what I was finding was that
without the dma_memory_unmap() the destination RAM wasn't being invalidated (or
marked dirty), causing random crashes during boot.

Would the issue be solved by adding a corresponding dma_memory_map() beforehand at
the relevant places in hw/ide/macio.c? If that's required as part of the setup for
bounce buffers then I can see how not having this present could cause problems.

ATB,

Mark.
On Mon, 16 Sept 2024 at 12:29, Mark Cave-Ayland
<mark.cave-ayland@ilande.co.uk> wrote:
>
> On 16/09/2024 09:23, Mattias Nissler wrote:
> > Looking at the code, the dma_memory_unmap calls in hw/ide/macio.c seem
> > to be passing buffer=NULL unconditionally, since the dma_mem field in
> > struct DBDMA_io is never set to anything non-zero. [...]
>
> I'm fairly sure that this patch would break MacOS 9 which was the reason that
> dma_memory_unmap() was added here in the first place: what I was finding was that
> without the dma_memory_unmap() the destination RAM wasn't being invalidated (or
> marked dirty), causing random crashes during boot.

dma_memory_unmap() of something you never mapped is
definitely wrong. Whatever is going on here, leaving the unmap
call in after you removed the dma_memory_map() call is just
papering over the actual cause of the crashes.

> Would the issue be solved by adding a corresponding dma_memory_map() beforehand at
> the relevant places in hw/ide/macio.c? If that's required as part of the setup for
> bounce buffers then I can see how not having this present could cause problems.

The only purpose of this API is sequences of:
   host_ptr = dma_memory_map(...);
   access the host_ptr directly;
   dma_memory_unmap(...);

The bounce-buffer stuff is an internal implementation detail
of making this API work when the DMA is going to a device.

We need to find whatever the actual cause of the macos failure is.
Mattias' suggested change looks right to me.

I do wonder if something needs the memory barrier that
unmap does as part of its operation, e.g. in the
implementation of the dma_blk_* functions.

-- PMM
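
[Editorial note: for reference, the map/access/unmap sequence described above
typically looks like the sketch below in a device model. This is illustrative
code, assuming the dma_memory_map()/dma_memory_unmap() helpers from
include/sysemu/dma.h; the helper name is made up. The length check matters
because, for indirect memory, the mapping is backed by a bounce buffer and may
be shorter than requested, or fail once the per-AddressSpace budget is
exhausted.]

#include "qemu/osdep.h"
#include "sysemu/dma.h"   /* dma_memory_map()/dma_memory_unmap() */

/* Hypothetical helper, for illustration only: copy 'size' bytes of guest
 * memory at 'addr' into 'dst' using the map/access/unmap pattern. */
static bool copy_from_guest(AddressSpace *as, dma_addr_t addr,
                            void *dst, dma_addr_t size)
{
    dma_addr_t len = size;
    void *p = dma_memory_map(as, addr, &len, DMA_DIRECTION_TO_DEVICE,
                             MEMTXATTRS_UNSPECIFIED);

    if (!p) {
        return false;                 /* mapping failed outright */
    }
    if (len < size) {
        /* Mapping was truncated (e.g. bounce buffer budget exhausted). */
        dma_memory_unmap(as, p, len, DMA_DIRECTION_TO_DEVICE, 0);
        return false;
    }

    memcpy(dst, p, size);             /* access the host pointer directly */
    dma_memory_unmap(as, p, len, DMA_DIRECTION_TO_DEVICE, size);
    return true;
}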
On 16/09/2024 12:44, Peter Maydell wrote:
> On Mon, 16 Sept 2024 at 12:29, Mark Cave-Ayland
> <mark.cave-ayland@ilande.co.uk> wrote:
>>
>> [...]
>>
>> Would the issue be solved by adding a corresponding dma_memory_map() beforehand at
>> the relevant places in hw/ide/macio.c? If that's required as part of the setup for
>> bounce buffers then I can see how not having this present could cause problems.
>
> The only purpose of this API is sequences of:
>    host_ptr = dma_memory_map(...);
>    access the host_ptr directly;
>    dma_memory_unmap(...);
>
> The bounce-buffer stuff is an internal implementation detail
> of making this API work when the DMA is going to a device.
>
> We need to find whatever the actual cause of the macos failure is.
> Mattias' suggested change looks right to me.
>
> I do wonder if something needs the memory barrier that
> unmap does as part of its operation, e.g. in the
> implementation of the dma_blk_* functions.

It has been a few years now, but I'm fairly sure the issue was that dma_blk_read()
didn't mark RAM containing code as dirty/invalid, and since MacOS 9 used overlays
then it would crash randomly trying to execute stale memory. dma_memory_unmap()
checks to see if the direction was to RAM, and then marks the memory dirty allowing
the new code to get picked up after a MMU fault.

If the memory barriers are already in place for the dma_blk_*() functions then the
analysis could be correct, in which case the bug is a misunderstanding I made in
be1e343995 ("macio: switch over to new byte-aligned DMA helpers") back in 2016.

ATB,

Mark.
On Mon, 16 Sept 2024 at 13:14, Mark Cave-Ayland
<mark.cave-ayland@ilande.co.uk> wrote:
>
> On 16/09/2024 12:44, Peter Maydell wrote:
> > [...]
> >
> > I do wonder if something needs the memory barrier that
> > unmap does as part of its operation, e.g. in the
> > implementation of the dma_blk_* functions.
>
> It has been a few years now, but I'm fairly sure the issue was that dma_blk_read()
> didn't mark RAM containing code as dirty/invalid, and since MacOS 9 used overlays
> then it would crash randomly trying to execute stale memory. dma_memory_unmap()
> checks to see if the direction was to RAM, and then marks the memory dirty allowing
> the new code to get picked up after a MMU fault.

dma_blk_io() does its writes into guest memory by doing
a dma_memory_map()/write-to-host-pointer/dma_memory_unmap()
sequence, though (this is done in dma_blk_cb()).

More generally there should be *no* path for doing writes to
guest memory that does not handle the dirty-memory case:
so if there is one we need to find and fix it.

thanks
-- PMM
On Mon, Sep 16, 2024 at 2:28 PM Peter Maydell <peter.maydell@linaro.org> wrote:
>
> [...]
>
> dma_blk_io() does its writes into guest memory by doing
> a dma_memory_map()/write-to-host-pointer/dma_memory_unmap()
> sequence, though (this is done in dma_blk_cb()).
>
> More generally there should be *no* path for doing writes to
> guest memory that does not handle the dirty-memory case:
> so if there is one we need to find and fix it.

I concur that it should be the responsibility of the code performing
the DMA write to make sure any invalidation side effects take place
rather than relying on ad-hoc calls taking place later.

Regardless, in the interest of reaching a conclusion here: Mark, can
you provide instructions on how to verify MacOS 9 or alternatively
kindly do a quick test?

Thanks,
Mattias