For testing, it can be useful to simulate an enormous amount of memory
(e.g. 2^64 RAM). This adds an MMIO device that acts as sparse memory.
When something writes a nonzero value to a sparse-mem address, we
allocate a block of memory. This block is kept around until all of the
bytes within the block are zeroed. The device has a very low priority
(so it can be mapped beneath actual RAM, and virtual device MMIO
regions).
Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
---
MAINTAINERS | 1 +
hw/mem/meson.build | 1 +
hw/mem/sparse-mem.c | 154 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 156 insertions(+)
create mode 100644 hw/mem/sparse-mem.c
diff --git a/MAINTAINERS b/MAINTAINERS
index f22d83c178..9e3d8b1401 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2618,6 +2618,7 @@ R: Thomas Huth <thuth@redhat.com>
S: Maintained
F: tests/qtest/fuzz/
F: scripts/oss-fuzz/
+F: hw/mem/sparse-mem.c
F: docs/devel/fuzzing.rst
Register API
diff --git a/hw/mem/meson.build b/hw/mem/meson.build
index 0d22f2b572..732f459e0a 100644
--- a/hw/mem/meson.build
+++ b/hw/mem/meson.build
@@ -1,5 +1,6 @@
mem_ss = ss.source_set()
mem_ss.add(files('memory-device.c'))
+mem_ss.add(files('sparse-mem.c'))
mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c'))
mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c'))
mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c'))
diff --git a/hw/mem/sparse-mem.c b/hw/mem/sparse-mem.c
new file mode 100644
index 0000000000..ffda6f76b4
--- /dev/null
+++ b/hw/mem/sparse-mem.c
@@ -0,0 +1,154 @@
+/*
+ * A sparse memory device
+ *
+ * Copyright Red Hat Inc., 2021
+ *
+ * Authors:
+ * Alexander Bulekov <alxndr@bu.edu>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "exec/address-spaces.h"
+#include "hw/qdev-properties.h"
+
+#define TYPE_SPARSE_MEM "sparse-mem" /* type name used in sparse_mem_types */
+#define SPARSE_MEM(obj) OBJECT_CHECK(SparseMemState, (obj), TYPE_SPARSE_MEM)
+/* Size in bytes (4 KiB) of one backing block; also the hash-key granule */
+#define SPARSE_BLOCK_SIZE 0x1000
+
+typedef struct SparseMemState {
+ DeviceState parent_obj; /* parent object; the type's .parent is TYPE_DEVICE */
+ MemoryRegion mmio; /* I/O region using sparse_mem_ops, mapped at realize */
+ uint64_t baseaddr; /* "baseaddr" property: offset of mmio in system memory */
+ uint64_t length; /* "length" property: size given to the mmio region */
+ uint64_t usage; /* bytes of block data currently allocated */
+ uint64_t maxsize; /* "maxsize" property: bound checked before allocating */
+ GHashTable *mapped; /* (addr / SPARSE_BLOCK_SIZE) -> sparse_mem_block * */
+} SparseMemState;
+
+typedef struct sparse_mem_block {
+ uint16_t nonzeros; /* count of nonzero bytes in data[]; freed when it hits 0 */
+ uint8_t data[SPARSE_BLOCK_SIZE]; /* contents, zero-filled when allocated */
+} sparse_mem_block;
+
+/* MMIO read: return @size bytes stored at @addr; unbacked blocks read as 0 */
+static uint64_t sparse_mem_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    SparseMemState *sms = opaque;
+    size_t blk_no = addr / SPARSE_BLOCK_SIZE;
+    size_t blk_off = addr % SPARSE_BLOCK_SIZE;
+    sparse_mem_block *blk = g_hash_table_lookup(sms->mapped, (void *)blk_no);
+    uint64_t value = 0;
+    if (!blk) {
+        return value;
+    }
+    assert(blk_off + size <= sizeof(blk->data)); /* stays inside the block */
+    memcpy(&value, blk->data + blk_off, size); /* rest of value stays zero */
+    return value;
+}
+
+/*
+ * MMIO write: store @size bytes of @v at @addr. A block is allocated on the
+ * first nonzero write to it (while "maxsize" allows) and freed once all of
+ * its bytes are zero again; writes that cannot be backed are dropped.
+ * NOTE(review): memcpy() copies the first @size bytes of @v in host order,
+ * which are its low-order bytes only on little-endian hosts - confirm intent.
+ */
+static void sparse_mem_write(void *opaque, hwaddr addr, uint64_t v,
+                             unsigned int size)
+{
+    SparseMemState *s = opaque;
+    size_t pfn = addr / SPARSE_BLOCK_SIZE;
+    size_t offset = addr % SPARSE_BLOCK_SIZE;
+    sparse_mem_block *block = g_hash_table_lookup(s->mapped, (void *)pfn);
+    int delta = 0;
+
+    if (!block) {
+        /* Zero writes need no backing; nonzero ones must fit the budget */
+        if (!v || s->usage + SPARSE_BLOCK_SIZE >= s->maxsize) {
+            return;
+        }
+        block = g_new0(sparse_mem_block, 1);
+        g_hash_table_insert(s->mapped, (void *)pfn, block);
+        s->usage += sizeof(block->data);
+    }
+
+    /* Only aligned accesses are valid, so none can span two blocks */
+    assert(offset + size <= sizeof(block->data));
+    /* delta = nonzero bytes after the write minus nonzero bytes before it */
+    for (unsigned int i = 0; i < size; i++) {
+        delta -= (block->data[offset + i] != 0);
+    }
+    memcpy(block->data + offset, &v, size);
+    for (unsigned int i = 0; i < size; i++) {
+        delta += (block->data[offset + i] != 0);
+    }
+
+    /* A block may become entirely nonzero, so the upper bound is inclusive */
+    assert((int)block->nonzeros + delta >= 0);
+    assert((int)block->nonzeros + delta <= (int)sizeof(block->data));
+    block->nonzeros += delta;
+    if (block->nonzeros == 0) {
+        g_hash_table_remove(s->mapped, (void *)pfn);
+        s->usage -= sizeof(block->data);
+        g_free(block);
+    }
+}
+
+/* Region callbacks: little-endian device taking accesses of 1 to 8 bytes */
+static const MemoryRegionOps sparse_mem_ops = {
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .write = sparse_mem_write,
+    .read = sparse_mem_read,
+    /* Unaligned accesses are not valid, so none can straddle two blocks */
+    .valid.unaligned = false,
+    .valid.max_access_size = 8,
+    .valid.min_access_size = 1,
+};
+
+static Property sparse_mem_properties[] = {
+ /* Address in system memory at which the region gets mapped */
+ DEFINE_PROP_UINT64("baseaddr", SparseMemState, baseaddr, 0x0),
+ /* Length in bytes of the sparse memory region */
+ DEFINE_PROP_UINT64("length", SparseMemState, length, UINT64_MAX),
+ /* Bound in bytes on memory allocated for backing blocks (default 1 MiB) */
+ DEFINE_PROP_UINT64("maxsize", SparseMemState, maxsize, 0x100000),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/* Realize: set up the block table and map the region at priority -100 */
+static void sparse_mem_realize(DeviceState *dev, Error **errp)
+{
+    SparseMemState *sms = SPARSE_MEM(dev);
+    /* Fails for length 0 and whenever baseaddr + length wraps around */
+    assert(sms->baseaddr < sms->baseaddr + sms->length);
+    sms->mapped = g_hash_table_new(NULL, NULL);
+    memory_region_init_io(&sms->mmio, OBJECT(sms), &sparse_mem_ops, sms,
+                          "sparse-mem", sms->length);
+    memory_region_add_subregion_overlap(get_system_memory(), sms->baseaddr,
+                                        &sms->mmio, -100);
+}
+
+/* Class init: hook up the realize handler, description and properties */
+static void sparse_mem_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->realize = sparse_mem_realize;
+    dev_class->desc = "Sparse Memory Device";
+    device_class_set_props(dev_class, sparse_mem_properties);
+}
+
+static const TypeInfo sparse_mem_types[] = {
+    { /* "sparse-mem": a TYPE_DEVICE subclass backed by SparseMemState */
+        .class_init = sparse_mem_class_init,
+        .instance_size = sizeof(SparseMemState),
+        .parent = TYPE_DEVICE,
+        .name = TYPE_SPARSE_MEM,
+    },
+};
+DEFINE_TYPES(sparse_mem_types);
--
2.28.0
On 3/11/21 6:36 AM, Alexander Bulekov wrote:
> For testing, it can be useful to simulate an enormous amount of memory
> (e.g. 2^64 RAM). This adds an MMIO device that acts as sparse memory.
> When something writes a nonzero value to a sparse-mem address, we
> allocate a block of memory. This block is kept around, until all of the
> bytes within the block are zero-ed. The device has a very low priority
> (so it can be mapped beneath actual RAM, and virtual device MMIO
> regions).
I'm not convinced we need this, but still added some comments while
reviewing.
>
> Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
> ---
> MAINTAINERS | 1 +
> hw/mem/meson.build | 1 +
> hw/mem/sparse-mem.c | 154 ++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 156 insertions(+)
> create mode 100644 hw/mem/sparse-mem.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index f22d83c178..9e3d8b1401 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -2618,6 +2618,7 @@ R: Thomas Huth <thuth@redhat.com>
> S: Maintained
> F: tests/qtest/fuzz/
> F: scripts/oss-fuzz/
> +F: hw/mem/sparse-mem.c
> F: docs/devel/fuzzing.rst
>
> Register API
> diff --git a/hw/mem/meson.build b/hw/mem/meson.build
> index 0d22f2b572..732f459e0a 100644
> --- a/hw/mem/meson.build
> +++ b/hw/mem/meson.build
> @@ -1,5 +1,6 @@
> mem_ss = ss.source_set()
> mem_ss.add(files('memory-device.c'))
> +mem_ss.add(files('sparse-mem.c'))
> mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c'))
> mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c'))
> mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c'))
> diff --git a/hw/mem/sparse-mem.c b/hw/mem/sparse-mem.c
> new file mode 100644
> index 0000000000..ffda6f76b4
> --- /dev/null
> +++ b/hw/mem/sparse-mem.c
> @@ -0,0 +1,154 @@
> +/*
> + * A sparse memory device
> + *
> + * Copyright Red Hat Inc., 2021
> + *
> + * Authors:
> + * Alexander Bulekov <alxndr@bu.edu>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +
> +#include "exec/address-spaces.h"
> +#include "hw/qdev-properties.h"
> +
> +#define TYPE_SPARSE_MEM "sparse-mem"
> +#define SPARSE_MEM(obj) OBJECT_CHECK(SparseMemState, (obj), TYPE_SPARSE_MEM)
> +
> +#define SPARSE_BLOCK_SIZE 0x1000
> +
> +typedef struct SparseMemState {
> + DeviceState parent_obj;
> + MemoryRegion mmio;
> + uint64_t baseaddr;
> + uint64_t length;
> + uint64_t usage;
usage -> size_used?
> + uint64_t maxsize;
> + GHashTable *mapped;
> +} SparseMemState;
> +
> +typedef struct sparse_mem_block {
> + uint16_t nonzeros;
> + uint8_t data[SPARSE_BLOCK_SIZE];
> +} sparse_mem_block;
> +
> +static const MemoryRegionOps sparse_mem_ops = {
> + .read = sparse_mem_read,
> + .write = sparse_mem_write,
> + .endianness = DEVICE_LITTLE_ENDIAN,
> + .valid = {
> + .min_access_size = 1,
> + .max_access_size = 8,
> + .unaligned = false,
Why restrict unaligned accesses?
> + },
> +};
> +
> +static Property sparse_mem_properties[] = {
> + /* The base address of the memory */
> + DEFINE_PROP_UINT64("baseaddr", SparseMemState, baseaddr, 0x0),
> + /* The length of the sparse memory region */
> + DEFINE_PROP_UINT64("length", SparseMemState, length, UINT64_MAX),
> + /* Max amount of actual memory that can be used to back the sparse memory */
> + DEFINE_PROP_UINT64("maxsize", SparseMemState, maxsize, 0x100000),
0x100000 -> 1 * MiB
> + DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void sparse_mem_realize(DeviceState *dev, Error **errp)
> +{
> + SparseMemState *s = SPARSE_MEM(dev);
Anyhow, we should restrict this device to the QTest accelerator, right?
Maybe:
if (!qtest_enabled()) {
error_setg(errp, "sparse_mem device requires QTest");
return;
}
> +
> + assert(s->baseaddr + s->length > s->baseaddr);
Don't you need more than 64-bit to do this check?
> +
> + s->mapped = g_hash_table_new(NULL, NULL);
> + memory_region_init_io(&(s->mmio), OBJECT(s), &sparse_mem_ops, s,
> + "sparse-mem", s->length);
> + memory_region_add_subregion_overlap(get_system_memory(), s->baseaddr,
> + &(s->mmio), -100);
Calling memory_region_add_subregion*() on sysmem from a non-sysbus device
is odd... Maybe it is acceptable, I don't know enough.
> +}
> +
On 210311 1525, Philippe Mathieu-Daudé wrote:
> On 3/11/21 6:36 AM, Alexander Bulekov wrote:
> > For testing, it can be useful to simulate an enormous amount of memory
> > (e.g. 2^64 RAM). This adds an MMIO device that acts as sparse memory.
> > When something writes a nonzero value to a sparse-mem address, we
> > allocate a block of memory. This block is kept around, until all of the
> > bytes within the block are zero-ed. The device has a very low priority
> > (so it can be mapped beneath actual RAM, and virtual device MMIO
> > regions).
>
> I'm not convinced we need this, but still added some comments while
> reviewing.
>
> >
> > Signed-off-by: Alexander Bulekov <alxndr@bu.edu>
> > ---
> > MAINTAINERS | 1 +
> > hw/mem/meson.build | 1 +
> > hw/mem/sparse-mem.c | 154 ++++++++++++++++++++++++++++++++++++++++++++
> > 3 files changed, 156 insertions(+)
> > create mode 100644 hw/mem/sparse-mem.c
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index f22d83c178..9e3d8b1401 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -2618,6 +2618,7 @@ R: Thomas Huth <thuth@redhat.com>
> > S: Maintained
> > F: tests/qtest/fuzz/
> > F: scripts/oss-fuzz/
> > +F: hw/mem/sparse-mem.c
> > F: docs/devel/fuzzing.rst
> >
> > Register API
> > diff --git a/hw/mem/meson.build b/hw/mem/meson.build
> > index 0d22f2b572..732f459e0a 100644
> > --- a/hw/mem/meson.build
> > +++ b/hw/mem/meson.build
> > @@ -1,5 +1,6 @@
> > mem_ss = ss.source_set()
> > mem_ss.add(files('memory-device.c'))
> > +mem_ss.add(files('sparse-mem.c'))
> > mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c'))
> > mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c'))
> > mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c'))
> > diff --git a/hw/mem/sparse-mem.c b/hw/mem/sparse-mem.c
> > new file mode 100644
> > index 0000000000..ffda6f76b4
> > --- /dev/null
> > +++ b/hw/mem/sparse-mem.c
> > @@ -0,0 +1,154 @@
> > +/*
> > + * A sparse memory device
> > + *
> > + * Copyright Red Hat Inc., 2021
> > + *
> > + * Authors:
> > + * Alexander Bulekov <alxndr@bu.edu>
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > + * See the COPYING file in the top-level directory.
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +
> > +#include "exec/address-spaces.h"
> > +#include "hw/qdev-properties.h"
> > +
> > +#define TYPE_SPARSE_MEM "sparse-mem"
> > +#define SPARSE_MEM(obj) OBJECT_CHECK(SparseMemState, (obj), TYPE_SPARSE_MEM)
> > +
> > +#define SPARSE_BLOCK_SIZE 0x1000
> > +
> > +typedef struct SparseMemState {
> > + DeviceState parent_obj;
> > + MemoryRegion mmio;
> > + uint64_t baseaddr;
> > + uint64_t length;
> > + uint64_t usage;
>
> usage -> size_used?
>
Ok - that's nicer.
> > + uint64_t maxsize;
> > + GHashTable *mapped;
> > +} SparseMemState;
> > +
> > +typedef struct sparse_mem_block {
> > + uint16_t nonzeros;
> > + uint8_t data[SPARSE_BLOCK_SIZE];
> > +} sparse_mem_block;
> > +
>
> > +static const MemoryRegionOps sparse_mem_ops = {
> > + .read = sparse_mem_read,
> > + .write = sparse_mem_write,
> > + .endianness = DEVICE_LITTLE_ENDIAN,
> > + .valid = {
> > + .min_access_size = 1,
> > + .max_access_size = 8,
> > + .unaligned = false,
>
> Why restrict unaligned accesses?
>
It is mostly a shortcut to avoid dealing with accesses that span
multiple "blocks". E.g. a read from (uint32_t*)0x1ffe would require
looking both at the 0x1000 and 0x2000 blocks.
> > + },
> > +};
> > +
> > +static Property sparse_mem_properties[] = {
> > + /* The base address of the memory */
> > + DEFINE_PROP_UINT64("baseaddr", SparseMemState, baseaddr, 0x0),
> > + /* The length of the sparse memory region */
> > + DEFINE_PROP_UINT64("length", SparseMemState, length, UINT64_MAX),
> > + /* Max amount of actual memory that can be used to back the sparse memory */
> > + DEFINE_PROP_UINT64("maxsize", SparseMemState, maxsize, 0x100000),
>
> 0x100000 -> 1 * MiB
>
Ok.
> > + DEFINE_PROP_END_OF_LIST(),
> > +};
> > +
> > +static void sparse_mem_realize(DeviceState *dev, Error **errp)
> > +{
> > + SparseMemState *s = SPARSE_MEM(dev);
>
> Anyhow, we should restrict this device to QTest accelerator, right?
>
> Maybe:
> if (!qtest_enabled()) {
> error_setg(errp, "sparse_mem device requires QTest");
> return;
> }
>
> > +
> > + assert(s->baseaddr + s->length > s->baseaddr);
>
> Don't you need more than 64-bit to do this check?
The check is to make sure that baseaddr + length doesn't overflow the
64-bit address-space.
>
> > +
> > + s->mapped = g_hash_table_new(NULL, NULL);
> > + memory_region_init_io(&(s->mmio), OBJECT(s), &sparse_mem_ops, s,
> > + "sparse-mem", s->length);
> > + memory_region_add_subregion_overlap(get_system_memory(), s->baseaddr,
> > + &(s->mmio), -100);
>
> mr_add() to sysmem from a non-sysbus device is odd... Maybe it is
> acceptable, I don't know enough.
>
I will try to find a more standard way to do this.
Thanks
-Alex
> > +}
> > +
>
© 2016 - 2026 Red Hat, Inc.