Determine the BARs used by the PCI device and register handlers to
manage the access to the same.
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
include/exec/memory.h | 3 +
hw/remote/vfio-user-obj.c | 167 ++++++++++++++++++++++++++++++++
softmmu/physmem.c | 4 +-
tests/qtest/fuzz/generic_fuzz.c | 9 +-
hw/remote/trace-events | 3 +
5 files changed, 180 insertions(+), 6 deletions(-)
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 4d5997e6bb..4b061e62d5 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -2810,6 +2810,9 @@ MemTxResult address_space_write_cached_slow(MemoryRegionCache *cache,
hwaddr addr, const void *buf,
hwaddr len);
+int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr);
+bool prepare_mmio_access(MemoryRegion *mr);
+
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
if (is_write) {
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index 425e45e8b2..6910c16243 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -53,6 +53,7 @@
#include "hw/qdev-core.h"
#include "hw/pci/pci.h"
#include "qemu/timer.h"
+#include "exec/memory.h"
#define TYPE_VFU_OBJECT "x-vfio-user-server"
OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
@@ -324,6 +325,170 @@ static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
}
+static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar,
+ hwaddr offset, char * const buf,
+ hwaddr len, const bool is_write)
+{
+ uint8_t *ptr = (uint8_t *)buf;
+ uint8_t *ram_ptr = NULL;
+ bool release_lock = false;
+ MemoryRegionSection section = { 0 };
+ MemoryRegion *mr = NULL;
+ int access_size;
+ hwaddr size = 0;
+ MemTxResult result;
+ uint64_t val;
+
+ section = memory_region_find(pci_dev->io_regions[pci_bar].memory,
+ offset, len);
+
+ if (!section.mr) {
+ return 0;
+ }
+
+ mr = section.mr;
+ offset = section.offset_within_region;
+
+ if (is_write && mr->readonly) {
+ warn_report("vfu: attempting to write to readonly region in "
+ "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]",
+ pci_bar, offset, (offset + len));
+ return 0;
+ }
+
+ if (memory_access_is_direct(mr, is_write)) {
+ /**
+ * Some devices expose a PCI expansion ROM, which could be buffer
+ * based as compared to other regions which are primarily based on
+ * MemoryRegionOps. memory_region_find() would already check
+ * for buffer overflow, we don't need to repeat it here.
+ */
+ ram_ptr = memory_region_get_ram_ptr(mr);
+
+ size = len;
+
+ if (is_write) {
+ memcpy((ram_ptr + offset), buf, size);
+ } else {
+ memcpy(buf, (ram_ptr + offset), size);
+ }
+
+ goto exit;
+ }
+
+ while (len > 0) {
+ /**
+ * The read/write logic used below is similar to the ones in
+ * flatview_read/write_continue()
+ */
+ release_lock = prepare_mmio_access(mr);
+
+ access_size = memory_access_size(mr, len, offset);
+
+ if (is_write) {
+ val = ldn_he_p(ptr, access_size);
+
+ result = memory_region_dispatch_write(mr, offset, val,
+ size_memop(access_size),
+ MEMTXATTRS_UNSPECIFIED);
+ } else {
+ result = memory_region_dispatch_read(mr, offset, &val,
+ size_memop(access_size),
+ MEMTXATTRS_UNSPECIFIED);
+
+ stn_he_p(ptr, access_size, val);
+ }
+
+ if (release_lock) {
+ qemu_mutex_unlock_iothread();
+ release_lock = false;
+ }
+
+ if (result != MEMTX_OK) {
+ warn_report("vfu: failed to %s 0x%"PRIx64"",
+ is_write ? "write to" : "read from",
+ (offset - size));
+
+ goto exit;
+ }
+
+ len -= access_size;
+ size += access_size;
+ ptr += access_size;
+ offset += access_size;
+ }
+
+exit:
+ memory_region_unref(mr);
+
+ return size;
+}
+
+/**
+ * VFU_OBJECT_BAR_HANDLER - macro for defining handlers for PCI BARs.
+ *
+ * To create handler for BAR number 2, VFU_OBJECT_BAR_HANDLER(2) would
+ * define vfu_object_bar2_handler
+ */
+#define VFU_OBJECT_BAR_HANDLER(BAR_NO) \
+ static ssize_t vfu_object_bar##BAR_NO##_handler(vfu_ctx_t *vfu_ctx, \
+ char * const buf, size_t count, \
+ loff_t offset, const bool is_write) \
+ { \
+ VfuObject *o = vfu_get_private(vfu_ctx); \
+ PCIDevice *pci_dev = o->pci_dev; \
+ \
+ return vfu_object_bar_rw(pci_dev, BAR_NO, offset, \
+ buf, count, is_write); \
+ } \
+
+VFU_OBJECT_BAR_HANDLER(0)
+VFU_OBJECT_BAR_HANDLER(1)
+VFU_OBJECT_BAR_HANDLER(2)
+VFU_OBJECT_BAR_HANDLER(3)
+VFU_OBJECT_BAR_HANDLER(4)
+VFU_OBJECT_BAR_HANDLER(5)
+VFU_OBJECT_BAR_HANDLER(6)
+
+static vfu_region_access_cb_t *vfu_object_bar_handlers[PCI_NUM_REGIONS] = {
+ &vfu_object_bar0_handler,
+ &vfu_object_bar1_handler,
+ &vfu_object_bar2_handler,
+ &vfu_object_bar3_handler,
+ &vfu_object_bar4_handler,
+ &vfu_object_bar5_handler,
+ &vfu_object_bar6_handler,
+};
+
+/**
+ * vfu_object_register_bars - Identify active BAR regions of pdev and setup
+ * callbacks to handle read/write accesses
+ */
+static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev)
+{
+ int flags = VFU_REGION_FLAG_RW;
+ int i;
+
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
+ if (!pdev->io_regions[i].size) {
+ continue;
+ }
+
+ if ((i == VFU_PCI_DEV_ROM_REGION_IDX) ||
+ pdev->io_regions[i].memory->readonly) {
+ flags &= ~VFU_REGION_FLAG_WRITE;
+ }
+
+ vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX + i,
+ (size_t)pdev->io_regions[i].size,
+ vfu_object_bar_handlers[i],
+ flags, NULL, 0, -1, 0);
+
+ trace_vfu_bar_register(i, pdev->io_regions[i].addr,
+ pdev->io_regions[i].size);
+ }
+}
+
/*
* TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
* properties. It also depends on devices instantiated in QEMU. These
@@ -420,6 +585,8 @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp)
goto fail;
}
+ vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
+
ret = vfu_realize_ctx(o->vfu_ctx);
if (ret < 0) {
error_setg(errp, "vfu: Failed to realize device %s- %s",
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 4e1b27a20e..f9a68d1fe5 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -2719,7 +2719,7 @@ void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size)
invalidate_and_set_dirty(mr, addr, size);
}
-static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
+int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
unsigned access_size_max = mr->ops->valid.max_access_size;
@@ -2746,7 +2746,7 @@ static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
return l;
}
-static bool prepare_mmio_access(MemoryRegion *mr)
+bool prepare_mmio_access(MemoryRegion *mr)
{
bool release_lock = false;
diff --git a/tests/qtest/fuzz/generic_fuzz.c b/tests/qtest/fuzz/generic_fuzz.c
index dd7e25851c..77547fc1d8 100644
--- a/tests/qtest/fuzz/generic_fuzz.c
+++ b/tests/qtest/fuzz/generic_fuzz.c
@@ -144,7 +144,7 @@ static void *pattern_alloc(pattern p, size_t len)
return buf;
}
-static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
+static int fuzz_memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
{
unsigned access_size_max = mr->ops->valid.max_access_size;
@@ -242,11 +242,12 @@ void fuzz_dma_read_cb(size_t addr, size_t len, MemoryRegion *mr)
/*
* If mr1 isn't RAM, address_space_translate doesn't update l. Use
- * memory_access_size to identify the number of bytes that it is safe
- * to write without accidentally writing to another MemoryRegion.
+ * fuzz_memory_access_size to identify the number of bytes that it
+ * is safe to write without accidentally writing to another
+ * MemoryRegion.
*/
if (!memory_region_is_ram(mr1)) {
- l = memory_access_size(mr1, l, addr1);
+ l = fuzz_memory_access_size(mr1, l, addr1);
}
if (memory_region_is_ram(mr1) ||
memory_region_is_romd(mr1) ||
diff --git a/hw/remote/trace-events b/hw/remote/trace-events
index f945c7e33b..847d50d88f 100644
--- a/hw/remote/trace-events
+++ b/hw/remote/trace-events
@@ -9,3 +9,6 @@ vfu_cfg_read(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u -> 0x%x"
vfu_cfg_write(uint32_t offset, uint32_t val) "vfu: cfg: 0x%u <- 0x%x"
vfu_dma_register(uint64_t gpa, size_t len) "vfu: registering GPA 0x%"PRIx64", %zu bytes"
vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64""
+vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 0x%"PRIx64" size 0x%"PRIx64""
+vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR address 0x%"PRIx64""
+vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR address 0x%"PRIx64""
--
2.20.1
On Fri, Mar 25, 2022 at 03:19:43PM -0400, Jagannathan Raman wrote:
> @@ -324,6 +325,170 @@ static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
> trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
> }
>
> +static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar,
> + hwaddr offset, char * const buf,
> + hwaddr len, const bool is_write)
> +{
> + uint8_t *ptr = (uint8_t *)buf;
> + uint8_t *ram_ptr = NULL;
> + bool release_lock = false;
> + MemoryRegionSection section = { 0 };
> + MemoryRegion *mr = NULL;
> + int access_size;
> + hwaddr size = 0;
> + MemTxResult result;
> + uint64_t val;
> +
> + section = memory_region_find(pci_dev->io_regions[pci_bar].memory,
> + offset, len);
> +
> + if (!section.mr) {
> + return 0;
> + }
> +
> + mr = section.mr;
> + offset = section.offset_within_region;
> +
> + if (is_write && mr->readonly) {
> + warn_report("vfu: attempting to write to readonly region in "
> + "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]",
> + pci_bar, offset, (offset + len));
> + return 0;
A mr reference is leaked. The return statement can be replaced with goto
exit.
> + }
> +
> + if (memory_access_is_direct(mr, is_write)) {
> + /**
> + * Some devices expose a PCI expansion ROM, which could be buffer
> + * based as compared to other regions which are primarily based on
> + * MemoryRegionOps. memory_region_find() would already check
> + * for buffer overflow, we don't need to repeat it here.
> + */
> + ram_ptr = memory_region_get_ram_ptr(mr);
> +
> + size = len;
This looks like it will access beyond the end of ram_ptr when
section.size < len after memory_region_find() returns.
> +
> + if (is_write) {
> + memcpy((ram_ptr + offset), buf, size);
> + } else {
> + memcpy(buf, (ram_ptr + offset), size);
> + }
> +
> + goto exit;
What happens when the access spans two adjacent MemoryRegions? I think
the while (len > 0) loop is needed even in the memory_access_is_direct()
case so we perform the full access instead of truncating it.
> + }
> +
> + while (len > 0) {
> On Mar 29, 2022, at 8:50 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>
> On Fri, Mar 25, 2022 at 03:19:43PM -0400, Jagannathan Raman wrote:
>> @@ -324,6 +325,170 @@ static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
>> trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
>> }
>>
>> +static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar,
>> + hwaddr offset, char * const buf,
>> + hwaddr len, const bool is_write)
>> +{
>> + uint8_t *ptr = (uint8_t *)buf;
>> + uint8_t *ram_ptr = NULL;
>> + bool release_lock = false;
>> + MemoryRegionSection section = { 0 };
>> + MemoryRegion *mr = NULL;
>> + int access_size;
>> + hwaddr size = 0;
>> + MemTxResult result;
>> + uint64_t val;
>> +
>> + section = memory_region_find(pci_dev->io_regions[pci_bar].memory,
>> + offset, len);
>> +
>> + if (!section.mr) {
>> + return 0;
>> + }
>> +
>> + mr = section.mr;
>> + offset = section.offset_within_region;
>> +
>> + if (is_write && mr->readonly) {
>> + warn_report("vfu: attempting to write to readonly region in "
>> + "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]",
>> + pci_bar, offset, (offset + len));
>> + return 0;
>
> A mr reference is leaked. The return statement can be replaced with goto
> exit.
OK.
>
>> + }
>> +
>> + if (memory_access_is_direct(mr, is_write)) {
>> + /**
>> + * Some devices expose a PCI expansion ROM, which could be buffer
>> + * based as compared to other regions which are primarily based on
>> + * MemoryRegionOps. memory_region_find() would already check
>> + * for buffer overflow, we don't need to repeat it here.
>> + */
>> + ram_ptr = memory_region_get_ram_ptr(mr);
>> +
>> + size = len;
>
> This looks like it will access beyond the end of ram_ptr when
> section.size < len after memory_region_find() returns.
OK - will will handle shorter sections returned by memory_region_find().
>
>> +
>> + if (is_write) {
>> + memcpy((ram_ptr + offset), buf, size);
>> + } else {
>> + memcpy(buf, (ram_ptr + offset), size);
>> + }
>> +
>> + goto exit;
>
> What happens when the access spans two adjacent MemoryRegions? I think
> the while (len > 0) loop is needed even in the memory_access_is_direct()
> case so we perform the full access instead of truncating it.
OK - this should be covered by the shorter section handling above.
Thank you!
--
Jag
>
>> + }
>> +
>> + while (len > 0) {
On Tue, Mar 29, 2022 at 03:51:17PM +0000, Jag Raman wrote:
>
>
> > On Mar 29, 2022, at 8:50 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> >
> > On Fri, Mar 25, 2022 at 03:19:43PM -0400, Jagannathan Raman wrote:
> >> @@ -324,6 +325,170 @@ static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
> >> trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
> >> }
> >>
> >> +static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar,
> >> + hwaddr offset, char * const buf,
> >> + hwaddr len, const bool is_write)
> >> +{
> >> + uint8_t *ptr = (uint8_t *)buf;
> >> + uint8_t *ram_ptr = NULL;
> >> + bool release_lock = false;
> >> + MemoryRegionSection section = { 0 };
> >> + MemoryRegion *mr = NULL;
> >> + int access_size;
> >> + hwaddr size = 0;
> >> + MemTxResult result;
> >> + uint64_t val;
> >> +
> >> + section = memory_region_find(pci_dev->io_regions[pci_bar].memory,
> >> + offset, len);
> >> +
> >> + if (!section.mr) {
> >> + return 0;
> >> + }
> >> +
> >> + mr = section.mr;
> >> + offset = section.offset_within_region;
> >> +
> >> + if (is_write && mr->readonly) {
> >> + warn_report("vfu: attempting to write to readonly region in "
> >> + "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]",
> >> + pci_bar, offset, (offset + len));
> >> + return 0;
> >
> > A mr reference is leaked. The return statement can be replaced with goto
> > exit.
>
> OK.
>
> >
> >> + }
> >> +
> >> + if (memory_access_is_direct(mr, is_write)) {
> >> + /**
> >> + * Some devices expose a PCI expansion ROM, which could be buffer
> >> + * based as compared to other regions which are primarily based on
> >> + * MemoryRegionOps. memory_region_find() would already check
> >> + * for buffer overflow, we don't need to repeat it here.
> >> + */
> >> + ram_ptr = memory_region_get_ram_ptr(mr);
> >> +
> >> + size = len;
> >
> > This looks like it will access beyond the end of ram_ptr when
> > section.size < len after memory_region_find() returns.
>
> OK - will will handle shorter sections returned by memory_region_find().
>
> >
> >> +
> >> + if (is_write) {
> >> + memcpy((ram_ptr + offset), buf, size);
> >> + } else {
> >> + memcpy(buf, (ram_ptr + offset), size);
> >> + }
> >> +
> >> + goto exit;
> >
> > What happens when the access spans two adjacent MemoryRegions? I think
> > the while (len > 0) loop is needed even in the memory_access_is_direct()
> > case so we perform the full access instead of truncating it.
>
> OK - this should be covered by the shorter section handling above.
No, shorter section handling truncates that access. I think the caller
expects all len bytes to be accessed, not just the first few bytes?
Stefan
> On Mar 30, 2022, at 6:05 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>
> On Tue, Mar 29, 2022 at 03:51:17PM +0000, Jag Raman wrote:
>>
>>
>>> On Mar 29, 2022, at 8:50 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
>>>
>>> On Fri, Mar 25, 2022 at 03:19:43PM -0400, Jagannathan Raman wrote:
>>>> @@ -324,6 +325,170 @@ static void dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info)
>>>> trace_vfu_dma_unregister((uint64_t)info->iova.iov_base);
>>>> }
>>>>
>>>> +static size_t vfu_object_bar_rw(PCIDevice *pci_dev, int pci_bar,
>>>> + hwaddr offset, char * const buf,
>>>> + hwaddr len, const bool is_write)
>>>> +{
>>>> + uint8_t *ptr = (uint8_t *)buf;
>>>> + uint8_t *ram_ptr = NULL;
>>>> + bool release_lock = false;
>>>> + MemoryRegionSection section = { 0 };
>>>> + MemoryRegion *mr = NULL;
>>>> + int access_size;
>>>> + hwaddr size = 0;
>>>> + MemTxResult result;
>>>> + uint64_t val;
>>>> +
>>>> + section = memory_region_find(pci_dev->io_regions[pci_bar].memory,
>>>> + offset, len);
>>>> +
>>>> + if (!section.mr) {
>>>> + return 0;
>>>> + }
>>>> +
>>>> + mr = section.mr;
>>>> + offset = section.offset_within_region;
>>>> +
>>>> + if (is_write && mr->readonly) {
>>>> + warn_report("vfu: attempting to write to readonly region in "
>>>> + "bar %d - [0x%"PRIx64" - 0x%"PRIx64"]",
>>>> + pci_bar, offset, (offset + len));
>>>> + return 0;
>>>
>>> A mr reference is leaked. The return statement can be replaced with goto
>>> exit.
>>
>> OK.
>>
>>>
>>>> + }
>>>> +
>>>> + if (memory_access_is_direct(mr, is_write)) {
>>>> + /**
>>>> + * Some devices expose a PCI expansion ROM, which could be buffer
>>>> + * based as compared to other regions which are primarily based on
>>>> + * MemoryRegionOps. memory_region_find() would already check
>>>> + * for buffer overflow, we don't need to repeat it here.
>>>> + */
>>>> + ram_ptr = memory_region_get_ram_ptr(mr);
>>>> +
>>>> + size = len;
>>>
>>> This looks like it will access beyond the end of ram_ptr when
>>> section.size < len after memory_region_find() returns.
>>
>> OK - will will handle shorter sections returned by memory_region_find().
>>
>>>
>>>> +
>>>> + if (is_write) {
>>>> + memcpy((ram_ptr + offset), buf, size);
>>>> + } else {
>>>> + memcpy(buf, (ram_ptr + offset), size);
>>>> + }
>>>> +
>>>> + goto exit;
>>>
>>> What happens when the access spans two adjacent MemoryRegions? I think
>>> the while (len > 0) loop is needed even in the memory_access_is_direct()
>>> case so we perform the full access instead of truncating it.
>>
>> OK - this should be covered by the shorter section handling above.
>
> No, shorter section handling truncates that access. I think the caller
> expects all len bytes to be accessed, not just the first few bytes?
Yes, I also think that the caller expects to access all len bytes.
I should’ve provided more details - but by shorter section handling I
meant iterating over all the sections that make up len bytes.
Thank you!
--
Jag
>
> Stefan
© 2016 - 2026 Red Hat, Inc.