Instead of requesting region information on demand with
VFIO_DEVICE_GET_REGION_INFO, maintain a cache: this will become
necessary for performance for vfio-user, where this call becomes a
message over the control socket, so is of higher overhead than the
traditional path.
We will also need it to generalize region accesses, as that means we
can't use ->config_offset for configuration space accesses, but must
look up the region offset (if relevant) each time.
Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
---
hw/vfio/ccw.c | 5 -----
hw/vfio/container.c | 10 ++++++++++
hw/vfio/device.c | 31 +++++++++++++++++++++++++++----
hw/vfio/igd.c | 8 ++++----
hw/vfio/pci.c | 6 +++---
hw/vfio/region.c | 2 +-
include/hw/vfio/vfio-device.h | 1 +
7 files changed, 46 insertions(+), 17 deletions(-)
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index dac8769925..14dee7cd19 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -504,7 +504,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
vcdev->io_region_offset = info->offset;
vcdev->io_region = g_malloc0(info->size);
- g_free(info);
/* check for the optional async command region */
ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW,
@@ -517,7 +516,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
}
vcdev->async_cmd_region_offset = info->offset;
vcdev->async_cmd_region = g_malloc0(info->size);
- g_free(info);
}
ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW,
@@ -530,7 +528,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
}
vcdev->schib_region_offset = info->offset;
vcdev->schib_region = g_malloc(info->size);
- g_free(info);
}
ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW,
@@ -544,7 +541,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
}
vcdev->crw_region_offset = info->offset;
vcdev->crw_region = g_malloc(info->size);
- g_free(info);
}
return true;
@@ -554,7 +550,6 @@ out_err:
g_free(vcdev->schib_region);
g_free(vcdev->async_cmd_region);
g_free(vcdev->io_region);
- g_free(info);
return false;
}
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 37b1217fd8..61333d7fc4 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -857,6 +857,16 @@ static bool vfio_device_get(VFIOGroup *group, const char *name,
static void vfio_device_put(VFIODevice *vbasedev)
{
+ if (vbasedev->reginfo != NULL) {
+ int i;
+
+ for (i = 0; i < vbasedev->num_regions; i++) {
+ g_free(vbasedev->reginfo[i]);
+ }
+ g_free(vbasedev->reginfo);
+ vbasedev->reginfo = NULL;
+ }
+
if (!vbasedev->group) {
return;
}
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 2966171118..102fa5a9b4 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -205,6 +205,17 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
{
size_t argsz = sizeof(struct vfio_region_info);
+ /* create region info cache */
+ if (vbasedev->reginfo == NULL) {
+ vbasedev->reginfo = g_new0(struct vfio_region_info *,
+ vbasedev->num_regions);
+ }
+ /* check cache */
+ if (vbasedev->reginfo[index] != NULL) {
+ *info = vbasedev->reginfo[index];
+ return 0;
+ }
+
*info = g_malloc0(argsz);
(*info)->index = index;
@@ -224,6 +235,9 @@ retry:
goto retry;
}
+ /* fill cache */
+ vbasedev->reginfo[index] = *info;
+
return 0;
}
@@ -242,7 +256,6 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE);
if (!hdr) {
- g_free(*info);
continue;
}
@@ -254,8 +267,6 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
if (cap_type->type == type && cap_type->subtype == subtype) {
return 0;
}
-
- g_free(*info);
}
*info = NULL;
@@ -264,7 +275,7 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
bool vfio_device_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
{
- g_autofree struct vfio_region_info *info = NULL;
+ struct vfio_region_info *info = NULL;
bool ret = false;
if (!vfio_device_get_region_info(vbasedev, region, &info)) {
@@ -427,6 +438,16 @@ void vfio_device_detach(VFIODevice *vbasedev)
VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer)->detach_device(vbasedev);
}
+static void vfio_device_get_all_region_info(VFIODevice *vbasedev)
+{
+ struct vfio_region_info *info;
+ int i;
+
+ for (i = 0; i < vbasedev->num_regions; i++) {
+ vfio_device_get_region_info(vbasedev, i, &info);
+ }
+}
+
void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
struct vfio_device_info *info)
{
@@ -439,4 +460,6 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next);
QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next);
+
+ vfio_device_get_all_region_info(vbasedev);
}
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
index e1cba16399..d70da1ce38 100644
--- a/hw/vfio/igd.c
+++ b/hw/vfio/igd.c
@@ -198,7 +198,7 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
static bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp)
{
- g_autofree struct vfio_region_info *opregion = NULL;
+ struct vfio_region_info *opregion = NULL;
int ret;
/* Hotplugging is not supported for opregion access */
@@ -361,8 +361,8 @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp)
{
- g_autofree struct vfio_region_info *host = NULL;
- g_autofree struct vfio_region_info *lpc = NULL;
+ struct vfio_region_info *host = NULL;
+ struct vfio_region_info *lpc = NULL;
PCIDevice *lpc_bridge;
int ret;
@@ -526,7 +526,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
* - OpRegion
* - Same LPC bridge and Host bridge VID/DID/SVID/SSID as host
*/
- g_autofree struct vfio_region_info *rom = NULL;
+ struct vfio_region_info *rom = NULL;
legacy_mode_enabled = true;
info_report("IGD legacy mode enabled, "
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c3842d2f8d..b40d5abdfd 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -882,8 +882,8 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
{
- g_autofree struct vfio_region_info *reg_info = NULL;
VFIODevice *vbasedev = &vdev->vbasedev;
+ struct vfio_region_info *reg_info = NULL;
uint64_t size;
off_t off = 0;
ssize_t bytes;
@@ -2721,7 +2721,7 @@ static VFIODeviceOps vfio_pci_ops = {
bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
{
VFIODevice *vbasedev = &vdev->vbasedev;
- g_autofree struct vfio_region_info *reg_info = NULL;
+ struct vfio_region_info *reg_info = NULL;
int ret;
ret = vfio_device_get_region_info(vbasedev, VFIO_PCI_VGA_REGION_INDEX, ®_info);
@@ -2786,7 +2786,7 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
{
VFIODevice *vbasedev = &vdev->vbasedev;
- g_autofree struct vfio_region_info *reg_info = NULL;
+ struct vfio_region_info *reg_info = NULL;
struct vfio_irq_info irq_info;
int i, ret = -1;
diff --git a/hw/vfio/region.c b/hw/vfio/region.c
index 04bf9eb098..ef2630cac3 100644
--- a/hw/vfio/region.c
+++ b/hw/vfio/region.c
@@ -182,7 +182,7 @@ static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
int index, const char *name)
{
- g_autofree struct vfio_region_info *info = NULL;
+ struct vfio_region_info *info = NULL;
int ret;
ret = vfio_device_get_region_info(vbasedev, index, &info);
diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index 9522a09c48..967b07cd89 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -81,6 +81,7 @@ typedef struct VFIODevice {
IOMMUFDBackend *iommufd;
VFIOIOASHwpt *hwpt;
QLIST_ENTRY(VFIODevice) hwpt_next;
+ struct vfio_region_info **reginfo;
} VFIODevice;
struct VFIODeviceOps {
--
2.34.1
On 4/9/25 15:48, John Levon wrote:
> Instead of requesting region information on demand with
> VFIO_DEVICE_GET_REGION_INFO, maintain a cache: this will become
> necessary for performance for vfio-user, where this call becomes a
> message over the control socket, so is of higher overhead than the
> traditional path.
>
> We will also need it to generalize region accesses, as that means we
> can't use ->config_offset for configuration space accesses, but must
> look up the region offset (if relevant) each time.
>
> Originally-by: John Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John Levon <john.levon@nutanix.com>
> ---
> hw/vfio/ccw.c | 5 -----
> hw/vfio/container.c | 10 ++++++++++
> hw/vfio/device.c | 31 +++++++++++++++++++++++++++----
> hw/vfio/igd.c | 8 ++++----
> hw/vfio/pci.c | 6 +++---
> hw/vfio/region.c | 2 +-
> include/hw/vfio/vfio-device.h | 1 +
> 7 files changed, 46 insertions(+), 17 deletions(-)
>
> diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
> index dac8769925..14dee7cd19 100644
> --- a/hw/vfio/ccw.c
> +++ b/hw/vfio/ccw.c
> @@ -504,7 +504,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
>
> vcdev->io_region_offset = info->offset;
> vcdev->io_region = g_malloc0(info->size);
> - g_free(info);
>
> /* check for the optional async command region */
> ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW,
> @@ -517,7 +516,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
> }
> vcdev->async_cmd_region_offset = info->offset;
> vcdev->async_cmd_region = g_malloc0(info->size);
> - g_free(info);
> }
>
> ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW,
> @@ -530,7 +528,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
> }
> vcdev->schib_region_offset = info->offset;
> vcdev->schib_region = g_malloc(info->size);
> - g_free(info);
> }
>
> ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW,
> @@ -544,7 +541,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
> }
> vcdev->crw_region_offset = info->offset;
> vcdev->crw_region = g_malloc(info->size);
> - g_free(info);
> }
>
> return true;
> @@ -554,7 +550,6 @@ out_err:
> g_free(vcdev->schib_region);
> g_free(vcdev->async_cmd_region);
> g_free(vcdev->io_region);
> - g_free(info);
> return false;
> }
>
> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
> index 37b1217fd8..61333d7fc4 100644
> --- a/hw/vfio/container.c
> +++ b/hw/vfio/container.c
> @@ -857,6 +857,16 @@ static bool vfio_device_get(VFIOGroup *group, const char *name,
>
> static void vfio_device_put(VFIODevice *vbasedev)
> {
> + if (vbasedev->reginfo != NULL) {
> + int i;
> +
> + for (i = 0; i < vbasedev->num_regions; i++) {
> + g_free(vbasedev->reginfo[i]);
> + }
> + g_free(vbasedev->reginfo);
> + vbasedev->reginfo = NULL;
> + }
> +
Can we have a vfio_device_unprepare() routine for symmetry with
routine vfio_device_get_all_region_info() ? Naming should be
improved too.
> if (!vbasedev->group) {> return;
> }
> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
> index 2966171118..102fa5a9b4 100644
> --- a/hw/vfio/device.c
> +++ b/hw/vfio/device.c
> @@ -205,6 +205,17 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
> {
> size_t argsz = sizeof(struct vfio_region_info);
>
> + /* create region info cache */
> + if (vbasedev->reginfo == NULL) {
> + vbasedev->reginfo = g_new0(struct vfio_region_info *,
> + vbasedev->num_regions);
> + }
I guess we could allocate ->reginfo[] array sooner in the VFIODevice
object life cycle. Since we lack a realize handler, may be in
vfio_device_prepare() ?
> + /* check cache */
> + if (vbasedev->reginfo[index] != NULL) {
> + *info = vbasedev->reginfo[index];
> + return 0;
> + }
> +
> *info = g_malloc0(argsz);
>
> (*info)->index = index;
> @@ -224,6 +235,9 @@ retry:
> goto retry;
> }
>
> + /* fill cache */
> + vbasedev->reginfo[index] = *info;
> +
> return 0;
> }
>
> @@ -242,7 +256,6 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
>
> hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE);
> if (!hdr) {
> - g_free(*info);
> continue;
> }
>
> @@ -254,8 +267,6 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
> if (cap_type->type == type && cap_type->subtype == subtype) {
> return 0;
> }
> -
> - g_free(*info);
> }
>
> *info = NULL;
> @@ -264,7 +275,7 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
>
> bool vfio_device_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
> {
> - g_autofree struct vfio_region_info *info = NULL;
> + struct vfio_region_info *info = NULL;
> bool ret = false;
>
> if (!vfio_device_get_region_info(vbasedev, region, &info)) {
> @@ -427,6 +438,16 @@ void vfio_device_detach(VFIODevice *vbasedev)
> VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer)->detach_device(vbasedev);
> }
>
> +static void vfio_device_get_all_region_info(VFIODevice *vbasedev)
> +{
> + struct vfio_region_info *info;
> + int i;
> +
> + for (i = 0; i < vbasedev->num_regions; i++) {
> + vfio_device_get_region_info(vbasedev, i, &info);
> + }
> +}
> +
if the vfio_device_get_all_region_info() routine queries *all* region
infos to fill the ->reginfo[] cache array, why do we also need the
lazy cache filling method in vfio_device_get_region_info() ? This looks
redundant to me. I would rather have vfio_device_get_region_info()
operate on the cache only.
Thanks,
C.
> void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
> struct vfio_device_info *info)
> {
> @@ -439,4 +460,6 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
> QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next);
>
> QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next);
> +
> + vfio_device_get_all_region_info(vbasedev);
> }
> diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
> index e1cba16399..d70da1ce38 100644
> --- a/hw/vfio/igd.c
> +++ b/hw/vfio/igd.c
> @@ -198,7 +198,7 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
>
> static bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp)
> {
> - g_autofree struct vfio_region_info *opregion = NULL;
> + struct vfio_region_info *opregion = NULL;
> int ret;
>
> /* Hotplugging is not supported for opregion access */
> @@ -361,8 +361,8 @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
>
> static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp)
> {
> - g_autofree struct vfio_region_info *host = NULL;
> - g_autofree struct vfio_region_info *lpc = NULL;
> + struct vfio_region_info *host = NULL;
> + struct vfio_region_info *lpc = NULL;
> PCIDevice *lpc_bridge;
> int ret;
>
> @@ -526,7 +526,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
> * - OpRegion
> * - Same LPC bridge and Host bridge VID/DID/SVID/SSID as host
> */
> - g_autofree struct vfio_region_info *rom = NULL;
> + struct vfio_region_info *rom = NULL;
>
> legacy_mode_enabled = true;
> info_report("IGD legacy mode enabled, "
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index c3842d2f8d..b40d5abdfd 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -882,8 +882,8 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
>
> static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> {
> - g_autofree struct vfio_region_info *reg_info = NULL;
> VFIODevice *vbasedev = &vdev->vbasedev;
> + struct vfio_region_info *reg_info = NULL;
> uint64_t size;
> off_t off = 0;
> ssize_t bytes;
> @@ -2721,7 +2721,7 @@ static VFIODeviceOps vfio_pci_ops = {
> bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
> {
> VFIODevice *vbasedev = &vdev->vbasedev;
> - g_autofree struct vfio_region_info *reg_info = NULL;
> + struct vfio_region_info *reg_info = NULL;
> int ret;
>
> ret = vfio_device_get_region_info(vbasedev, VFIO_PCI_VGA_REGION_INDEX, ®_info);
> @@ -2786,7 +2786,7 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
> static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
> {
> VFIODevice *vbasedev = &vdev->vbasedev;
> - g_autofree struct vfio_region_info *reg_info = NULL;
> + struct vfio_region_info *reg_info = NULL;
> struct vfio_irq_info irq_info;
> int i, ret = -1;
>
> diff --git a/hw/vfio/region.c b/hw/vfio/region.c
> index 04bf9eb098..ef2630cac3 100644
> --- a/hw/vfio/region.c
> +++ b/hw/vfio/region.c
> @@ -182,7 +182,7 @@ static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
> int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
> int index, const char *name)
> {
> - g_autofree struct vfio_region_info *info = NULL;
> + struct vfio_region_info *info = NULL;
> int ret;
>
> ret = vfio_device_get_region_info(vbasedev, index, &info);
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index 9522a09c48..967b07cd89 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -81,6 +81,7 @@ typedef struct VFIODevice {
> IOMMUFDBackend *iommufd;
> VFIOIOASHwpt *hwpt;
> QLIST_ENTRY(VFIODevice) hwpt_next;
> + struct vfio_region_info **reginfo;
> } VFIODevice;
>
> struct VFIODeviceOps {
On Mon, Apr 28, 2025 at 05:39:46PM +0200, Cédric Le Goater wrote:
> > +static void vfio_device_get_all_region_info(VFIODevice *vbasedev)
> > +{
> > + struct vfio_region_info *info;
> > + int i;
> > +
> > + for (i = 0; i < vbasedev->num_regions; i++) {
> > + vfio_device_get_region_info(vbasedev, i, &info);
>
> if the vfio_device_get_all_region_info() routine queries *all* region
> infos to fill the ->reginfo[] cache array, why do we also need the
> lazy cache filling method in vfio_device_get_region_info() ? This looks
> redundant to me. I would rather have vfio_device_get_region_info()
> operate on the cache only.
I realised I'm not confident about doing this: in theory, a vfio device region
could later become valid based on some change in operation (and hence get region
info would then subsequently work post setup). Instead, I'm going to drop the
"get all" and operate only in caching mode, does that sound OK?
regards
john
On Mon, Apr 28, 2025 at 05:39:46PM +0200, Cédric Le Goater wrote:
> On 4/9/25 15:48, John Levon wrote:
> > Instead of requesting region information on demand with
> > VFIO_DEVICE_GET_REGION_INFO, maintain a cache: this will become
> > necessary for performance for vfio-user, where this call becomes a
> > message over the control socket, so is of higher overhead than the
> > traditional path.
>
> > +static void vfio_device_get_all_region_info(VFIODevice *vbasedev)
> > +{
> > + struct vfio_region_info *info;
> > + int i;
> > +
> > + for (i = 0; i < vbasedev->num_regions; i++) {
> > + vfio_device_get_region_info(vbasedev, i, &info);
> > + }
> > +}
> > +
>
> if the vfio_device_get_all_region_info() routine queries *all* region
> infos to fill the ->reginfo[] cache array, why do we also need the
> lazy cache filling method in vfio_device_get_region_info() ? This looks
> redundant to me. I would rather have vfio_device_get_region_info()
> operate on the cache only.
I think we briefly talked about this last time. I don't know why the cache fill
code is there; I can drop it.
regards
john
On 4/9/25 15:48, John Levon wrote:
> Instead of requesting region information on demand with
> VFIO_DEVICE_GET_REGION_INFO, maintain a cache: this will become
> necessary for performance for vfio-user, where this call becomes a
> message over the control socket, so is of higher overhead than the
> traditional path.
>
> We will also need it to generalize region accesses, as that means we
> can't use ->config_offset for configuration space accesses, but must
> look up the region offset (if relevant) each time.
This change is an optimization for vfio-user. I would prefer to keep it
for after enabling vfio-user.
Thanks,
C.
> Originally-by: John Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John Levon <john.levon@nutanix.com>
> ---
> hw/vfio/ccw.c | 5 -----
> hw/vfio/container.c | 10 ++++++++++
> hw/vfio/device.c | 31 +++++++++++++++++++++++++++----
> hw/vfio/igd.c | 8 ++++----
> hw/vfio/pci.c | 6 +++---
> hw/vfio/region.c | 2 +-
> include/hw/vfio/vfio-device.h | 1 +
> 7 files changed, 46 insertions(+), 17 deletions(-)
>
> diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
> index dac8769925..14dee7cd19 100644
> --- a/hw/vfio/ccw.c
> +++ b/hw/vfio/ccw.c
> @@ -504,7 +504,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
>
> vcdev->io_region_offset = info->offset;
> vcdev->io_region = g_malloc0(info->size);
> - g_free(info);
>
> /* check for the optional async command region */
> ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW,
> @@ -517,7 +516,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
> }
> vcdev->async_cmd_region_offset = info->offset;
> vcdev->async_cmd_region = g_malloc0(info->size);
> - g_free(info);
> }
>
> ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW,
> @@ -530,7 +528,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
> }
> vcdev->schib_region_offset = info->offset;
> vcdev->schib_region = g_malloc(info->size);
> - g_free(info);
> }
>
> ret = vfio_device_get_region_info_type(vdev, VFIO_REGION_TYPE_CCW,
> @@ -544,7 +541,6 @@ static bool vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
> }
> vcdev->crw_region_offset = info->offset;
> vcdev->crw_region = g_malloc(info->size);
> - g_free(info);
> }
>
> return true;
> @@ -554,7 +550,6 @@ out_err:
> g_free(vcdev->schib_region);
> g_free(vcdev->async_cmd_region);
> g_free(vcdev->io_region);
> - g_free(info);
> return false;
> }
>
> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
> index 37b1217fd8..61333d7fc4 100644
> --- a/hw/vfio/container.c
> +++ b/hw/vfio/container.c
> @@ -857,6 +857,16 @@ static bool vfio_device_get(VFIOGroup *group, const char *name,
>
> static void vfio_device_put(VFIODevice *vbasedev)
> {
> + if (vbasedev->reginfo != NULL) {
> + int i;
> +
> + for (i = 0; i < vbasedev->num_regions; i++) {
> + g_free(vbasedev->reginfo[i]);
> + }
> + g_free(vbasedev->reginfo);
> + vbasedev->reginfo = NULL;
> + }
> +
> if (!vbasedev->group) {
> return;
> }
> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
> index 2966171118..102fa5a9b4 100644
> --- a/hw/vfio/device.c
> +++ b/hw/vfio/device.c
> @@ -205,6 +205,17 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
> {
> size_t argsz = sizeof(struct vfio_region_info);
>
> + /* create region info cache */
> + if (vbasedev->reginfo == NULL) {
> + vbasedev->reginfo = g_new0(struct vfio_region_info *,
> + vbasedev->num_regions);
> + }
> + /* check cache */
> + if (vbasedev->reginfo[index] != NULL) {
> + *info = vbasedev->reginfo[index];
> + return 0;
> + }
> +
> *info = g_malloc0(argsz);
>
> (*info)->index = index;
> @@ -224,6 +235,9 @@ retry:
> goto retry;
> }
>
> + /* fill cache */
> + vbasedev->reginfo[index] = *info;
> +
> return 0;
> }
>
> @@ -242,7 +256,6 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
>
> hdr = vfio_get_region_info_cap(*info, VFIO_REGION_INFO_CAP_TYPE);
> if (!hdr) {
> - g_free(*info);
> continue;
> }
>
> @@ -254,8 +267,6 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
> if (cap_type->type == type && cap_type->subtype == subtype) {
> return 0;
> }
> -
> - g_free(*info);
> }
>
> *info = NULL;
> @@ -264,7 +275,7 @@ int vfio_device_get_region_info_type(VFIODevice *vbasedev, uint32_t type,
>
> bool vfio_device_has_region_cap(VFIODevice *vbasedev, int region, uint16_t cap_type)
> {
> - g_autofree struct vfio_region_info *info = NULL;
> + struct vfio_region_info *info = NULL;
> bool ret = false;
>
> if (!vfio_device_get_region_info(vbasedev, region, &info)) {
> @@ -427,6 +438,16 @@ void vfio_device_detach(VFIODevice *vbasedev)
> VFIO_IOMMU_GET_CLASS(vbasedev->bcontainer)->detach_device(vbasedev);
> }
>
> +static void vfio_device_get_all_region_info(VFIODevice *vbasedev)
> +{
> + struct vfio_region_info *info;
> + int i;
> +
> + for (i = 0; i < vbasedev->num_regions; i++) {
> + vfio_device_get_region_info(vbasedev, i, &info);
> + }
> +}
> +
> void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
> struct vfio_device_info *info)
> {
> @@ -439,4 +460,6 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
> QLIST_INSERT_HEAD(&bcontainer->device_list, vbasedev, container_next);
>
> QLIST_INSERT_HEAD(&vfio_device_list, vbasedev, global_next);
> +
> + vfio_device_get_all_region_info(vbasedev);
> }
> diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
> index e1cba16399..d70da1ce38 100644
> --- a/hw/vfio/igd.c
> +++ b/hw/vfio/igd.c
> @@ -198,7 +198,7 @@ static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
>
> static bool vfio_pci_igd_setup_opregion(VFIOPCIDevice *vdev, Error **errp)
> {
> - g_autofree struct vfio_region_info *opregion = NULL;
> + struct vfio_region_info *opregion = NULL;
> int ret;
>
> /* Hotplugging is not supported for opregion access */
> @@ -361,8 +361,8 @@ static int vfio_pci_igd_lpc_init(VFIOPCIDevice *vdev,
>
> static bool vfio_pci_igd_setup_lpc_bridge(VFIOPCIDevice *vdev, Error **errp)
> {
> - g_autofree struct vfio_region_info *host = NULL;
> - g_autofree struct vfio_region_info *lpc = NULL;
> + struct vfio_region_info *host = NULL;
> + struct vfio_region_info *lpc = NULL;
> PCIDevice *lpc_bridge;
> int ret;
>
> @@ -526,7 +526,7 @@ static bool vfio_pci_igd_config_quirk(VFIOPCIDevice *vdev, Error **errp)
> * - OpRegion
> * - Same LPC bridge and Host bridge VID/DID/SVID/SSID as host
> */
> - g_autofree struct vfio_region_info *rom = NULL;
> + struct vfio_region_info *rom = NULL;
>
> legacy_mode_enabled = true;
> info_report("IGD legacy mode enabled, "
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index c3842d2f8d..b40d5abdfd 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -882,8 +882,8 @@ static void vfio_update_msi(VFIOPCIDevice *vdev)
>
> static void vfio_pci_load_rom(VFIOPCIDevice *vdev)
> {
> - g_autofree struct vfio_region_info *reg_info = NULL;
> VFIODevice *vbasedev = &vdev->vbasedev;
> + struct vfio_region_info *reg_info = NULL;
> uint64_t size;
> off_t off = 0;
> ssize_t bytes;
> @@ -2721,7 +2721,7 @@ static VFIODeviceOps vfio_pci_ops = {
> bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
> {
> VFIODevice *vbasedev = &vdev->vbasedev;
> - g_autofree struct vfio_region_info *reg_info = NULL;
> + struct vfio_region_info *reg_info = NULL;
> int ret;
>
> ret = vfio_device_get_region_info(vbasedev, VFIO_PCI_VGA_REGION_INDEX, ®_info);
> @@ -2786,7 +2786,7 @@ bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
> static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
> {
> VFIODevice *vbasedev = &vdev->vbasedev;
> - g_autofree struct vfio_region_info *reg_info = NULL;
> + struct vfio_region_info *reg_info = NULL;
> struct vfio_irq_info irq_info;
> int i, ret = -1;
>
> diff --git a/hw/vfio/region.c b/hw/vfio/region.c
> index 04bf9eb098..ef2630cac3 100644
> --- a/hw/vfio/region.c
> +++ b/hw/vfio/region.c
> @@ -182,7 +182,7 @@ static int vfio_setup_region_sparse_mmaps(VFIORegion *region,
> int vfio_region_setup(Object *obj, VFIODevice *vbasedev, VFIORegion *region,
> int index, const char *name)
> {
> - g_autofree struct vfio_region_info *info = NULL;
> + struct vfio_region_info *info = NULL;
> int ret;
>
> ret = vfio_device_get_region_info(vbasedev, index, &info);
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index 9522a09c48..967b07cd89 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -81,6 +81,7 @@ typedef struct VFIODevice {
> IOMMUFDBackend *iommufd;
> VFIOIOASHwpt *hwpt;
> QLIST_ENTRY(VFIODevice) hwpt_next;
> + struct vfio_region_info **reginfo;
> } VFIODevice;
>
> struct VFIODeviceOps {
On Thu, Apr 24, 2025 at 06:08:21PM +0200, Cédric Le Goater wrote:
> On 4/9/25 15:48, John Levon wrote:
> > Instead of requesting region information on demand with
> > VFIO_DEVICE_GET_REGION_INFO, maintain a cache: this will become
> > necessary for performance for vfio-user, where this call becomes a
> > message over the control socket, so is of higher overhead than the
> > traditional path.
> >
> > We will also need it to generalize region accesses, as that means we
> > can't use ->config_offset for configuration space accesses, but must
> > look up the region offset (if relevant) each time.
>
> This change is an optimization for vfio-user. I would prefer to keep it
> for after enabling vfio-user.
It's not vfio-user specific. Just to clarify, you want this code:
static int vfio_io_region_write(VFIODevice *vbasedev, uint8_t index, off_t off,
uint32_t size, void *data, bool post)
{
struct vfio_region_info *info = vbasedev->regions[index];
int ret;
ret = pwrite(vbasedev->fd, data, size, info->offset + off);
return ret < 0 ? -errno : ret;
}
to become:
static int vfio_io_region_write(VFIODevice *vbasedev, uint8_t index, off_t off,
uint32_t size, void *data, bool post)
{
struct vfio_region_info info;
ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &info);
struct vfio_region_info *info = vbasedev->regions[index];
int ret;
ret = pwrite(vbasedev->fd, data, size, info->offset + off);
return ret < 0 ? -errno : ret;
}
i.e. every region read/write needs to look up info each time?
If not, what are you suggesting?
regards
john
On 4/24/25 18:26, John Levon wrote:
> On Thu, Apr 24, 2025 at 06:08:21PM +0200, Cédric Le Goater wrote:
>
>> On 4/9/25 15:48, John Levon wrote:
>>> Instead of requesting region information on demand with
>>> VFIO_DEVICE_GET_REGION_INFO, maintain a cache: this will become
>>> necessary for performance for vfio-user, where this call becomes a
>>> message over the control socket, so is of higher overhead than the
>>> traditional path.
>>>
>>> We will also need it to generalize region accesses, as that means we
>>> can't use ->config_offset for configuration space accesses, but must
>>> look up the region offset (if relevant) each time.
>>
>> This change is an optimization for vfio-user. I would prefer to keep it
>> for after enabling vfio-user.
>
> It's not vfio-user specific. Just to clarify, you want this code:
>
> static int vfio_io_region_write(VFIODevice *vbasedev, uint8_t index, off_t off,
> uint32_t size, void *data, bool post)
> {
> struct vfio_region_info *info = vbasedev->regions[index];
> int ret;
>
> ret = pwrite(vbasedev->fd, data, size, info->offset + off);
>
> return ret < 0 ? -errno : ret;
> }
>
> to become:
>
> static int vfio_io_region_write(VFIODevice *vbasedev, uint8_t index, off_t off,
> uint32_t size, void *data, bool post)
> {
> struct vfio_region_info info;
>
> ioctl(vbasedev->fd, VFIO_DEVICE_GET_IRQ_INFO, &info);
> > struct vfio_region_info *info = vbasedev->regions[index];
> int ret;
>
> ret = pwrite(vbasedev->fd, data, size, info->offset + off);
>
> return ret < 0 ? -errno : ret;
> }
>
>
> i.e. every region read/write needs to look up info each time?
Oh I didn't this. So the introduction VFIODeviceIOOps is not seamless.
> If not, what are you suggesting?
vfio_device_io_region_read and vfio_device_io_region_write should come
separately in patch 13.
Let me comment more this patch.
Thanks,
C.
On Mon, Apr 28, 2025 at 05:16:50PM +0200, Cédric Le Goater wrote: > > i.e. every region read/write needs to look up info each time? > > Oh I didn't this. So the introduction VFIODeviceIOOps is not seamless. Correct. > > If not, what are you suggesting? > > vfio_device_io_region_read and vfio_device_io_region_write should come > separately in patch 13. OK, can do. thanks john
© 2016 - 2025 Red Hat, Inc.