Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
hw/vfio/pci.c | 7 ++-
hw/vfio/user-protocol.h | 25 +++++++++
hw/vfio/user.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 166 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 7abe44e..be39a4e 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -713,7 +713,8 @@ retry:
ret = vfio_enable_vectors(vdev, false);
if (ret) {
if (ret < 0) {
- error_report("vfio: Error: Failed to setup MSI fds: %m");
+ error_report("vfio: Error: Failed to setup MSI fds: %s",
+ strerror(-ret));
} else {
error_report("vfio: Error: Failed to enable %d "
"MSI vectors, retry with %d", vdev->nr_vectors, ret);
@@ -2712,6 +2713,7 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
ret = VDEV_GET_IRQ_INFO(vbasedev, &irq_info);
+
if (ret) {
/* This can fail for an old kernel or legacy PCI dev */
trace_vfio_populate_device_get_irq_info_failure(strerror(errno));
@@ -3593,6 +3595,9 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
goto out_teardown;
}
+ vfio_register_err_notifier(vdev);
+ vfio_register_req_notifier(vdev);
+
return;
out_teardown:
diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
index 124340c..31704cf 100644
--- a/hw/vfio/user-protocol.h
+++ b/hw/vfio/user-protocol.h
@@ -141,6 +141,31 @@ typedef struct {
} VFIOUserRegionInfo;
/*
+ * VFIO_USER_DEVICE_GET_IRQ_INFO
+ * imported from struct vfio_irq_info
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint32_t index;
+ uint32_t count;
+} VFIOUserIRQInfo;
+
+/*
+ * VFIO_USER_DEVICE_SET_IRQS
+ * imported from struct vfio_irq_set
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz;
+ uint32_t flags;
+ uint32_t index;
+ uint32_t start;
+ uint32_t count;
+} VFIOUserIRQSet;
+
+/*
* VFIO_USER_REGION_READ
* VFIO_USER_REGION_WRITE
*/
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
index 1453bb5..815385b 100644
--- a/hw/vfio/user.c
+++ b/hw/vfio/user.c
@@ -1164,6 +1164,117 @@ static int vfio_user_get_region_info(VFIOProxy *proxy,
return 0;
}
+static int vfio_user_get_irq_info(VFIOProxy *proxy,
+ struct vfio_irq_info *info)
+{
+ VFIOUserIRQInfo msg;
+
+ memset(&msg, 0, sizeof(msg));
+ vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO,
+ sizeof(msg), 0);
+ msg.argsz = info->argsz;
+ msg.index = info->index;
+
+ vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, false);
+ if (msg.hdr.flags & VFIO_USER_ERROR) {
+ return -msg.hdr.error_reply;
+ }
+
+ memcpy(info, &msg.argsz, sizeof(*info));
+ return 0;
+}
+
+static int irq_howmany(int *fdp, uint32_t cur, uint32_t max)
+{
+ int n = 0;
+
+ if (fdp[cur] != -1) {
+ do {
+ n++;
+ } while (n < max && fdp[cur + n] != -1);
+ } else {
+ do {
+ n++;
+ } while (n < max && fdp[cur + n] == -1);
+ }
+
+ return n;
+}
+
+static int vfio_user_set_irqs(VFIOProxy *proxy, struct vfio_irq_set *irq)
+{
+ g_autofree VFIOUserIRQSet *msgp = NULL;
+ uint32_t size, nfds, send_fds, sent_fds, max;
+
+ if (irq->argsz < sizeof(*irq)) {
+ error_printf("vfio_user_set_irqs argsz too small\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Handle simple case
+ */
+ if ((irq->flags & VFIO_IRQ_SET_DATA_EVENTFD) == 0) {
+ size = sizeof(VFIOUserHdr) + irq->argsz;
+ msgp = g_malloc0(size);
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, size, 0);
+ msgp->argsz = irq->argsz;
+ msgp->flags = irq->flags;
+ msgp->index = irq->index;
+ msgp->start = irq->start;
+ msgp->count = irq->count;
+
+ vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, false);
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ return -msgp->hdr.error_reply;
+ }
+
+ return 0;
+ }
+
+ /*
+ * Calculate the number of FDs to send
+ * and adjust argsz
+ */
+ nfds = (irq->argsz - sizeof(*irq)) / sizeof(int);
+ irq->argsz = sizeof(*irq);
+ msgp = g_malloc0(sizeof(*msgp));
+ /*
+ * Send in chunks if over max_send_fds
+ */
+ for (sent_fds = 0; nfds > sent_fds; sent_fds += send_fds) {
+ VFIOUserFDs *arg_fds, loop_fds;
+
+ /* must send all valid FDs or all invalid FDs in single msg */
+ max = nfds - sent_fds;
+ if (max > proxy->max_send_fds) {
+ max = proxy->max_send_fds;
+ }
+ send_fds = irq_howmany((int *)irq->data, sent_fds, max);
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS,
+ sizeof(*msgp), 0);
+ msgp->argsz = irq->argsz;
+ msgp->flags = irq->flags;
+ msgp->index = irq->index;
+ msgp->start = irq->start + sent_fds;
+ msgp->count = send_fds;
+
+ loop_fds.send_fds = send_fds;
+ loop_fds.recv_fds = 0;
+ loop_fds.fds = (int *)irq->data + sent_fds;
+ arg_fds = loop_fds.fds[0] != -1 ? &loop_fds : NULL;
+
+ vfio_user_send_wait(proxy, &msgp->hdr, arg_fds, 0, false);
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ return -msgp->hdr.error_reply;
+ }
+ }
+
+ return 0;
+}
+
static int vfio_user_region_read(VFIOProxy *proxy, uint8_t index, off_t offset,
uint32_t count, void *data)
{
@@ -1277,6 +1388,28 @@ static int vfio_user_io_get_region_info(VFIODevice *vbasedev,
return 0;
}
+static int vfio_user_io_get_irq_info(VFIODevice *vbasedev,
+ struct vfio_irq_info *irq)
+{
+ int ret;
+
+ ret = vfio_user_get_irq_info(vbasedev->proxy, irq);
+ if (ret) {
+ return ret;
+ }
+
+ if (irq->index > vbasedev->num_irqs) {
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int vfio_user_io_set_irqs(VFIODevice *vbasedev,
+ struct vfio_irq_set *irqs)
+{
+ return vfio_user_set_irqs(vbasedev->proxy, irqs);
+}
+
static int vfio_user_io_region_read(VFIODevice *vbasedev, uint8_t index,
off_t off, uint32_t size, void *data)
{
@@ -1294,6 +1427,8 @@ static int vfio_user_io_region_write(VFIODevice *vbasedev, uint8_t index,
VFIODevIO vfio_dev_io_sock = {
.get_info = vfio_user_io_get_info,
.get_region_info = vfio_user_io_get_region_info,
+ .get_irq_info = vfio_user_io_get_irq_info,
+ .set_irqs = vfio_user_io_set_irqs,
.region_read = vfio_user_io_region_read,
.region_write = vfio_user_io_region_write,
};
--
1.8.3.1
On 11/9/22 00:13, John Johnson wrote:
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> ---
> hw/vfio/pci.c | 7 ++-
> hw/vfio/user-protocol.h | 25 +++++++++
> hw/vfio/user.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 166 insertions(+), 1 deletion(-)
>
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 7abe44e..be39a4e 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -713,7 +713,8 @@ retry:
> ret = vfio_enable_vectors(vdev, false);
> if (ret) {
> if (ret < 0) {
> - error_report("vfio: Error: Failed to setup MSI fds: %m");
> + error_report("vfio: Error: Failed to setup MSI fds: %s",
> + strerror(-ret));
This change belongs to another patch.
> } else {
> error_report("vfio: Error: Failed to enable %d "
> "MSI vectors, retry with %d", vdev->nr_vectors, ret);
> @@ -2712,6 +2713,7 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
> irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
>
> ret = VDEV_GET_IRQ_INFO(vbasedev, &irq_info);
> +
> if (ret) {
> /* This can fail for an old kernel or legacy PCI dev */
> trace_vfio_populate_device_get_irq_info_failure(strerror(errno));
> @@ -3593,6 +3595,9 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
> goto out_teardown;
> }
>
> + vfio_register_err_notifier(vdev);
> + vfio_register_req_notifier(vdev);
> +
> return;
>
> out_teardown:
> diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
> index 124340c..31704cf 100644
> --- a/hw/vfio/user-protocol.h
> +++ b/hw/vfio/user-protocol.h
> @@ -141,6 +141,31 @@ typedef struct {
> } VFIOUserRegionInfo;
>
> /*
> + * VFIO_USER_DEVICE_GET_IRQ_INFO
> + * imported from struct vfio_irq_info
> + */
> +typedef struct {
> + VFIOUserHdr hdr;
> + uint32_t argsz;
> + uint32_t flags;
> + uint32_t index;
> + uint32_t count;
> +} VFIOUserIRQInfo;
> +
> +/*
> + * VFIO_USER_DEVICE_SET_IRQS
> + * imported from struct vfio_irq_set
> + */
> +typedef struct {
> + VFIOUserHdr hdr;
> + uint32_t argsz;
> + uint32_t flags;
> + uint32_t index;
> + uint32_t start;
> + uint32_t count;
> +} VFIOUserIRQSet;
> +
> +/*
> * VFIO_USER_REGION_READ
> * VFIO_USER_REGION_WRITE
> */
> diff --git a/hw/vfio/user.c b/hw/vfio/user.c
> index 1453bb5..815385b 100644
> --- a/hw/vfio/user.c
> +++ b/hw/vfio/user.c
> @@ -1164,6 +1164,117 @@ static int vfio_user_get_region_info(VFIOProxy *proxy,
> return 0;
> }
>
> +static int vfio_user_get_irq_info(VFIOProxy *proxy,
> + struct vfio_irq_info *info)
> +{
> + VFIOUserIRQInfo msg;
> +
> + memset(&msg, 0, sizeof(msg));
> + vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO,
> + sizeof(msg), 0);
> + msg.argsz = info->argsz;
> + msg.index = info->index;
> +
> + vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, false);
> + if (msg.hdr.flags & VFIO_USER_ERROR) {
> + return -msg.hdr.error_reply;
> + }
> +
> + memcpy(info, &msg.argsz, sizeof(*info));
> + return 0;
> +}
> +
> +static int irq_howmany(int *fdp, uint32_t cur, uint32_t max)
intriguing routine. See comment below.
> +{
> + int n = 0;
> +
> + if (fdp[cur] != -1) {
> + do {
> + n++;
> + } while (n < max && fdp[cur + n] != -1);
> + } else {
> + do {
> + n++;
> + } while (n < max && fdp[cur + n] == -1);
> + }
> +
> + return n;
> +}
> +
> +static int vfio_user_set_irqs(VFIOProxy *proxy, struct vfio_irq_set *irq)
> +{
> + g_autofree VFIOUserIRQSet *msgp = NULL;
> + uint32_t size, nfds, send_fds, sent_fds, max;
> +
> + if (irq->argsz < sizeof(*irq)) {
> + error_printf("vfio_user_set_irqs argsz too small\n");
> + return -EINVAL;
> + }
> +
> + /*
> + * Handle simple case
> + */
> + if ((irq->flags & VFIO_IRQ_SET_DATA_EVENTFD) == 0) {
> + size = sizeof(VFIOUserHdr) + irq->argsz;
> + msgp = g_malloc0(size);
> +
> + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, size, 0);
> + msgp->argsz = irq->argsz;
> + msgp->flags = irq->flags;
> + msgp->index = irq->index;
> + msgp->start = irq->start;
> + msgp->count = irq->count;
> +
> + vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, false);
> + if (msgp->hdr.flags & VFIO_USER_ERROR) {
> + return -msgp->hdr.error_reply;
> + }
> +
> + return 0;
> + }
> +
> + /*
> + * Calculate the number of FDs to send
> + * and adjust argsz
> + */
> + nfds = (irq->argsz - sizeof(*irq)) / sizeof(int);
> + irq->argsz = sizeof(*irq);
> + msgp = g_malloc0(sizeof(*msgp));
> + /*
> + * Send in chunks if over max_send_fds
> + */
> + for (sent_fds = 0; nfds > sent_fds; sent_fds += send_fds) {
> + VFIOUserFDs *arg_fds, loop_fds;
> +
> + /* must send all valid FDs or all invalid FDs in single msg */
why is that ?
> + max = nfds - sent_fds;
> + if (max > proxy->max_send_fds) {
> + max = proxy->max_send_fds;
> + }
> + send_fds = irq_howmany((int *)irq->data, sent_fds, max);
sent_fds can never be -1, but irq_howmany() takes this value into
account. Why?
> +
> + vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS,
> + sizeof(*msgp), 0);
> + msgp->argsz = irq->argsz;
> + msgp->flags = irq->flags;
> + msgp->index = irq->index;
> + msgp->start = irq->start + sent_fds;
> + msgp->count = send_fds;
> +
> + loop_fds.send_fds = send_fds;
> + loop_fds.recv_fds = 0;
> + loop_fds.fds = (int *)irq->data + sent_fds;
> + arg_fds = loop_fds.fds[0] != -1 ? &loop_fds : NULL;
> +
> + vfio_user_send_wait(proxy, &msgp->hdr, arg_fds, 0, false);
> + if (msgp->hdr.flags & VFIO_USER_ERROR) {
> + return -msgp->hdr.error_reply;
> + }
> + }
> +
> + return 0;
> +}
> +
> static int vfio_user_region_read(VFIOProxy *proxy, uint8_t index, off_t offset,
> uint32_t count, void *data)
> {
> @@ -1277,6 +1388,28 @@ static int vfio_user_io_get_region_info(VFIODevice *vbasedev,
> return 0;
> }
>
> +static int vfio_user_io_get_irq_info(VFIODevice *vbasedev,
> + struct vfio_irq_info *irq)
> +{
> + int ret;
> +
> + ret = vfio_user_get_irq_info(vbasedev->proxy, irq);
> + if (ret) {
> + return ret;
> + }
> +
> + if (irq->index > vbasedev->num_irqs) {
> + return -EINVAL;
> + }
> + return 0;
> +}
> +
> +static int vfio_user_io_set_irqs(VFIODevice *vbasedev,
> + struct vfio_irq_set *irqs)
> +{
> + return vfio_user_set_irqs(vbasedev->proxy, irqs);
> +}
> +
> static int vfio_user_io_region_read(VFIODevice *vbasedev, uint8_t index,
> off_t off, uint32_t size, void *data)
> {
> @@ -1294,6 +1427,8 @@ static int vfio_user_io_region_write(VFIODevice *vbasedev, uint8_t index,
> VFIODevIO vfio_dev_io_sock = {
> .get_info = vfio_user_io_get_info,
> .get_region_info = vfio_user_io_get_region_info,
> + .get_irq_info = vfio_user_io_get_irq_info,
> + .set_irqs = vfio_user_io_set_irqs,
> .region_read = vfio_user_io_region_read,
> .region_write = vfio_user_io_region_write,
> };
> On Dec 13, 2022, at 8:39 AM, Cédric Le Goater <clg@redhat.com> wrote:
>
> On 11/9/22 00:13, John Johnson wrote:
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 7abe44e..be39a4e 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -713,7 +713,8 @@ retry:
>> ret = vfio_enable_vectors(vdev, false);
>> if (ret) {
>> if (ret < 0) {
>> - error_report("vfio: Error: Failed to setup MSI fds: %m");
>> + error_report("vfio: Error: Failed to setup MSI fds: %s",
>> + strerror(-ret));
>
> This change belongs to another patch.
>
This is the patch where errno may not be set because
vfio_enable_vectors() didn’t make a syscall.
JJ
> On Dec 13, 2022, at 8:39 AM, Cédric Le Goater <clg@redhat.com> wrote:
>
> On 11/9/22 00:13, John Johnson wrote:
>>
>> +
>> + /* must send all valid FDs or all invalid FDs in single msg */
>
> why is that ?
>
This has to do with how VFIO sends FDs to the kernel. The ioctl()
i/f has an array of FDs, with -1 signifying an invalid one. FDs can only be
sent over a UNIX socket as a single array of them (all must be valid, no -1
holes in the array). In order to emulate the ioctl() use, we’d need to send
an ancillary array to map the ordinal index of irq_data to the ordinal index
of the FDs in the message. Since multi-FD sends are only done at device
set-up, it seemed simpler to just break them up.
JJ
On Tue, Nov 08, 2022 at 03:13:36PM -0800, John Johnson wrote:
> +static int vfio_user_io_get_irq_info(VFIODevice *vbasedev,
> + struct vfio_irq_info *irq)
> +{
> + int ret;
> +
> + ret = vfio_user_get_irq_info(vbasedev->proxy, irq);
> + if (ret) {
> + return ret;
> + }
> +
> + if (irq->index > vbasedev->num_irqs) {
> + return -EINVAL;
> + }
Why are we validating ->index *after* requesting the info? Seems a bit weird?
regards
john
> On Dec 9, 2022, at 9:29 AM, John Levon <levon@movementarian.org> wrote:
>
> On Tue, Nov 08, 2022 at 03:13:36PM -0800, John Johnson wrote:
>
>> +static int vfio_user_io_get_irq_info(VFIODevice *vbasedev,
>> + struct vfio_irq_info *irq)
>> +{
>> + int ret;
>> +
>> + ret = vfio_user_get_irq_info(vbasedev->proxy, irq);
>> + if (ret) {
>> + return ret;
>> + }
>> +
>> + if (irq->index > vbasedev->num_irqs) {
>> + return -EINVAL;
>> + }
>
> Why are we validating ->index *after* requesting the info? Seems a bit weird?
>
That check is to validate the content returned by the server (to the extent we can).
JJ
© 2016 - 2026 Red Hat, Inc.