The experimental virtio-mmio support for Xen was initially developed
on aarch64, so device trees were used to configure the mmio devices,
with arbitrary vGIC interrupts used by the hypervisor. On x86_64
however, the only reasonable way to interrupt the guest is over Xen
event channels, which can only be acquired by children of xenbus,
the virtual bus driven by Xen's configuration database, XenStore.
It is also a more convenient and "Xen-ish" way to provision devices.
Implement a xenbus client for virtio-mmio which negotiates an
event channel and provides it as a platform IRQ to the
virtio-mmio driver.
Signed-off-by: Val Packett <val@invisiblethingslab.com>
---
Hi,
I've been working on porting virtio-mmio support from Arm to x86_64,
with the goal of running vhost-user-gpu to power Wayland/GPU integration
for Qubes OS. (I'm aware of various proposals for alternative virtio
transports but virtio-mmio seems to be the only one that *is* upstream
already and just Works..) Setting up virtio-mmio through xenbus, initially
motivated just by event channels being the only real way to get interrupts
working on HVM, turned out to generally be quite pleasant and nice :)
I'd like to get some early feedback for this patch, particularly
the general stuff:
* is this whole thing acceptable in general?
* should it be extracted into a different file?
* (from the Xen side) any input on the xenstore keys, what goes where?
* anything else to keep in mind?
It does seem simple enough, so hopefully this can be done?
The corresponding userspace-side WIP is available at:
https://github.com/QubesOS/xen-vhost-frontend
And the required DMOP for firing the evtchn events will be sent
to xen-devel shortly as well.
Thanks,
~val
---
drivers/virtio/Kconfig | 7 ++
drivers/virtio/virtio_mmio.c | 177 ++++++++++++++++++++++++++++++++++-
2 files changed, 183 insertions(+), 1 deletion(-)
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index ce5bc0d9ea28..56bc2b10526b 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -171,6 +171,13 @@ config VIRTIO_MMIO_CMDLINE_DEVICES
If unsure, say 'N'.
+config VIRTIO_MMIO_XENBUS
+ bool "Memory mapped virtio devices parameter parsing"
+ depends on VIRTIO_MMIO && XEN
+ select XEN_XENBUS_FRONTEND
+ help
+ Allow virtio-mmio device instantiation for Xen guests via xenbus.
+
config VIRTIO_DMA_SHARED_BUFFER
tristate
depends on DMA_SHARED_BUFFER
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index 595c2274fbb5..32295284bdbf 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -70,6 +70,11 @@
#include <uapi/linux/virtio_mmio.h>
#include <linux/virtio_ring.h>
+#ifdef CONFIG_VIRTIO_MMIO_XENBUS
+#include <xen/xen.h>
+#include <xen/xenbus.h>
+#include <xen/events.h>
+#endif
/* The alignment to use between consumer and producer parts of vring.
@@ -810,13 +815,183 @@ static struct platform_driver virtio_mmio_driver = {
},
};
+#ifdef CONFIG_VIRTIO_MMIO_XENBUS
+struct virtio_mmio_xen_info {
+ struct resource resources[2];
+ unsigned int evtchn;
+ struct platform_device *pdev;
+};
+
+static int virtio_mmio_xen_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ int err;
+ long long base, size;
+ char *mem;
+ struct virtio_mmio_xen_info *info;
+ struct xenbus_transaction xbt;
+
+ /* TODO: allocate an unused address here and pass it to the host instead */
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "base", "0x%llx",
+ &base);
+ if (err < 0) {
+ xenbus_dev_fatal(dev, err, "reading base");
+ return -EINVAL;
+ }
+
+ mem = xenbus_read(XBT_NIL, dev->otherend, "size", NULL);
+ if (XENBUS_IS_ERR_READ(mem))
+ return PTR_ERR(mem);
+ size = memparse(mem, NULL);
+ kfree(mem);
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
+ return -ENOMEM;
+ }
+
+ info->resources[0].flags = IORESOURCE_MEM;
+ info->resources[0].start = base;
+ info->resources[0].end = base + size - 1;
+
+ err = xenbus_alloc_evtchn(dev, &info->evtchn);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "xenbus_alloc_evtchn");
+ goto error_info;
+ }
+
+ err = bind_evtchn_to_irq(info->evtchn);
+ if (err <= 0) {
+ xenbus_dev_fatal(dev, err, "bind_evtchn_to_irq");
+ goto error_evtchan;
+ }
+
+ info->resources[1].flags = IORESOURCE_IRQ;
+ info->resources[1].start = info->resources[1].end = err;
+
+again:
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "starting transaction");
+ goto error_irq;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
+ info->evtchn);
+ if (err) {
+ xenbus_transaction_end(xbt, 1);
+ xenbus_dev_fatal(dev, err, "%s", "writing event-channel");
+ goto error_irq;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ if (err) {
+ if (err == -EAGAIN)
+ goto again;
+ xenbus_dev_fatal(dev, err, "completing transaction");
+ goto error_irq;
+ }
+
+ dev_set_drvdata(&dev->dev, info);
+ xenbus_switch_state(dev, XenbusStateInitialised);
+ return 0;
+
+error_irq:
+ unbind_from_irqhandler(info->resources[1].start, info);
+error_evtchan:
+ xenbus_free_evtchn(dev, info->evtchn);
+error_info:
+ kfree(info);
+
+ return err;
+}
+
+static void virtio_mmio_xen_backend_changed(struct xenbus_device *dev,
+ enum xenbus_state backend_state)
+{
+ struct virtio_mmio_xen_info *info = dev_get_drvdata(&dev->dev);
+
+ switch (backend_state) {
+ case XenbusStateInitialising:
+ case XenbusStateInitWait:
+ case XenbusStateInitialised:
+ case XenbusStateReconfiguring:
+ case XenbusStateReconfigured:
+ case XenbusStateUnknown:
+ break;
+
+ case XenbusStateConnected:
+ if (dev->state != XenbusStateInitialised) {
+ dev_warn(&dev->dev, "state %d on connect", dev->state);
+ break;
+ }
+ info->pdev = platform_device_register_resndata(&dev->dev,
+ "virtio-mmio", PLATFORM_DEVID_AUTO,
+ info->resources, ARRAY_SIZE(info->resources), NULL, 0);
+ xenbus_switch_state(dev, XenbusStateConnected);
+ break;
+
+ case XenbusStateClosed:
+ if (dev->state == XenbusStateClosed)
+ break;
+ fallthrough; /* Missed the backend's Closing state. */
+ case XenbusStateClosing:
+ platform_device_unregister(info->pdev);
+ xenbus_switch_state(dev, XenbusStateClosed);
+ break;
+
+ default:
+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
+ backend_state);
+ break;
+ }
+}
+
+static void virtio_mmio_xen_remove(struct xenbus_device *dev)
+{
+ struct virtio_mmio_xen_info *info = dev_get_drvdata(&dev->dev);
+
+ kfree(info);
+ dev_set_drvdata(&dev->dev, NULL);
+}
+
+static const struct xenbus_device_id virtio_mmio_xen_ids[] = {
+ { "virtio" },
+ { "" },
+};
+
+static struct xenbus_driver virtio_mmio_xen_driver = {
+ .ids = virtio_mmio_xen_ids,
+ .probe = virtio_mmio_xen_probe,
+ .otherend_changed = virtio_mmio_xen_backend_changed,
+ .remove = virtio_mmio_xen_remove,
+};
+#endif
+
static int __init virtio_mmio_init(void)
{
- return platform_driver_register(&virtio_mmio_driver);
+ int ret;
+
+ ret = platform_driver_register(&virtio_mmio_driver);
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_VIRTIO_MMIO_XENBUS
+ if (xen_domain())
+ ret = xenbus_register_frontend(&virtio_mmio_xen_driver);
+#endif
+
+ return ret;
}
static void __exit virtio_mmio_exit(void)
{
+#ifdef CONFIG_VIRTIO_MMIO_XENBUS
+ if (xen_domain())
+ xenbus_unregister_driver(&virtio_mmio_xen_driver);
+#endif
+
platform_driver_unregister(&virtio_mmio_driver);
vm_unregister_cmdline_devices();
}
--
2.53.0
Some minor details from the Xen side of things:
On 29.04.26 15:52, Val Packett wrote:
> The experimental virtio-mmio support for Xen was initially developed
> on aarch64, so device trees were used to configure the mmio devices,
> with arbitrary vGIC interrupts used by the hypervisor. On x86_64
> however, the only reasonable way to interrupt the guest is over Xen
> event channels, which can only be acquired by children of xenbus,
More exactly: interdomain event channels need to be connected to a xenbus
device. But you do need those, so for your use case the above statement
is correct.
> the virtual bus driven by Xen's configuration database, XenStore.
> It is also a more convenient and "Xen-ish" way to provision devices.
>
> Implement a xenbus client for virtio-mmio which negotiates an
> event channel and provides it as a platform IRQ to the
> virtio-mmio driver.
>
>
> Signed-off-by: Val Packett <val@invisiblethingslab.com>
> ---
>
> Hi,
>
> I've been working on porting virtio-mmio support from Arm to x86_64,
> with the goal of running vhost-user-gpu to power Wayland/GPU integration
> for Qubes OS. (I'm aware of various proposals for alternative virtio
> transports but virtio-mmio seems to be the only one that *is* upstream
> already and just Works..) Setting up virtio-mmio through xenbus, initially
> motivated just by event channels being the only real way to get interrupts
> working on HVM, turned out to generally be quite pleasant and nice :)
>
> I'd like to get some early feedback for this patch, particularly
> the general stuff:
>
> * is this whole thing acceptable in general?
> * should it be extracted into a different file?
> * (from the Xen side) any input on the xenstore keys, what goes where?
You should add some documentation in the Xen source tree regarding the
Xenstore keys (see docs/misc/xenstore-paths.pandoc there).
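For illustration, an entry in xenstore-paths.pandoc's own format might
look roughly like this (path and value type hypothetical, mirroring the
keys this patch uses):

#### ~/device/virtio-mmio/$DEVID/event-channel = EVTCHN [w]

Event channel used by the backend to interrupt the virtio-mmio frontend.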
> * anything else to keep in mind?
>
> It does seem simple enough, so hopefully this can be done?
>
> The corresponding userspace-side WIP is available at:
> https://github.com/QubesOS/xen-vhost-frontend
>
> And the required DMOP for firing the evtchn events will be sent
> to xen-devel shortly as well.
>
> Thanks,
> ~val
>
> ---
> drivers/virtio/Kconfig | 7 ++
> drivers/virtio/virtio_mmio.c | 177 ++++++++++++++++++++++++++++++++++-
> 2 files changed, 183 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
> index ce5bc0d9ea28..56bc2b10526b 100644
> --- a/drivers/virtio/Kconfig
> +++ b/drivers/virtio/Kconfig
> @@ -171,6 +171,13 @@ config VIRTIO_MMIO_CMDLINE_DEVICES
>
> If unsure, say 'N'.
>
> +config VIRTIO_MMIO_XENBUS
> + bool "Memory mapped virtio devices parameter parsing"
> + depends on VIRTIO_MMIO && XEN
> + select XEN_XENBUS_FRONTEND
> + help
> + Allow virtio-mmio device instantiation for Xen guests via xenbus.
> +
> config VIRTIO_DMA_SHARED_BUFFER
> tristate
> depends on DMA_SHARED_BUFFER
> diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
> index 595c2274fbb5..32295284bdbf 100644
> --- a/drivers/virtio/virtio_mmio.c
> +++ b/drivers/virtio/virtio_mmio.c
> @@ -70,6 +70,11 @@
> #include <uapi/linux/virtio_mmio.h>
> #include <linux/virtio_ring.h>
>
> +#ifdef CONFIG_VIRTIO_MMIO_XENBUS
> +#include <xen/xen.h>
> +#include <xen/xenbus.h>
> +#include <xen/events.h>
> +#endif
>
>
> /* The alignment to use between consumer and producer parts of vring.
> @@ -810,13 +815,183 @@ static struct platform_driver virtio_mmio_driver = {
> },
> };
>
> +#ifdef CONFIG_VIRTIO_MMIO_XENBUS
> +struct virtio_mmio_xen_info {
> + struct resource resources[2];
> + unsigned int evtchn;
> + struct platform_device *pdev;
> +};
> +
> +static int virtio_mmio_xen_probe(struct xenbus_device *dev,
> + const struct xenbus_device_id *id)
> +{
> + int err;
> + long long base, size;
> + char *mem;
> + struct virtio_mmio_xen_info *info;
> + struct xenbus_transaction xbt;
> +
> + /* TODO: allocate an unused address here and pass it to the host instead */
Indeed.
> + err = xenbus_scanf(XBT_NIL, dev->otherend, "base", "0x%llx",
> + &base);
> + if (err < 0) {
> + xenbus_dev_fatal(dev, err, "reading base");
> + return -EINVAL;
> + }
> +
> + mem = xenbus_read(XBT_NIL, dev->otherend, "size", NULL);
> + if (XENBUS_IS_ERR_READ(mem))
> + return PTR_ERR(mem);
> + size = memparse(mem, NULL);
> + kfree(mem);
> +
> + info = kzalloc(sizeof(*info), GFP_KERNEL);
> + if (!info) {
> + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
> + return -ENOMEM;
> + }
> +
> + info->resources[0].flags = IORESOURCE_MEM;
> + info->resources[0].start = base;
> + info->resources[0].end = base + size - 1;
> +
> + err = xenbus_alloc_evtchn(dev, &info->evtchn);
> + if (err) {
> + xenbus_dev_fatal(dev, err, "xenbus_alloc_evtchn");
> + goto error_info;
> + }
> +
> + err = bind_evtchn_to_irq(info->evtchn);
> + if (err <= 0) {
> + xenbus_dev_fatal(dev, err, "bind_evtchn_to_irq");
> + goto error_evtchan;
> + }
> +
> + info->resources[1].flags = IORESOURCE_IRQ;
> + info->resources[1].start = info->resources[1].end = err;
> +
> +again:
> + err = xenbus_transaction_start(&xbt);
No need to use a Xenstore transaction here. The written node(s) are
considered valid only after calling xenbus_switch_state() to set
the frontend state to XenbusStateInitialised.
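A minimal sketch of that simplification, reusing the helpers this probe
already calls (no transaction, just a single XBT_NIL write followed by
the state switch):

err = xenbus_printf(XBT_NIL, dev->nodename, "event-channel", "%u",
		    info->evtchn);
if (err) {
	xenbus_dev_fatal(dev, err, "writing event-channel");
	goto error_irq;
}

dev_set_drvdata(&dev->dev, info);
xenbus_switch_state(dev, XenbusStateInitialised);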
> + if (err) {
> + xenbus_dev_fatal(dev, err, "starting transaction");
> + goto error_irq;
> + }
> +
> + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
> + info->evtchn);
With allocation of the base address you'd want to write it to another node,
of course.
> + if (err) {
> + xenbus_transaction_end(xbt, 1);
> + xenbus_dev_fatal(dev, err, "%s", "writing event-channel");
> + goto error_irq;
> + }
> +
> + err = xenbus_transaction_end(xbt, 0);
> + if (err) {
> + if (err == -EAGAIN)
> + goto again;
> + xenbus_dev_fatal(dev, err, "completing transaction");
> + goto error_irq;
> + }
> +
> + dev_set_drvdata(&dev->dev, info);
> + xenbus_switch_state(dev, XenbusStateInitialised);
> + return 0;
> +
> +error_irq:
> + unbind_from_irqhandler(info->resources[1].start, info);
> +error_evtchan:
> + xenbus_free_evtchn(dev, info->evtchn);
> +error_info:
> + kfree(info);
> +
> + return err;
> +}
> +
> +static void virtio_mmio_xen_backend_changed(struct xenbus_device *dev,
> + enum xenbus_state backend_state)
> +{
> + struct virtio_mmio_xen_info *info = dev_get_drvdata(&dev->dev);
> +
> + switch (backend_state) {
> + case XenbusStateInitialising:
> + case XenbusStateInitWait:
> + case XenbusStateInitialised:
> + case XenbusStateReconfiguring:
> + case XenbusStateReconfigured:
> + case XenbusStateUnknown:
> + break;
> +
> + case XenbusStateConnected:
> + if (dev->state != XenbusStateInitialised) {
> + dev_warn(&dev->dev, "state %d on connect", dev->state);
> + break;
> + }
> + info->pdev = platform_device_register_resndata(&dev->dev,
> + "virtio-mmio", PLATFORM_DEVID_AUTO,
> + info->resources, ARRAY_SIZE(info->resources), NULL, 0);
> + xenbus_switch_state(dev, XenbusStateConnected);
> + break;
> +
> + case XenbusStateClosed:
> + if (dev->state == XenbusStateClosed)
> + break;
> + fallthrough; /* Missed the backend's Closing state. */
> + case XenbusStateClosing:
> + platform_device_unregister(info->pdev);
> + xenbus_switch_state(dev, XenbusStateClosed);
> + break;
> +
> + default:
> + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
> + backend_state);
> + break;
> + }
> +}
> +
> +static void virtio_mmio_xen_remove(struct xenbus_device *dev)
> +{
> + struct virtio_mmio_xen_info *info = dev_get_drvdata(&dev->dev);
> +
> + kfree(info);
> + dev_set_drvdata(&dev->dev, NULL);
> +}
> +
> +static const struct xenbus_device_id virtio_mmio_xen_ids[] = {
> + { "virtio" },
Please use "virtio-mmio" here, as I could imagine "virtio-pci" devices, too.
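With that rename, the id table would read:

static const struct xenbus_device_id virtio_mmio_xen_ids[] = {
	{ "virtio-mmio" },
	{ "" },
};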
Juergen
> + { "" },
> +};
> +
> +static struct xenbus_driver virtio_mmio_xen_driver = {
> + .ids = virtio_mmio_xen_ids,
> + .probe = virtio_mmio_xen_probe,
> + .otherend_changed = virtio_mmio_xen_backend_changed,
> + .remove = virtio_mmio_xen_remove,
> +};
> +#endif
> +
> static int __init virtio_mmio_init(void)
> {
> - return platform_driver_register(&virtio_mmio_driver);
> + int ret;
> +
> + ret = platform_driver_register(&virtio_mmio_driver);
> + if (ret)
> + return ret;
> +
> +#ifdef CONFIG_VIRTIO_MMIO_XENBUS
> + if (xen_domain())
> + ret = xenbus_register_frontend(&virtio_mmio_xen_driver);
> +#endif
> +
> + return ret;
> }
>
> static void __exit virtio_mmio_exit(void)
> {
> +#ifdef CONFIG_VIRTIO_MMIO_XENBUS
> + if (xen_domain())
> + xenbus_unregister_driver(&virtio_mmio_xen_driver);
> +#endif
> +
> platform_driver_unregister(&virtio_mmio_driver);
> vm_unregister_cmdline_devices();
> }
On 4/29/26 12:35 PM, Jürgen Groß wrote:
> Some minor details from the Xen side of things:
>
> On 29.04.26 15:52, Val Packett wrote:
>> The experimental virtio-mmio support for Xen was initially developed
>> on aarch64, so device trees were used to configure the mmio devices,
>> with arbitrary vGIC interrupts used by the hypervisor. On x86_64
>> however, the only reasonable way to interrupt the guest is over Xen
>> event channels, which can only be acquired by children of xenbus,
>
> More exactly: interdomain event channels need to be connected to a xenbus
> device. But you do need those, so for your use case the above statement
> is correct.
>
>> the virtual bus driven by Xen's configuration database, XenStore.
>> It is also a more convenient and "Xen-ish" way to provision devices.
>>
>> Implement a xenbus client for virtio-mmio which negotiates an
>> event channel and provides it as a platform IRQ to the
>> virtio-mmio driver.
>>
>>
>> Signed-off-by: Val Packett <val@invisiblethingslab.com>
>> ---
>>
>> Hi,
>>
>> I've been working on porting virtio-mmio support from Arm to x86_64,
>> with the goal of running vhost-user-gpu to power Wayland/GPU integration
>> for Qubes OS. (I'm aware of various proposals for alternative virtio
>> transports but virtio-mmio seems to be the only one that *is* upstream
>> already and just Works..) Setting up virtio-mmio through xenbus,
>> initially
>> motivated just by event channels being the only real way to get
>> interrupts
>> working on HVM, turned out to generally be quite pleasant and nice :)
>>
>> I'd like to get some early feedback for this patch, particularly
>> the general stuff:
>>
>> * is this whole thing acceptable in general?
>> * should it be extracted into a different file?
>> * (from the Xen side) any input on the xenstore keys, what goes where?
>
> You should add some documentation in the Xen source tree regarding the
> Xenstore keys (see docs/misc/xenstore-paths.pandoc there).
Ack, thanks!
>> […]
>>
>> +again:
>> + err = xenbus_transaction_start(&xbt);
>
> No need to use a Xenstore transaction here. The written node(s) are
> considered valid only after calling xenbus_switch_state() to set
> the frontend state to XenbusStateInitialised.
Oh, I assumed transactions were required for writing from the kernel to
work at all…
>> [..]
>>
>> +static const struct xenbus_device_id virtio_mmio_xen_ids[] = {
>> + { "virtio" },
>
> Please use "virtio-mmio" here, as I could imagine "virtio-pci"
> devices, too.
Ack. Would actually also distinguish it from the initial Arm
proof-of-concept version…
Thanks,
~val
Hello,
On 29/04/2026 at 16:18, Val Packett wrote:
> The experimental virtio-mmio support for Xen was initially developed
> on aarch64, so device trees were used to configure the mmio devices,
> with arbitrary vGIC interrupts used by the hypervisor. On x86_64
> however, the only reasonable way to interrupt the guest is over Xen
> event channels, which can only be acquired by children of xenbus,
> the virtual bus driven by Xen's configuration database, XenStore.
> It is also a more convenient and "Xen-ish" way to provision devices.
>
> Implement a xenbus client for virtio-mmio which negotiates an
> event channel and provides it as a platform IRQ to the
> virtio-mmio driver.
>
>
> Signed-off-by: Val Packett <val@invisiblethingslab.com>
> ---
>
> Hi,
>
> I've been working on porting virtio-mmio support from Arm to x86_64,
> with the goal of running vhost-user-gpu to power Wayland/GPU integration
> for Qubes OS. (I'm aware of various proposals for alternative virtio
> transports but virtio-mmio seems to be the only one that *is* upstream
> already and just Works..) Setting up virtio-mmio through xenbus, initially
> motivated just by event channels being the only real way to get interrupts
> working on HVM, turned out to generally be quite pleasant and nice :)
Is it HVM specific, or can we also make it work for PVH (we can actually
attach an ioreq server to PVH guests)?
>
> I'd like to get some early feedback for this patch, particularly
> the general stuff:
>
> * is this whole thing acceptable in general?
> * should it be extracted into a different file?
> * (from the Xen side) any input on the xenstore keys, what goes where?
> * anything else to keep in mind?
>
> It does seem simple enough, so hopefully this can be done?
>
> The corresponding userspace-side WIP is available at:
> https://github.com/QubesOS/xen-vhost-frontend
>
> And the required DMOP for firing the evtchn events will be sent
> to xen-devel shortly as well.
Could that be done through evtchn_send (or its userland counterpart)?
>
> Thanks,
> ~val
>
> ---
> drivers/virtio/Kconfig | 7 ++
> drivers/virtio/virtio_mmio.c | 177 ++++++++++++++++++++++++++++++++++-
> 2 files changed, 183 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
> index ce5bc0d9ea28..56bc2b10526b 100644
> --- a/drivers/virtio/Kconfig
> +++ b/drivers/virtio/Kconfig
> @@ -171,6 +171,13 @@ config VIRTIO_MMIO_CMDLINE_DEVICES
>
> If unsure, say 'N'.
>
> +config VIRTIO_MMIO_XENBUS
> + bool "Memory mapped virtio devices parameter parsing"
that text seems to miss the xenbus aspect
> + depends on VIRTIO_MMIO && XEN
> + select XEN_XENBUS_FRONTEND
> + help
> + Allow virtio-mmio device instantiation for Xen guests via xenbus.
> +
> config VIRTIO_DMA_SHARED_BUFFER
> tristate
> depends on DMA_SHARED_BUFFER
> diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
> index 595c2274fbb5..32295284bdbf 100644
> --- a/drivers/virtio/virtio_mmio.c
> +++ b/drivers/virtio/virtio_mmio.c
> @@ -70,6 +70,11 @@
> #include <uapi/linux/virtio_mmio.h>
> #include <linux/virtio_ring.h>
>
> +#ifdef CONFIG_VIRTIO_MMIO_XENBUS
> +#include <xen/xen.h>
> +#include <xen/xenbus.h>
> +#include <xen/events.h>
> +#endif
>
>
> /* The alignment to use between consumer and producer parts of vring.
> @@ -810,13 +815,183 @@ static struct platform_driver virtio_mmio_driver = {
> },
> };
>
> +#ifdef CONFIG_VIRTIO_MMIO_XENBUS
> +struct virtio_mmio_xen_info {
> + struct resource resources[2];
> + unsigned int evtchn;
> + struct platform_device *pdev;
> +};
> +
> +static int virtio_mmio_xen_probe(struct xenbus_device *dev,
> + const struct xenbus_device_id *id)
> +{
> + int err;
> + long long base, size;
> + char *mem;
> + struct virtio_mmio_xen_info *info;
> + struct xenbus_transaction xbt;
> +
> + /* TODO: allocate an unused address here and pass it to the host instead */
> + err = xenbus_scanf(XBT_NIL, dev->otherend, "base", "0x%llx",
> + &base);
> + if (err < 0) {
> + xenbus_dev_fatal(dev, err, "reading base");
> + return -EINVAL;
> + }
> +
> + mem = xenbus_read(XBT_NIL, dev->otherend, "size", NULL);
> + if (XENBUS_IS_ERR_READ(mem))
> + return PTR_ERR(mem);
> + size = memparse(mem, NULL);
> + kfree(mem);
> +
> + info = kzalloc(sizeof(*info), GFP_KERNEL);
> + if (!info) {
> + xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
> + return -ENOMEM;
> + }
> +
> + info->resources[0].flags = IORESOURCE_MEM;
> + info->resources[0].start = base;
> + info->resources[0].end = base + size - 1;
> +
> + err = xenbus_alloc_evtchn(dev, &info->evtchn);
> + if (err) {
> + xenbus_dev_fatal(dev, err, "xenbus_alloc_evtchn");
> + goto error_info;
> + }
> +
> + err = bind_evtchn_to_irq(info->evtchn);
> + if (err <= 0) {
> + xenbus_dev_fatal(dev, err, "bind_evtchn_to_irq");
> + goto error_evtchan;
> + }
> +
> + info->resources[1].flags = IORESOURCE_IRQ;
> + info->resources[1].start = info->resources[1].end = err;
> +
> +again:
> + err = xenbus_transaction_start(&xbt);
> + if (err) {
> + xenbus_dev_fatal(dev, err, "starting transaction");
> + goto error_irq;
> + }
> +
> + err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
> + info->evtchn);
> + if (err) {
> + xenbus_transaction_end(xbt, 1);
> + xenbus_dev_fatal(dev, err, "%s", "writing event-channel");
> + goto error_irq;
> + }
> +
> + err = xenbus_transaction_end(xbt, 0);
> + if (err) {
> + if (err == -EAGAIN)
> + goto again;
> + xenbus_dev_fatal(dev, err, "completing transaction");
> + goto error_irq;
> + }
> +
> + dev_set_drvdata(&dev->dev, info);
> + xenbus_switch_state(dev, XenbusStateInitialised);
> + return 0;
> +
> +error_irq:
> + unbind_from_irqhandler(info->resources[1].start, info);
> +error_evtchan:
> + xenbus_free_evtchn(dev, info->evtchn);
> +error_info:
> + kfree(info);
> +
> + return err;
> +}
> +
> +static void virtio_mmio_xen_backend_changed(struct xenbus_device *dev,
> + enum xenbus_state backend_state)
> +{
> + struct virtio_mmio_xen_info *info = dev_get_drvdata(&dev->dev);
> +
> + switch (backend_state) {
> + case XenbusStateInitialising:
> + case XenbusStateInitWait:
> + case XenbusStateInitialised:
> + case XenbusStateReconfiguring:
> + case XenbusStateReconfigured:
> + case XenbusStateUnknown:
> + break;
> +
> + case XenbusStateConnected:
> + if (dev->state != XenbusStateInitialised) {
> + dev_warn(&dev->dev, "state %d on connect", dev->state);
> + break;
> + }
> + info->pdev = platform_device_register_resndata(&dev->dev,
> + "virtio-mmio", PLATFORM_DEVID_AUTO,
> + info->resources, ARRAY_SIZE(info->resources), NULL, 0);
> + xenbus_switch_state(dev, XenbusStateConnected);
> + break;
> +
> + case XenbusStateClosed:
> + if (dev->state == XenbusStateClosed)
> + break;
> + fallthrough; /* Missed the backend's Closing state. */
> + case XenbusStateClosing:
> + platform_device_unregister(info->pdev);
> + xenbus_switch_state(dev, XenbusStateClosed);
> + break;
> +
> + default:
> + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
> + backend_state);
> + break;
> + }
> +}
> +
In some way, we're defining a new "PV driver" which is a virtio-mmio
one; I guess we can eventually specify some form of protocol that
backend/frontend would need to follow?
> +static void virtio_mmio_xen_remove(struct xenbus_device *dev)
> +{
> + struct virtio_mmio_xen_info *info = dev_get_drvdata(&dev->dev);
> +
> + kfree(info);
> + dev_set_drvdata(&dev->dev, NULL);
> +}
> +
> +static const struct xenbus_device_id virtio_mmio_xen_ids[] = {
> + { "virtio" },
> + { "" },
> +};
> +
> +static struct xenbus_driver virtio_mmio_xen_driver = {
> + .ids = virtio_mmio_xen_ids,
> + .probe = virtio_mmio_xen_probe,
> + .otherend_changed = virtio_mmio_xen_backend_changed,
> + .remove = virtio_mmio_xen_remove,
> +};
> +#endif
> +
> static int __init virtio_mmio_init(void)
> {
> - return platform_driver_register(&virtio_mmio_driver);
> + int ret;
> +
> + ret = platform_driver_register(&virtio_mmio_driver);
> + if (ret)
> + return ret;
> +
> +#ifdef CONFIG_VIRTIO_MMIO_XENBUS
> + if (xen_domain())
> + ret = xenbus_register_frontend(&virtio_mmio_xen_driver);
> +#endif
> +
> + return ret;
> }
>
> static void __exit virtio_mmio_exit(void)
> {
> +#ifdef CONFIG_VIRTIO_MMIO_XENBUS
> + if (xen_domain())
> + xenbus_unregister_driver(&virtio_mmio_xen_driver);
> +#endif
> +
> platform_driver_unregister(&virtio_mmio_driver);
> vm_unregister_cmdline_devices();
> }
--
Teddy Astie | Vates XCP-ng Developer
XCP-ng & Xen Orchestra - Vates solutions
web: https://vates.tech
On 4/29/26 11:41 AM, Teddy Astie wrote:
> Hello,
>
> On 29/04/2026 at 16:18, Val Packett wrote:
>> […]
>>
>> I've been working on porting virtio-mmio support from Arm to x86_64,
>> with the goal of running vhost-user-gpu to power Wayland/GPU integration
>> for Qubes OS. (I'm aware of various proposals for alternative virtio
>> transports but virtio-mmio seems to be the only one that *is* upstream
>> already and just Works..) Setting up virtio-mmio through xenbus, initially
>> motivated just by event channels being the only real way to get interrupts
>> working on HVM, turned out to generally be quite pleasant and nice :)
> Is it HVM specific, or can we also make it work for PVH (we can actually
> attach an ioreq server to PVH guests)?

Sorry, typo, I did mean PVH of course!

I've been testing this with PVH guests + PV dom0, with my PV alloc_ioreq
fix:
https://lore.kernel.org/all/20251126062124.117425-1-val@invisiblethingslab.com/

(Time to resend that one as a non-RFC I guess…)

HVM actually does have legacy ISA interrupts (which are often used with
virtio-mmio on KVM), funnily enough, and I've tried firing those from a
DMOP but that silly thing didn't work properly.

>> I'd like to get some early feedback for this patch, particularly
>> the general stuff:
>>
>> * is this whole thing acceptable in general?
>> * should it be extracted into a different file?
>> * (from the Xen side) any input on the xenstore keys, what goes where?
>> * anything else to keep in mind?
>>
>> It does seem simple enough, so hopefully this can be done?
>>
>> The corresponding userspace-side WIP is available at:
>> https://github.com/QubesOS/xen-vhost-frontend
>>
>> And the required DMOP for firing the evtchn events will be sent
>> to xen-devel shortly as well.
> Could that be done through evtchn_send (or its userland counterpart)?

Actually, yes… The use of DMOPs is only dictated by the current Linux
privcmd.c code (the irqfds created by the kernel react to events by
executing HYPERVISOR_dm_op with a stored operation), we can avoid the
need to modify Xen by simply expanding the privcmd driver to make
"evtchn fds". Sounds good, will do.

>> [..]
>>
>> diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
>> index ce5bc0d9ea28..56bc2b10526b 100644
>> --- a/drivers/virtio/Kconfig
>> +++ b/drivers/virtio/Kconfig
>> @@ -171,6 +171,13 @@ config VIRTIO_MMIO_CMDLINE_DEVICES
>>
>> If unsure, say 'N'.
>>
>> +config VIRTIO_MMIO_XENBUS
>> + bool "Memory mapped virtio devices parameter parsing"
> that text seems to miss the xenbus aspect

Yep, didn't change that yet, ack

>> [..]
> In some way, we're defining a new "PV driver" which is a virtio-mmio
> one; I guess we can eventually specify some form of protocol that
> backend/frontend would need to follow?

Right, Jürgen mentioned documenting the keys in the xenstore-paths doc..
would the entire "protocol" (keys + state transition logic) fit into that?

The keys are currently derived from the initial Arm prototype which
wasn't actually using xenbus properly (the guest driver was configured
by a device tree node, but the ioreq server used xenstore keys, without
properly transitioning between states).

Thanks,
~val
On 30/04/2026 at 06:06, Val Packett wrote:
>
> On 4/29/26 11:41 AM, Teddy Astie wrote:
>> Hello,
>>
>> On 29/04/2026 at 16:18, Val Packett wrote:
>>> […]
>>>
>>> I've been working on porting virtio-mmio support from Arm to x86_64,
>>> with the goal of running vhost-user-gpu to power Wayland/GPU integration
>>> for Qubes OS. (I'm aware of various proposals for alternative virtio
>>> transports but virtio-mmio seems to be the only one that *is* upstream
>>> already and just Works..) Setting up virtio-mmio through xenbus,
>>> initially
>>> motivated just by event channels being the only real way to get
>>> interrupts
>>> working on HVM, turned out to generally be quite pleasant and nice :)
>> Is it HVM specific, or can we also make it work for PVH (we can actually
>> attach an ioreq server to PVH guests)?
>
> Sorry, typo, I did mean PVH of course!
>
> I've been testing this with PVH guests + PV dom0, with my PV alloc_ioreq
> fix:
> https://lore.kernel.org/all/20251126062124.117425-1-
> val@invisiblethingslab.com/
>
> (Time to resend that one as a non-RFC I guess…)
>
> HVM actually does have legacy ISA interrupts (which are often used with
> virtio-mmio on KVM), funnily enough, and I've tried firing those from a
> DMOP but that silly thing didn't work properly.
>
>>> I'd like to get some early feedback for this patch, particularly
>>> the general stuff:
>>>
>>> * is this whole thing acceptable in general?
>>> * should it be extracted into a different file?
>>> * (from the Xen side) any input on the xenstore keys, what goes where?
>>> * anything else to keep in mind?
>>>
>>> It does seem simple enough, so hopefully this can be done?
>>>
>>> The corresponding userspace-side WIP is available at:
>>> https://github.com/QubesOS/xen-vhost-frontend
>>>
>>> And the required DMOP for firing the evtchn events will be sent
>>> to xen-devel shortly as well.
>> Could that be done through evtchn_send (or its userland counterpart)?
>
> Actually, yes… The use of DMOPs is only dictated by the current Linux
> privcmd.c code (the irqfds created by the kernel react to events by
> executing HYPERVISOR_dm_op with a stored operation), we can avoid the
> need to modify Xen by simply expanding the privcmd driver to make
> "evtchn fds". Sounds good, will do.
>
Given that the event channel used by device models is exposed through
ioreq.vp_eport ("evtchn for notifications to/from device model"), I
don't think you need to expand the privcmd interface; you should be
able to do this instead:
open /dev/xen/evtchn
perform IOCTL_EVTCHN_BIND_INTERDOMAIN (for each guest vCPU)
with remote_domain=guest_domid, remote_port=ioreq.vp_eport
Then interact with the event channel through IOCTL_EVTCHN_NOTIFY (with
local port given by IOCTL_EVTCHN_BIND_INTERDOMAIN) and read/write on the
file descriptor.
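A minimal C sketch of that flow, error handling omitted (guest_domid and
vp_eport are assumed to be known already, with vp_eport taken from the
mapped ioreq struct; the uapi header is <xen/evtchn.h>, and Xen's tools
ship an equivalent xen/sys/evtchn.h):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <xen/evtchn.h>

/* Bind our end to one vCPU's vp_eport; the ioctl returns the local port. */
struct ioctl_evtchn_bind_interdomain bind = {
	.remote_domain = guest_domid,
	.remote_port   = vp_eport,	/* ioreq.vp_eport for this vCPU */
};
int fd = open("/dev/xen/evtchn", O_RDWR);
int local_port = ioctl(fd, IOCTL_EVTCHN_BIND_INTERDOMAIN, &bind);

/* Notify the other end; pending events arrive as port numbers
 * read() from the file descriptor. */
struct ioctl_evtchn_notify notify = { .port = local_port };
ioctl(fd, IOCTL_EVTCHN_NOTIFY, &notify);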
I have some experimental Rust code to work with event channels [1], but
I think you can find similar code in multiple places.
[1]
https://github.com/TSnake41/rust-vmm-xen/blob/redesign-proposal/xen/src/event/mod.rs
https://github.com/TSnake41/rust-vmm-xen/blob/redesign-proposal/xen-unix/src/event/mod.rs
>>> [..]
>>>
>>> diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
>>> index ce5bc0d9ea28..56bc2b10526b 100644
>>> --- a/drivers/virtio/Kconfig
>>> +++ b/drivers/virtio/Kconfig
>>> @@ -171,6 +171,13 @@ config VIRTIO_MMIO_CMDLINE_DEVICES
>>> If unsure, say 'N'.
>>> +config VIRTIO_MMIO_XENBUS
>>> + bool "Memory mapped virtio devices parameter parsing"
>> that text seems to miss the xenbus aspect
> Yep, didn't change that yet, ack
>>> [..]
>> In some way, we're defining a new "PV driver" which is a virtio-mmio
>> one; I guess we can eventually specify some form of protocol that
>> backend/frontend would need to follow?
>
> Right, Jürgen mentioned documenting the keys in the xenstore-paths doc..
> would the entire "protocol" (keys + state transition logic) fit into that?
>
> The keys are currently derived from the initial Arm prototype which
> wasn't actually using xenbus properly (the guest driver was configured
> by a device tree node, but the ioreq server used xenstore keys, without
> properly transitioning between states).
>
>
> Thanks,
> ~val
>
>
Teddy
--
Teddy Astie | Vates XCP-ng Developer
XCP-ng & Xen Orchestra - Vates solutions
web: https://vates.tech
On 4/30/26 5:11 AM, Teddy Astie wrote:
> Le 30/04/2026 à 06:06, Val Packett a écrit :
>> On 4/29/26 11:41 AM, Teddy Astie wrote:
>>> Hello,
>>>
>>> On 29/04/2026 at 16:18, Val Packett wrote:
>>>> […]
>>>>
>>>> I've been working on porting virtio-mmio support from Arm to x86_64,
>>>> with the goal of running vhost-user-gpu to power Wayland/GPU integration
>>>> for Qubes OS. (I'm aware of various proposals for alternative virtio
>>>> transports but virtio-mmio seems to be the only one that *is* upstream
>>>> already and just Works..) Setting up virtio-mmio through xenbus,
>>>> initially
>>>> motivated just by event channels being the only real way to get
>>>> interrupts
>>>> working on HVM, turned out to generally be quite pleasant and nice :)
>>> Is it HVM specific, or can we also make it work for PVH (we can actually
>>> attach an ioreq server to PVH guests)?
>> Sorry, typo, I did mean PVH of course!
>>
>> I've been testing this with PVH guests + PV dom0, with my PV alloc_ioreq
>> fix:
>> https://lore.kernel.org/all/20251126062124.117425-1-
>> val@invisiblethingslab.com/
>>
>> (Time to resend that one as a non-RFC I guess…)
>>
>> HVM actually does have legacy ISA interrupts (which are often used with
>> virtio-mmio on KVM), funnily enough, and I've tried firing those from a
>> DMOP but that silly thing didn't work properly.
>>
>>>> I'd like to get some early feedback for this patch, particularly
>>>> the general stuff:
>>>>
>>>> * is this whole thing acceptable in general?
>>>> * should it be extracted into a different file?
>>>> * (from the Xen side) any input on the xenstore keys, what goes where?
>>>> * anything else to keep in mind?
>>>>
>>>> It does seem simple enough, so hopefully this can be done?
>>>>
>>>> The corresponding userspace-side WIP is available at:
>>>> https://github.com/QubesOS/xen-vhost-frontend
>>>>
>>>> And the required DMOP for firing the evtchn events will be sent
>>>> to xen-devel shortly as well.
>>> Could that be done through evtchn_send (or its userland counterpart)?
>> Actually, yes… The use of DMOPs is only dictated by the current Linux
>> privcmd.c code (the irqfds created by the kernel react to events by
>> executing HYPERVISOR_dm_op with a stored operation), we can avoid the
>> need to modify Xen by simply expanding the privcmd driver to make
>> "evtchn fds". Sounds good, will do.
>>
> Given that the event channel used by device models is exposed through
> ioreq.vp_eport ("evtchn for notifications to/from device model"), I
> don't think you need to expand the privcmd interface; you should be
> able to do this instead:
>
> open /dev/xen/evtchn
> perform IOCTL_EVTCHN_BIND_INTERDOMAIN (for each guest vCPU)
> with remote_domain=guest_domid, remote_port=ioreq.vp_eport
>
> Then interact with the event channel through IOCTL_EVTCHN_NOTIFY (with
> local port given by IOCTL_EVTCHN_BIND_INTERDOMAIN) and read/write on the
> file descriptor.
So the reason there's currently an ioctl to bind an eventfd to fire a
stored DMOP is that the whole idea is to (efficiently!) support generic,
hypervisor-neutral device server implementations via the vhost-user
protocol.
Now of course, the current implementation isn't *entirely*
hypervisor-neutral as e.g. the vm-memory Rust crate (inside of the
"neutral" vhost-user device servers) does need to be built with the
`xen` feature. But still, that's how it works. What can be made generic
is generic.
xen-vhost-frontend, which is the thing that integrates these with Xen,
actually used to handle the interrupts in userspace[1] by firing the
DMOP itself (which is where I could "just replace that with
IOCTL_EVTCHN_NOTIFY") but that was offloaded to the kernel with the
introduction of IOCTL_PRIVCMD_IRQFD[2], similarly to KVM_IRQFD.
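For reference, that binding looks roughly like this from userspace (a
sketch assuming the upstream struct privcmd_irqfd layout; dmop_buf,
irq_eventfd and privcmd_fd are placeholders):

struct privcmd_irqfd irqfd = {
	.dm_op = (uintptr_t)&dmop_buf,	/* stored DMOP to run on signal */
	.size  = sizeof(dmop_buf),
	.fd    = irq_eventfd,
	.dom   = guest_domid,
};
ioctl(privcmd_fd, IOCTL_PRIVCMD_IRQFD, &irqfd);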
Switching back to handling the eventfd in userspace would be a literal
deoptimization :)
Throwing away the whole generic layer to do a fully integrated,
use-case-specific thing sounds more difficult/tedious than this, and not
necessarily desirable in general.
[1]:
https://github.com/vireshk/xen-vhost-frontend/commit/06d59035f8a387c0f600931d09dfaa27b80ede7f
[2]:
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?id=f8941e6c4c712948663ec5d7bbb546f1a0f4e3f6
~val
On 30/04/2026 at 10:51, Val Packett wrote:
>
> On 4/30/26 5:11 AM, Teddy Astie wrote:
>> On 30/04/2026 at 06:06, Val Packett wrote:
>>> On 4/29/26 11:41 AM, Teddy Astie wrote:
>>>> Hello,
>>>>
>>>> On 29/04/2026 at 16:18, Val Packett wrote:
>>>>> […]
>>>>>
>>>>> I've been working on porting virtio-mmio support from Arm to x86_64,
>>>>> with the goal of running vhost-user-gpu to power Wayland/GPU
>>>>> integration
>>>>> for Qubes OS. (I'm aware of various proposals for alternative virtio
>>>>> transports but virtio-mmio seems to be the only one that *is* upstream
>>>>> already and just Works..) Setting up virtio-mmio through xenbus,
>>>>> initially
>>>>> motivated just by event channels being the only real way to get
>>>>> interrupts
>>>>> working on HVM, turned out to generally be quite pleasant and nice :)
>>>> Is it HVM specific, or can we also make it work for PVH (we can
>>>> actually attach an ioreq server to PVH guests)?
>>> Sorry, typo, I did mean PVH of course!
>>>
>>> I've been testing this with PVH guests + PV dom0, with my PV alloc_ioreq
>>> fix:
>>> https://lore.kernel.org/all/20251126062124.117425-1-
>>> val@invisiblethingslab.com/
>>>
>>> (Time to resend that one as a non-RFC I guess…)
>>>
>>> HVM actually does have legacy ISA interrupts (which are often used with
>>> virtio-mmio on KVM), funnily enough, and I've tried firing those from a
>>> DMOP but that silly thing didn't work properly.
>>>
>>>>> I'd like to get some early feedback for this patch, particularly
>>>>> the general stuff:
>>>>>
>>>>> * is this whole thing acceptable in general?
>>>>> * should it be extracted into a different file?
>>>>> * (from the Xen side) any input on the xenstore keys, what goes where?
>>>>> * anything else to keep in mind?
>>>>>
>>>>> It does seem simple enough, so hopefully this can be done?
>>>>>
>>>>> The corresponding userspace-side WIP is available at:
>>>>> https://github.com/QubesOS/xen-vhost-frontend
>>>>>
>>>>> And the required DMOP for firing the evtchn events will be sent
>>>>> to xen-devel shortly as well.
>>>> Could that be done through evtchn_send (or its userland counterpart)?
>>> Actually, yes… The use of DMOPs is only dictated by the current Linux
>>> privcmd.c code (the irqfds created by the kernel react to events by
>>> executing HYPERVISOR_dm_op with a stored operation), we can avoid the
>>> need to modify Xen by simply expanding the privcmd driver to make
>>> "evtchn fds". Sounds good, will do.
>>>
>> Given that the event channel used by device models is exposed through
>> ioreq.vp_eport ("evtchn for notifications to/from device model"), I
>> don't think you need to expand the privcmd interface; you should be
>> able to do this instead:
>>
>> open /dev/xen/evtchn
>> perform IOCTL_EVTCHN_BIND_INTERDOMAIN (for each guest vCPU)
>> with remote_domain=guest_domid, remote_port=ioreq.vp_eport
>>
>> Then interact with the event channel through IOCTL_EVTCHN_NOTIFY (with
>> local port given by IOCTL_EVTCHN_BIND_INTERDOMAIN) and read/write on the
>> file descriptor.
>
> So the reason there's currently an ioctl to bind an eventfd to fire a
> stored DMOP is that the whole idea is to (efficiently!) support generic,
> hypervisor-neutral device server implementations via the vhost-user
> protocol.
>
> Now of course, the current implementation isn't *entirely* hypervisor-
> neutral as e.g. the vm-memory Rust crate (inside of the "neutral" vhost-
> user device servers) does need to be built with the `xen` feature. But
> still, that's how it works. What can be made generic is generic.
>
> xen-vhost-frontend, which is the thing that integrates these with Xen,
> actually used to handle the interrupts in userspace[1] by firing the
> DMOP itself (which is where I could "just replace that with
> IOCTL_EVTCHN_NOTIFY") but that was offloaded to the kernel with the
> introduction of IOCTL_PRIVCMD_IRQFD[2], similarly to KVM_IRQFD.
>
I think what would be preferable for your use case would be to have a way
to bind an event channel to an eventfd object, which should be a
primitive that lives in the evtchn device.
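Something like this, perhaps (entirely hypothetical, nothing of the sort
exists upstream today):

/* Hypothetical new /dev/xen/evtchn primitive: tie a bound port to an
 * eventfd so the kernel signals it directly when the port fires. */
struct ioctl_evtchn_bind_eventfd {
	unsigned int port;	/* local port, already bound */
	int event_fd;		/* eventfd to signal */
};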
The current interface kind of assumes that you're looking to run a
completely emulated virtio device with no Xen specifics, and it looks
like that's not exactly what you're implementing.
As you actually plan to switch to using event channels for notifying the
guest, I think it would be preferable to do the same the other way
(event channels to notify the host) so you only have event channels to
worry about here.
> Switching back to handling the eventfd in userspace would be a literal
> deoptimization :)
>
> Throwing away the whole generic layer to do a fully integrated,
> use-case-specific thing sounds more difficult/tedious than this, and not
> necessarily desirable in general.
>
> [1]: https://github.com/vireshk/xen-vhost-frontend/
> commit/06d59035f8a387c0f600931d09dfaa27b80ede7f
> [2]: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-
> next.git/commit/?id=f8941e6c4c712948663ec5d7bbb546f1a0f4e3f6
>
> ~val
>
>
--
Teddy Astie | Vates XCP-ng Developer
XCP-ng & Xen Orchestra - Vates solutions
web: https://vates.tech
On 4/30/26 10:47 AM, Teddy Astie wrote:
> On 30/04/2026 at 10:51, Val Packett wrote:
>> On 4/30/26 5:11 AM, Teddy Astie wrote:
>>> On 30/04/2026 at 06:06, Val Packett wrote:
>>>> [..]
>>>>>> I'd like to get some early feedback for this patch, particularly
>>>>>> the general stuff:
>>>>>>
>>>>>> * is this whole thing acceptable in general?
>>>>>> * should it be extracted into a different file?
>>>>>> * (from the Xen side) any input on the xenstore keys, what goes where?
>>>>>> * anything else to keep in mind?
>>>>>>
>>>>>> It does seem simple enough, so hopefully this can be done?
>>>>>>
>>>>>> The corresponding userspace-side WIP is available at:
>>>>>> https://github.com/QubesOS/xen-vhost-frontend
>>>>>>
>>>>>> And the required DMOP for firing the evtchn events will be sent
>>>>>> to xen-devel shortly as well.
>>>>> Could that be done through evtchn_send (or its userland counterpart)?
>>>> Actually, yes… The use of DMOPs is only dictated by the current Linux
>>>> privcmd.c code (the irqfds created by the kernel react to events by
>>>> executing HYPERVISOR_dm_op with a stored operation), we can avoid the
>>>> need to modify Xen by simply expanding the privcmd driver to make
>>>> "evtchn fds". Sounds good, will do.
>>>>
>>> Given that the event channel used by device models is exposed through
>>> ioreq.vp_eport ("evtchn for notifications to/from device model"), I
>>> don't think you need to expand the privcmd interface; you should be
>>> able to do this instead:
>>>
>>> open /dev/xen/evtchn
>>> perform IOCTL_EVTCHN_BIND_INTERDOMAIN (for each guest vCPU)
>>> with remote_domain=guest_domid, remote_port=ioreq.vp_eport
>>>
>>> Then interact with the event channel through IOCTL_EVTCHN_NOTIFY (with
>>> local port given by IOCTL_EVTCHN_BIND_INTERDOMAIN) and read/write on the
>>> file descriptor.
>> So the reason there's currently an ioctl to bind an eventfd to fire a
>> stored DMOP is that the whole idea is to (efficiently!) support generic,
>> hypervisor-neutral device server implementations via the vhost-user
>> protocol.
>>
>> Now of course, the current implementation isn't *entirely* hypervisor-
>> neutral as e.g. the vm-memory Rust crate (inside of the "neutral" vhost-
>> user device servers) does need to be built with the `xen` feature. But
>> still, that's how it works. What can be made generic is generic.
>>
>> xen-vhost-frontend, which is the thing that integrates these with Xen,
>> actually used to handle the interrupts in userspace[1] by firing the
>> DMOP itself (which is where I could "just replace that with
>> IOCTL_EVTCHN_NOTIFY") but that was offloaded to the kernel with the
>> introduction of IOCTL_PRIVCMD_IRQFD[2], similarly to KVM_IRQFD.
>>
> I think what would be preferable for your use case would be to have a way
> to bind an event channel to an eventfd object, which should be a
> primitive that lives in the evtchn device.
Yeah, it would be an ioctl on the evtchn device, definitely. I wasn't
being exact when I said "extend privcmd", sorry. I just meant "handling
it on the Linux side" generally!
> The current interface kind of assumes that you're looking to run a
> completely emulated virtio device with no Xen specifics, and it looks
> like that's not exactly what you're implementing.
It's already implemented, and I'm not looking to change it much, just to
make it work on x86_64. The only thing that wasn't already compatible
was firing the host-to-guest interrupt, because on x86_64 we don't have
anything like the (v)GIC with its massive arbitrary IRQ number space.
Event channels are the only way to interrupt a PVH guest, hence using
xenbus in the guest to provision the device.
> As you actually plan to switch to using event channels for notifying the
> guest, I think it would be preferable to do the same the other way
> (event channels to notify the host) so you only have event channels to
> worry about here.
The other direction is already implemented perfectly well in
IOCTL_PRIVCMD_IOEVENTFD. The MMIO area is set up like so:
- ioreq is mapped with
IOCTL_PRIVCMD_MMAP_RESOURCE(XENMEM_resource_ioreq_server, ..);
- vp_eport event channels (per cpu) are bound to the current domain via
IOCTL_EVTCHN_BIND_INTERDOMAIN;
- those are passed, along with the ioreq page itself, to
IOCTL_PRIVCMD_IOEVENTFD to get an eventfd that fires when a virtqueue is
ready;
- which is an eventfd that xen-vhost-frontend passes to the vhost-user
device server.
So for this direction, it's not a 1:1 mapping but rather a specific
contraption designed to efficiently handle this use case:
- when an ioreq event channel (for any of the vcpus) fires,
- the kernel handler (ioeventfd_interrupt) checks if it's specifically
an IOREQ_WRITE write to the VIRTIO_MMIO_QUEUE_NOTIFY offset,
- and if so, it signals the eventfd for any virtqueue that has new data
(waking the generic device server which has the eventfd, so bypassing
xen-vhost-frontend), pings the guest back via evtchn, and returns
IRQ_HANDLED;
- otherwise the request is handled in userspace by xen-vhost-frontend
(virtio configuration register access).
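In userspace terms, the registration step of that contraption looks
roughly like this (a sketch assuming the upstream struct
privcmd_ioeventfd layout; the variable names are placeholders):

struct privcmd_ioeventfd ioevt = {
	.ioreq    = (uintptr_t)ioreq_map,	/* mapped ioreq pages */
	.ports    = (uintptr_t)local_ports,	/* per-vCPU bound ports */
	.vcpus    = nr_vcpus,
	.addr     = mmio_base + VIRTIO_MMIO_QUEUE_NOTIFY,
	.addr_len = 4,
	.event_fd = queue_eventfd,	/* handed to the device server */
	.vq       = queue_index,
	.dom      = guest_domid,
};
ioctl(privcmd_fd, IOCTL_PRIVCMD_IOEVENTFD, &ioevt);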
It just works :)
~val