[PATCH] qemu: Add vhost-user-blk unix socket to support server mode

Zhenguo Yao posted 1 patch 1 year, 1 month ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/libvirt tags/patchew/20230309085807.70848-1-yaozhenguo1@gmail.com
src/conf/domain_conf.c                             | 14 ++++++++++++++
src/conf/schemas/domaincommon.rng                  |  8 ++++++++
.../disk-vhostuser-numa.x86_64-4.2.0.args          |  6 ++++--
.../disk-vhostuser-numa.x86_64-latest.args         |  6 ++++--
tests/qemuxml2argvdata/disk-vhostuser-numa.xml     |  9 +++++++--
.../disk-vhostuser.x86_64-latest.args              |  6 ++++--
tests/qemuxml2argvdata/disk-vhostuser.xml          |  9 +++++++--
.../disk-vhostuser.x86_64-latest.xml               | 12 +++++++++---
8 files changed, 57 insertions(+), 13 deletions(-)
[PATCH] qemu: Add vhost-user-blk unix socket to support server mode
Posted by Zhenguo Yao 1 year, 1 month ago
qemu support server mode when using vhost-user-blk disk.
Let libvirt to support this.

Signed-off-by: Zhenguo Yao <yaozhenguo1@gmail.com>
---
 src/conf/domain_conf.c                             | 14 ++++++++++++++
 src/conf/schemas/domaincommon.rng                  |  8 ++++++++
 .../disk-vhostuser-numa.x86_64-4.2.0.args          |  6 ++++--
 .../disk-vhostuser-numa.x86_64-latest.args         |  6 ++++--
 tests/qemuxml2argvdata/disk-vhostuser-numa.xml     |  9 +++++++--
 .../disk-vhostuser.x86_64-latest.args              |  6 ++++--
 tests/qemuxml2argvdata/disk-vhostuser.xml          |  9 +++++++--
 .../disk-vhostuser.x86_64-latest.xml               | 12 +++++++++---
 8 files changed, 57 insertions(+), 13 deletions(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index a5578324b9..61addbe222 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -7283,6 +7283,7 @@ virDomainDiskSourceVHostUserParse(xmlNodePtr node,
 {
     g_autofree char *type = virXMLPropString(node, "type");
     g_autofree char *path = virXMLPropString(node, "path");
+    g_autofree char *mode = virXMLPropString(node,  "mode");
 
     if (!type) {
         virReportError(VIR_ERR_XML_ERROR, "%s",
@@ -7313,6 +7314,17 @@ virDomainDiskSourceVHostUserParse(xmlNodePtr node,
                                                ctxt) < 0) {
         return -1;
     }
+    if (mode && STREQ(mode, "server")) {
+        src->vhostuser->data.nix.listen = true;
+        if (src->vhostuser->data.nix.reconnect.enabled == VIR_TRISTATE_BOOL_YES) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+                _("'reconnect' attribute  unsupported "
+                "'server' mode for `<disk type='vhostuser'>`"));
+            return -1;
+        }
+    } else if (mode && STREQ(mode, "client")) {
+        src->vhostuser->data.nix.listen = false;
+    }
 
     return 0;
 }
@@ -22121,6 +22133,8 @@ virDomainDiskSourceVhostuserFormat(virBuffer *attrBuf,
     virBufferAddLit(attrBuf, " type='unix'");
     virBufferAsprintf(attrBuf, " path='%s'", vhostuser->data.nix.path);
 
+    if (vhostuser->data.nix.listen)
+	    virBufferAsprintf(attrBuf, " mode='%s'", "server");
     virDomainChrSourceReconnectDefFormat(childBuf, &vhostuser->data.nix.reconnect);
 }
 
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
index a57dd212ab..d926195d69 100644
--- a/src/conf/schemas/domaincommon.rng
+++ b/src/conf/schemas/domaincommon.rng
@@ -2359,6 +2359,14 @@
       <attribute name="path">
         <ref name="absFilePath"/>
       </attribute>
+      <optional>
+        <attribute name="mode">
+          <choice>
+            <value>server</value>
+            <value>client</value>
+          </choice>
+        </attribute>
+      </optional>
       <optional>
          <ref name="reconnect"/>
       </optional>
diff --git a/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-4.2.0.args b/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-4.2.0.args
index 4480266e75..afd0be4ea5 100644
--- a/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-4.2.0.args
+++ b/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-4.2.0.args
@@ -31,9 +31,11 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \
 -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 \
 -chardev socket,id=chr-vu-virtio-disk0,path=/tmp/vhost1.sock \
 -device vhost-user-blk-pci,bus=pci.0,addr=0x2,chardev=chr-vu-virtio-disk0,id=virtio-disk0,bootindex=1 \
--chardev socket,id=chr-vu-virtio-disk1,path=/tmp/vhost1.sock,reconnect=10 \
+-chardev socket,id=chr-vu-virtio-disk1,path=/tmp/vhost1.sock,server=on,wait=off \
 -device vhost-user-blk-pci,iommu_platform=on,ats=on,packed=on,bus=pci.0,addr=0x3,chardev=chr-vu-virtio-disk1,id=virtio-disk1 \
+-chardev socket,id=chr-vu-virtio-disk2,path=/tmp/vhost1.sock,reconnect=10 \
+-device vhost-user-blk-pci,iommu_platform=on,ats=on,packed=on,bus=pci.0,addr=0x4,chardev=chr-vu-virtio-disk2,id=virtio-disk2 \
 -audiodev '{"id":"audio1","driver":"none"}' \
--device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x4 \
+-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 \
 -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \
 -msg timestamp=on
diff --git a/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-latest.args b/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-latest.args
index 75b3232dad..ea4b227328 100644
--- a/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-latest.args
+++ b/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-latest.args
@@ -31,9 +31,11 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \
 -device '{"driver":"piix3-usb-uhci","id":"usb","bus":"pci.0","addr":"0x1.0x2"}' \
 -chardev socket,id=chr-vu-virtio-disk0,path=/tmp/vhost1.sock \
 -device '{"driver":"vhost-user-blk-pci","bus":"pci.0","addr":"0x2","chardev":"chr-vu-virtio-disk0","id":"virtio-disk0","bootindex":1}' \
--chardev socket,id=chr-vu-virtio-disk1,path=/tmp/vhost1.sock,reconnect=10 \
+-chardev socket,id=chr-vu-virtio-disk1,path=/tmp/vhost1.sock,server=on,wait=off \
 -device '{"driver":"vhost-user-blk-pci","iommu_platform":true,"ats":true,"packed":true,"bus":"pci.0","addr":"0x3","chardev":"chr-vu-virtio-disk1","id":"virtio-disk1"}' \
+-chardev socket,id=chr-vu-virtio-disk2,path=/tmp/vhost1.sock,reconnect=10 \
+-device '{"driver":"vhost-user-blk-pci","iommu_platform":true,"ats":true,"packed":true,"bus":"pci.0","addr":"0x4","chardev":"chr-vu-virtio-disk2","id":"virtio-disk2"}' \
 -audiodev '{"id":"audio1","driver":"none"}' \
--device '{"driver":"virtio-balloon-pci","id":"balloon0","bus":"pci.0","addr":"0x4"}' \
+-device '{"driver":"virtio-balloon-pci","id":"balloon0","bus":"pci.0","addr":"0x5"}' \
 -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \
 -msg timestamp=on
diff --git a/tests/qemuxml2argvdata/disk-vhostuser-numa.xml b/tests/qemuxml2argvdata/disk-vhostuser-numa.xml
index 49efbae0a2..1135b51b8a 100644
--- a/tests/qemuxml2argvdata/disk-vhostuser-numa.xml
+++ b/tests/qemuxml2argvdata/disk-vhostuser-numa.xml
@@ -23,10 +23,15 @@
     </disk>
     <disk type='vhostuser' device='disk'>
       <driver name='qemu' type='raw' iommu='on' ats='on' packed='on'/>
-      <source type='unix' path='/tmp/vhost1.sock'>
+      <source type='unix' path='/tmp/vhost1.sock' mode='server'/>
+      <target dev='vdb' bus='virtio'/>
+    </disk>
+    <disk type='vhostuser' device='disk'>
+      <driver name='qemu' type='raw' iommu='on' ats='on' packed='on'/>
+      <source type='unix' path='/tmp/vhost1.sock' mode='client'>
         <reconnect enabled='yes' timeout='10'/>
       </source>
-      <target dev='vdb' bus='virtio'/>
+      <target dev='vdc' bus='virtio'/>
     </disk>
   </devices>
 </domain>
diff --git a/tests/qemuxml2argvdata/disk-vhostuser.x86_64-latest.args b/tests/qemuxml2argvdata/disk-vhostuser.x86_64-latest.args
index 31428fc697..4391776b88 100644
--- a/tests/qemuxml2argvdata/disk-vhostuser.x86_64-latest.args
+++ b/tests/qemuxml2argvdata/disk-vhostuser.x86_64-latest.args
@@ -30,9 +30,11 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \
 -device '{"driver":"piix3-usb-uhci","id":"usb","bus":"pci.0","addr":"0x1.0x2"}' \
 -chardev socket,id=chr-vu-virtio-disk0,path=/tmp/vhost1.sock \
 -device '{"driver":"vhost-user-blk-pci","bus":"pci.0","addr":"0x2","chardev":"chr-vu-virtio-disk0","id":"virtio-disk0","bootindex":1}' \
--chardev socket,id=chr-vu-virtio-disk1,path=/tmp/vhost1.sock,reconnect=10 \
+-chardev socket,id=chr-vu-virtio-disk1,path=/tmp/vhost1.sock,server=on,wait=off \
 -device '{"driver":"vhost-user-blk-pci","iommu_platform":true,"ats":true,"packed":true,"bus":"pci.0","addr":"0x3","chardev":"chr-vu-virtio-disk1","id":"virtio-disk1"}' \
+-chardev socket,id=chr-vu-virtio-disk2,path=/tmp/vhost1.sock,reconnect=10 \
+-device '{"driver":"vhost-user-blk-pci","iommu_platform":true,"ats":true,"packed":true,"bus":"pci.0","addr":"0x4","chardev":"chr-vu-virtio-disk2","id":"virtio-disk2"}' \
 -audiodev '{"id":"audio1","driver":"none"}' \
--device '{"driver":"virtio-balloon-pci","id":"balloon0","bus":"pci.0","addr":"0x4"}' \
+-device '{"driver":"virtio-balloon-pci","id":"balloon0","bus":"pci.0","addr":"0x5"}' \
 -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \
 -msg timestamp=on
diff --git a/tests/qemuxml2argvdata/disk-vhostuser.xml b/tests/qemuxml2argvdata/disk-vhostuser.xml
index a0a3df4dd0..33fc0b885e 100644
--- a/tests/qemuxml2argvdata/disk-vhostuser.xml
+++ b/tests/qemuxml2argvdata/disk-vhostuser.xml
@@ -21,10 +21,15 @@
     </disk>
     <disk type='vhostuser' device='disk'>
       <driver name='qemu' type='raw' iommu='on' ats='on' packed='on'/>
-      <source type='unix' path='/tmp/vhost1.sock'>
+      <source type='unix' path='/tmp/vhost1.sock' mode='server'/>
+      <target dev='vdb' bus='virtio'/>
+    </disk>
+    <disk type='vhostuser' device='disk'>
+      <driver name='qemu' type='raw' iommu='on' ats='on' packed='on'/>
+      <source type='unix' path='/tmp/vhost1.sock' mode='client'>
         <reconnect enabled='yes' timeout='10'/>
       </source>
-      <target dev='vdb' bus='virtio'/>
+      <target dev='vdc' bus='virtio'/>
     </disk>
   </devices>
 </domain>
diff --git a/tests/qemuxml2xmloutdata/disk-vhostuser.x86_64-latest.xml b/tests/qemuxml2xmloutdata/disk-vhostuser.x86_64-latest.xml
index c64621fe93..cd955570f3 100644
--- a/tests/qemuxml2xmloutdata/disk-vhostuser.x86_64-latest.xml
+++ b/tests/qemuxml2xmloutdata/disk-vhostuser.x86_64-latest.xml
@@ -27,13 +27,19 @@
       <boot order='1'/>
       <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0'/>
     </disk>
+    <disk type='vhostuser' device='disk' snapshot='no'>
+      <driver name='qemu' type='raw' iommu='on' ats='on' packed='on'/>
+      <source type='unix' path='/tmp/vhost1.sock' mode='server'/>
+      <target dev='vdb' bus='virtio'/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/>
+    </disk>
     <disk type='vhostuser' device='disk' snapshot='no'>
       <driver name='qemu' type='raw' iommu='on' ats='on' packed='on'/>
       <source type='unix' path='/tmp/vhost1.sock'>
         <reconnect enabled='yes' timeout='10'/>
       </source>
-      <target dev='vdb' bus='virtio'/>
-      <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/>
+      <target dev='vdc' bus='virtio'/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/>
     </disk>
     <controller type='usb' index='0' model='piix3-uhci'>
       <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/>
@@ -43,7 +49,7 @@
     <input type='keyboard' bus='ps2'/>
     <audio id='1' type='none'/>
     <memballoon model='virtio'>
-      <address type='pci' domain='0x0000' bus='0x00' slot='0x04' function='0x0'/>
+      <address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/>
     </memballoon>
   </devices>
 </domain>
-- 
2.32.0
Re: [PATCH] qemu: Add vhost-user-blk unix socket to support server mode
Posted by Peter Krempa 1 year ago
On Thu, Mar 09, 2023 at 16:58:07 +0800, Zhenguo Yao wrote:
> qemu support server mode when using vhost-user-blk disk.
> Let libvirt to support this.

Could you please elaborate how you expect to use this?

I'm asking because server mode comes with a integrated set of race
conditions:

> diff --git a/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-latest.args b/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-latest.args
> index 75b3232dad..ea4b227328 100644
> --- a/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-latest.args
> +++ b/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-latest.args
> @@ -31,9 +31,11 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \
>  -device '{"driver":"piix3-usb-uhci","id":"usb","bus":"pci.0","addr":"0x1.0x2"}' \
>  -chardev socket,id=chr-vu-virtio-disk0,path=/tmp/vhost1.sock \
>  -device '{"driver":"vhost-user-blk-pci","bus":"pci.0","addr":"0x2","chardev":"chr-vu-virtio-disk0","id":"virtio-disk0","bootindex":1}' \
> --chardev socket,id=chr-vu-virtio-disk1,path=/tmp/vhost1.sock,reconnect=10 \
> +-chardev socket,id=chr-vu-virtio-disk1,path=/tmp/vhost1.sock,server=on,wait=off \

Emphasizing that 'wait=off' is used for the chardev backend.

When starting such a VM assuming only 1 storage device is being started
you have a very short window where the client must connect to it and
start the vhost session. Otherwise the disk will not be visible by the
VM and it will not boot.

This is a bit different than vhost-user network as it's required right
at the start.

Even then vhost-user networking which supports server mode sets
'wait=on'. But this is problematic on a different level. The VM is stuck
until the vhost connection is established

Similarly it also creates a problem when hotpluggingg as there 'wait=on'
doesn't really exist, thus it behaves differently.

Thus unless there is a good reason to do so I find that the 'server'
mode is not useful here.
Re: [PATCH] qemu: Add vhost-user-blk unix socket to support server mode
Posted by Zhenguo Yao 1 year ago
Peter Krempa <pkrempa@redhat.com> 于2023年3月16日周四 21:51写道:
>
> On Thu, Mar 09, 2023 at 16:58:07 +0800, Zhenguo Yao wrote:
> > qemu support server mode when using vhost-user-blk disk.
> > Let libvirt to support this.
>
> Could you please elaborate how you expect to use this?
>
> I'm asking because server mode comes with a integrated set of race
> conditions:
>
We want QEMU to server mode because when other side(for example SPDK
or DPDK) acted as server,
it restarts because of some reasons. Plants of clients will try to
reconnect to the server together. This
will take pressure on the server. So, we set QEMU to server mode  in
vhost-user network. And, we
follow this in vhost-user blk.
Could you please show me which race conditions will come with when
using server mode in vhost-user blk?

> > diff --git a/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-latest.args b/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-latest.args
> > index 75b3232dad..ea4b227328 100644
> > --- a/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-latest.args
> > +++ b/tests/qemuxml2argvdata/disk-vhostuser-numa.x86_64-latest.args
> > @@ -31,9 +31,11 @@ XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \
> >  -device '{"driver":"piix3-usb-uhci","id":"usb","bus":"pci.0","addr":"0x1.0x2"}' \
> >  -chardev socket,id=chr-vu-virtio-disk0,path=/tmp/vhost1.sock \
> >  -device '{"driver":"vhost-user-blk-pci","bus":"pci.0","addr":"0x2","chardev":"chr-vu-virtio-disk0","id":"virtio-disk0","bootindex":1}' \
> > --chardev socket,id=chr-vu-virtio-disk1,path=/tmp/vhost1.sock,reconnect=10 \
> > +-chardev socket,id=chr-vu-virtio-disk1,path=/tmp/vhost1.sock,server=on,wait=off \
>
> Emphasizing that 'wait=off' is used for the chardev backend.
>
> When starting such a VM assuming only 1 storage device is being started
> you have a very short window where the client must connect to it and
> start the vhost session. Otherwise the disk will not be visible by the
> VM and it will not boot.
>
> This is a bit different than vhost-user network as it's required right
> at the start.
>
> Even then vhost-user networking which supports server mode sets
> 'wait=on'. But this is problematic on a different level. The VM is stuck
> until the vhost connection is established
>
> Similarly it also creates a problem when hotpluggingg as there 'wait=on'
> doesn't really exist, thus it behaves differently.
>
Yes,you are right. wait=on is needed in this situation.

> Thus unless there is a good reason to do so I find that the 'server'
> mode is not useful here.
>
Re: [PATCH] qemu: Add vhost-user-blk unix socket to support server mode
Posted by Peter Krempa 1 year ago
On Tue, Mar 21, 2023 at 09:46:44 +0800, Zhenguo Yao wrote:
> Peter Krempa <pkrempa@redhat.com> 于2023年3月16日周四 21:51写道:
> >
> > On Thu, Mar 09, 2023 at 16:58:07 +0800, Zhenguo Yao wrote:
> > > qemu support server mode when using vhost-user-blk disk.
> > > Let libvirt to support this.
> >
> > Could you please elaborate how you expect to use this?
> >
> > I'm asking because server mode comes with a integrated set of race
> > conditions:
> >
> We want QEMU to server mode because when other side(for example SPDK
> or DPDK) acted as server,
> it restarts because of some reasons. Plants of clients will try to
> reconnect to the server together. This
> will take pressure on the server.

I don't really find this argument as compelling. If the server doesn't
want to accept the connection it can refuse it.

> So, we set QEMU to server mode  in
> vhost-user network. And, we
> follow this in vhost-user blk.

I really don't want us to add another instance of this. I think that the
server mode for vhost-user network was a mistake. It's really poorly
documented and the semantics are very non-obvius.

Specifically qemu being stuck until the "clients" connect to it is a
very broken semantic and users don't expect it.

Additionally you have the issue of distinct behaviour with hotplug where
the VM is not stopped.

> Could you please show me which race conditions will come with when
> using server mode in vhost-user blk?

The race condition is when you don't use 'wait=on'. The race is between
qemu starting up and wanting to use the disk and your "client"
connecting to it.

'wait=off' doesn't have that but has awful semantics which I really
don't want to add more of to libvirt.

Your use case seems more of a workaround than a real need for this
feature.