[PATCH v2] vhost-vdpa: allow passing opened vhostfd to vhost-vdpa

Si-Wei Liu posted 1 patch 1 year, 6 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/1665215938-24473-1-git-send-email-si-wei.liu@oracle.com
Maintainers: Jason Wang <jasowang@redhat.com>, Eric Blake <eblake@redhat.com>, Markus Armbruster <armbru@redhat.com>
net/vhost-vdpa.c | 25 ++++++++++++++++++++-----
qapi/net.json    |  3 +++
qemu-options.hx  |  6 ++++--
3 files changed, 27 insertions(+), 7 deletions(-)
[PATCH v2] vhost-vdpa: allow passing opened vhostfd to vhost-vdpa
Posted by Si-Wei Liu 1 year, 6 months ago
Similar to other vhost backends, vhostfd can be passed to vhost-vdpa
backend as another parameter to instantiate vhost-vdpa net client.
This would benefit the use case where only open file descriptors, as
opposed to raw vhost-vdpa device paths, are accessible from the QEMU
process.

(qemu) netdev_add type=vhost-vdpa,vhostfd=61,id=vhost-vdpa1

Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
Acked-by: Eugenio Pérez <eperezma@redhat.com>

---
v2:
  - fixed typo in commit message
  - s/fd's/file descriptors/
---
 net/vhost-vdpa.c | 25 ++++++++++++++++++++-----
 qapi/net.json    |  3 +++
 qemu-options.hx  |  6 ++++--
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 182b3a1..366b070 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -683,14 +683,29 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
 
     assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
     opts = &netdev->u.vhost_vdpa;
-    if (!opts->vhostdev) {
-        error_setg(errp, "vdpa character device not specified with vhostdev");
+    if (!opts->has_vhostdev && !opts->has_vhostfd) {
+        error_setg(errp,
+                   "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
         return -1;
     }
 
-    vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
-    if (vdpa_device_fd == -1) {
-        return -errno;
+    if (opts->has_vhostdev && opts->has_vhostfd) {
+        error_setg(errp,
+                   "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
+        return -1;
+    }
+
+    if (opts->has_vhostdev) {
+        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
+        if (vdpa_device_fd == -1) {
+            return -errno;
+        }
+    } else if (opts->has_vhostfd) {
+        vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
+        if (vdpa_device_fd == -1) {
+            error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
+            return -1;
+        }
     }
 
     r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
diff --git a/qapi/net.json b/qapi/net.json
index dd088c0..926ecc8 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -442,6 +442,8 @@
 # @vhostdev: path of vhost-vdpa device
 #            (default:'/dev/vhost-vdpa-0')
 #
+# @vhostfd: file descriptor of an already opened vhost vdpa device
+#
 # @queues: number of queues to be created for multiqueue vhost-vdpa
 #          (default: 1)
 #
@@ -456,6 +458,7 @@
 { 'struct': 'NetdevVhostVDPAOptions',
   'data': {
     '*vhostdev':     'str',
+    '*vhostfd':      'str',
     '*queues':       'int',
     '*x-svq':        {'type': 'bool', 'features' : [ 'unstable'] } } }
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 913c71e..c040f74 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2774,8 +2774,10 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
     "                configure a vhost-user network, backed by a chardev 'dev'\n"
 #endif
 #ifdef __linux__
-    "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
+    "-netdev vhost-vdpa,id=str[,vhostdev=/path/to/dev][,vhostfd=h]\n"
     "                configure a vhost-vdpa network,Establish a vhost-vdpa netdev\n"
+    "                use 'vhostdev=/path/to/dev' to open a vhost vdpa device\n"
+    "                use 'vhostfd=h' to connect to an already opened vhost vdpa device\n"
 #endif
 #ifdef CONFIG_VMNET
     "-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n"
@@ -3280,7 +3282,7 @@ SRST
              -netdev type=vhost-user,id=net0,chardev=chr0 \
              -device virtio-net-pci,netdev=net0
 
-``-netdev vhost-vdpa,vhostdev=/path/to/dev``
+``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]``
     Establish a vhost-vdpa netdev.
 
     vDPA device is a device that uses a datapath which complies with
-- 
1.8.3.1


Re: [PATCH v2] vhost-vdpa: allow passing opened vhostfd to vhost-vdpa
Posted by Jason Wang 1 year, 6 months ago
On Sat, Oct 8, 2022 at 5:04 PM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
> Similar to other vhost backends, vhostfd can be passed to vhost-vdpa
> backend as another parameter to instantiate vhost-vdpa net client.
> This would benefit the use case where only open file descriptors, as
> opposed to raw vhost-vdpa device paths, are accessible from the QEMU
> process.
>
> (qemu) netdev_add type=vhost-vdpa,vhostfd=61,id=vhost-vdpa1

Adding Cindy.

This has been discussed before, we've already had
vhostdev=/dev/fdset/$fd which should be functional equivalent to what
has been proposed here. (And this is how libvirt works if I understand
correctly).

Thanks

>
> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> Acked-by: Eugenio Pérez <eperezma@redhat.com>
>
> ---
> v2:
>   - fixed typo in commit message
>   - s/fd's/file descriptors/
> ---
>  net/vhost-vdpa.c | 25 ++++++++++++++++++++-----
>  qapi/net.json    |  3 +++
>  qemu-options.hx  |  6 ++++--
>  3 files changed, 27 insertions(+), 7 deletions(-)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 182b3a1..366b070 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -683,14 +683,29 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
>
>      assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>      opts = &netdev->u.vhost_vdpa;
> -    if (!opts->vhostdev) {
> -        error_setg(errp, "vdpa character device not specified with vhostdev");
> +    if (!opts->has_vhostdev && !opts->has_vhostfd) {
> +        error_setg(errp,
> +                   "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
>          return -1;
>      }
>
> -    vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
> -    if (vdpa_device_fd == -1) {
> -        return -errno;
> +    if (opts->has_vhostdev && opts->has_vhostfd) {
> +        error_setg(errp,
> +                   "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
> +        return -1;
> +    }
> +
> +    if (opts->has_vhostdev) {
> +        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
> +        if (vdpa_device_fd == -1) {
> +            return -errno;
> +        }
> +    } else if (opts->has_vhostfd) {
> +        vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
> +        if (vdpa_device_fd == -1) {
> +            error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
> +            return -1;
> +        }
>      }
>
>      r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
> diff --git a/qapi/net.json b/qapi/net.json
> index dd088c0..926ecc8 100644
> --- a/qapi/net.json
> +++ b/qapi/net.json
> @@ -442,6 +442,8 @@
>  # @vhostdev: path of vhost-vdpa device
>  #            (default:'/dev/vhost-vdpa-0')
>  #
> +# @vhostfd: file descriptor of an already opened vhost vdpa device
> +#
>  # @queues: number of queues to be created for multiqueue vhost-vdpa
>  #          (default: 1)
>  #
> @@ -456,6 +458,7 @@
>  { 'struct': 'NetdevVhostVDPAOptions',
>    'data': {
>      '*vhostdev':     'str',
> +    '*vhostfd':      'str',
>      '*queues':       'int',
>      '*x-svq':        {'type': 'bool', 'features' : [ 'unstable'] } } }
>
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 913c71e..c040f74 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -2774,8 +2774,10 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
>      "                configure a vhost-user network, backed by a chardev 'dev'\n"
>  #endif
>  #ifdef __linux__
> -    "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
> +    "-netdev vhost-vdpa,id=str[,vhostdev=/path/to/dev][,vhostfd=h]\n"
>      "                configure a vhost-vdpa network,Establish a vhost-vdpa netdev\n"
> +    "                use 'vhostdev=/path/to/dev' to open a vhost vdpa device\n"
> +    "                use 'vhostfd=h' to connect to an already opened vhost vdpa device\n"
>  #endif
>  #ifdef CONFIG_VMNET
>      "-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n"
> @@ -3280,7 +3282,7 @@ SRST
>               -netdev type=vhost-user,id=net0,chardev=chr0 \
>               -device virtio-net-pci,netdev=net0
>
> -``-netdev vhost-vdpa,vhostdev=/path/to/dev``
> +``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]``
>      Establish a vhost-vdpa netdev.
>
>      vDPA device is a device that uses a datapath which complies with
> --
> 1.8.3.1
>
Re: [PATCH v2] vhost-vdpa: allow passing opened vhostfd to vhost-vdpa
Posted by Si-Wei Liu 1 year, 6 months ago

On 10/8/2022 10:43 PM, Jason Wang wrote:
> On Sat, Oct 8, 2022 at 5:04 PM Si-Wei Liu<si-wei.liu@oracle.com>  wrote:
>> Similar to other vhost backends, vhostfd can be passed to vhost-vdpa
>> backend as another parameter to instantiate vhost-vdpa net client.
>> This would benefit the use case where only open file descriptors, as
>> opposed to raw vhost-vdpa device paths, are accessible from the QEMU
>> process.
>>
>> (qemu) netdev_add type=vhost-vdpa,vhostfd=61,id=vhost-vdpa1
> Adding Cindy.
>
> This has been discussed before, we've already had
> vhostdev=/dev/fdset/$fd which should be functional equivalent to what
> has been proposed here. (And this is how libvirt works if I understand
> correctly).
Yes, I was aware of that discussion. However, our implementation of the 
management software is a bit different from libvirt, in which the paths 
in /dev/fdset/NNN can't be dynamically passed to the container where 
QEMU is running. By using a specific vhostfd property with existing 
code, it would allow our mgmt software smooth adaption without having to 
add too much infra code to support the /dev/fdset/NNN trick.

On the other hand, the other vhost backends, e.g. tap (via vhost-net), 
vhost-scsi and vhost-vsock all accept vhostfd as parameter to 
instantiate device, although the /dev/fdset trick also works there. I 
think vhost-vdpa is not  unprecedented in this case?

Thanks,
-Siwei


>
> Thanks
>
>> Signed-off-by: Si-Wei Liu<si-wei.liu@oracle.com>
>> Acked-by: Eugenio Pérez<eperezma@redhat.com>
>>
>> ---
>> v2:
>>    - fixed typo in commit message
>>    - s/fd's/file descriptors/
>> ---
>>   net/vhost-vdpa.c | 25 ++++++++++++++++++++-----
>>   qapi/net.json    |  3 +++
>>   qemu-options.hx  |  6 ++++--
>>   3 files changed, 27 insertions(+), 7 deletions(-)
>>
>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>> index 182b3a1..366b070 100644
>> --- a/net/vhost-vdpa.c
>> +++ b/net/vhost-vdpa.c
>> @@ -683,14 +683,29 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
>>
>>       assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>>       opts = &netdev->u.vhost_vdpa;
>> -    if (!opts->vhostdev) {
>> -        error_setg(errp, "vdpa character device not specified with vhostdev");
>> +    if (!opts->has_vhostdev && !opts->has_vhostfd) {
>> +        error_setg(errp,
>> +                   "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
>>           return -1;
>>       }
>>
>> -    vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
>> -    if (vdpa_device_fd == -1) {
>> -        return -errno;
>> +    if (opts->has_vhostdev && opts->has_vhostfd) {
>> +        error_setg(errp,
>> +                   "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
>> +        return -1;
>> +    }
>> +
>> +    if (opts->has_vhostdev) {
>> +        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
>> +        if (vdpa_device_fd == -1) {
>> +            return -errno;
>> +        }
>> +    } else if (opts->has_vhostfd) {
>> +        vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
>> +        if (vdpa_device_fd == -1) {
>> +            error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
>> +            return -1;
>> +        }
>>       }
>>
>>       r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
>> diff --git a/qapi/net.json b/qapi/net.json
>> index dd088c0..926ecc8 100644
>> --- a/qapi/net.json
>> +++ b/qapi/net.json
>> @@ -442,6 +442,8 @@
>>   # @vhostdev: path of vhost-vdpa device
>>   #            (default:'/dev/vhost-vdpa-0')
>>   #
>> +# @vhostfd: file descriptor of an already opened vhost vdpa device
>> +#
>>   # @queues: number of queues to be created for multiqueue vhost-vdpa
>>   #          (default: 1)
>>   #
>> @@ -456,6 +458,7 @@
>>   { 'struct': 'NetdevVhostVDPAOptions',
>>     'data': {
>>       '*vhostdev':     'str',
>> +    '*vhostfd':      'str',
>>       '*queues':       'int',
>>       '*x-svq':        {'type': 'bool', 'features' : [ 'unstable'] } } }
>>
>> diff --git a/qemu-options.hx b/qemu-options.hx
>> index 913c71e..c040f74 100644
>> --- a/qemu-options.hx
>> +++ b/qemu-options.hx
>> @@ -2774,8 +2774,10 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
>>       "                configure a vhost-user network, backed by a chardev 'dev'\n"
>>   #endif
>>   #ifdef __linux__
>> -    "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
>> +    "-netdev vhost-vdpa,id=str[,vhostdev=/path/to/dev][,vhostfd=h]\n"
>>       "                configure a vhost-vdpa network,Establish a vhost-vdpa netdev\n"
>> +    "                use 'vhostdev=/path/to/dev' to open a vhost vdpa device\n"
>> +    "                use 'vhostfd=h' to connect to an already opened vhost vdpa device\n"
>>   #endif
>>   #ifdef CONFIG_VMNET
>>       "-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n"
>> @@ -3280,7 +3282,7 @@ SRST
>>                -netdev type=vhost-user,id=net0,chardev=chr0 \
>>                -device virtio-net-pci,netdev=net0
>>
>> -``-netdev vhost-vdpa,vhostdev=/path/to/dev``
>> +``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]``
>>       Establish a vhost-vdpa netdev.
>>
>>       vDPA device is a device that uses a datapath which complies with
>> --
>> 1.8.3.1
>>
Re: [PATCH v2] vhost-vdpa: allow passing opened vhostfd to vhost-vdpa
Posted by Jason Wang 1 year, 6 months ago
On Tue, Oct 11, 2022 at 1:18 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 10/8/2022 10:43 PM, Jason Wang wrote:
>
> On Sat, Oct 8, 2022 at 5:04 PM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
> Similar to other vhost backends, vhostfd can be passed to vhost-vdpa
> backend as another parameter to instantiate vhost-vdpa net client.
> This would benefit the use case where only open file descriptors, as
> opposed to raw vhost-vdpa device paths, are accessible from the QEMU
> process.
>
> (qemu) netdev_add type=vhost-vdpa,vhostfd=61,id=vhost-vdpa1
>
> Adding Cindy.
>
> This has been discussed before, we've already had
> vhostdev=/dev/fdset/$fd which should be functional equivalent to what
> has been proposed here. (And this is how libvirt works if I understand
> correctly).
>
> Yes, I was aware of that discussion. However, our implementation of the management software is a bit different from libvirt, in which the paths in /dev/fdset/NNN can't be dynamically passed to the container where QEMU is running. By using a specific vhostfd property with existing code, it would allow our mgmt software smooth adaption without having to add too much infra code to support the /dev/fdset/NNN trick.

I think fdset has extra flexibility in e.g hot-plug to allow the file
descriptor to be passed with SCM_RIGHTS. It would still be good to add
the support.

>
> On the other hand, the other vhost backends, e.g. tap (via vhost-net), vhost-scsi and vhost-vsock all accept vhostfd as parameter to instantiate device, although the /dev/fdset trick also works there. I think vhost-vdpa is not  unprecedented in this case?

Yes.

Thanks

>
> Thanks,
> -Siwei
>
>
>
> Thanks
>
> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> Acked-by: Eugenio Pérez <eperezma@redhat.com>
>
> ---
> v2:
>   - fixed typo in commit message
>   - s/fd's/file descriptors/
> ---
>  net/vhost-vdpa.c | 25 ++++++++++++++++++++-----
>  qapi/net.json    |  3 +++
>  qemu-options.hx  |  6 ++++--
>  3 files changed, 27 insertions(+), 7 deletions(-)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 182b3a1..366b070 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -683,14 +683,29 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
>
>      assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>      opts = &netdev->u.vhost_vdpa;
> -    if (!opts->vhostdev) {
> -        error_setg(errp, "vdpa character device not specified with vhostdev");
> +    if (!opts->has_vhostdev && !opts->has_vhostfd) {
> +        error_setg(errp,
> +                   "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
>          return -1;
>      }
>
> -    vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
> -    if (vdpa_device_fd == -1) {
> -        return -errno;
> +    if (opts->has_vhostdev && opts->has_vhostfd) {
> +        error_setg(errp,
> +                   "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
> +        return -1;
> +    }
> +
> +    if (opts->has_vhostdev) {
> +        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
> +        if (vdpa_device_fd == -1) {
> +            return -errno;
> +        }
> +    } else if (opts->has_vhostfd) {
> +        vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
> +        if (vdpa_device_fd == -1) {
> +            error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
> +            return -1;
> +        }
>      }
>
>      r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
> diff --git a/qapi/net.json b/qapi/net.json
> index dd088c0..926ecc8 100644
> --- a/qapi/net.json
> +++ b/qapi/net.json
> @@ -442,6 +442,8 @@
>  # @vhostdev: path of vhost-vdpa device
>  #            (default:'/dev/vhost-vdpa-0')
>  #
> +# @vhostfd: file descriptor of an already opened vhost vdpa device
> +#
>  # @queues: number of queues to be created for multiqueue vhost-vdpa
>  #          (default: 1)
>  #
> @@ -456,6 +458,7 @@
>  { 'struct': 'NetdevVhostVDPAOptions',
>    'data': {
>      '*vhostdev':     'str',
> +    '*vhostfd':      'str',
>      '*queues':       'int',
>      '*x-svq':        {'type': 'bool', 'features' : [ 'unstable'] } } }
>
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 913c71e..c040f74 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -2774,8 +2774,10 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
>      "                configure a vhost-user network, backed by a chardev 'dev'\n"
>  #endif
>  #ifdef __linux__
> -    "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
> +    "-netdev vhost-vdpa,id=str[,vhostdev=/path/to/dev][,vhostfd=h]\n"
>      "                configure a vhost-vdpa network,Establish a vhost-vdpa netdev\n"
> +    "                use 'vhostdev=/path/to/dev' to open a vhost vdpa device\n"
> +    "                use 'vhostfd=h' to connect to an already opened vhost vdpa device\n"
>  #endif
>  #ifdef CONFIG_VMNET
>      "-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n"
> @@ -3280,7 +3282,7 @@ SRST
>               -netdev type=vhost-user,id=net0,chardev=chr0 \
>               -device virtio-net-pci,netdev=net0
>
> -``-netdev vhost-vdpa,vhostdev=/path/to/dev``
> +``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]``
>      Establish a vhost-vdpa netdev.
>
>      vDPA device is a device that uses a datapath which complies with
> --
> 1.8.3.1
>
>
Re: [PATCH v2] vhost-vdpa: allow passing opened vhostfd to vhost-vdpa
Posted by Si-Wei Liu 1 year, 6 months ago

On 10/11/2022 8:09 PM, Jason Wang wrote:
> On Tue, Oct 11, 2022 at 1:18 AM Si-Wei Liu<si-wei.liu@oracle.com>  wrote:
>>
>>
>> On 10/8/2022 10:43 PM, Jason Wang wrote:
>>
>> On Sat, Oct 8, 2022 at 5:04 PM Si-Wei Liu<si-wei.liu@oracle.com>  wrote:
>>
>> Similar to other vhost backends, vhostfd can be passed to vhost-vdpa
>> backend as another parameter to instantiate vhost-vdpa net client.
>> This would benefit the use case where only open file descriptors, as
>> opposed to raw vhost-vdpa device paths, are accessible from the QEMU
>> process.
>>
>> (qemu) netdev_add type=vhost-vdpa,vhostfd=61,id=vhost-vdpa1
>>
>> Adding Cindy.
>>
>> This has been discussed before, we've already had
>> vhostdev=/dev/fdset/$fd which should be functional equivalent to what
>> has been proposed here. (And this is how libvirt works if I understand
>> correctly).
>>
>> Yes, I was aware of that discussion. However, our implementation of the management software is a bit different from libvirt, in which the paths in /dev/fdset/NNN can't be dynamically passed to the container where QEMU is running. By using a specific vhostfd property with existing code, it would allow our mgmt software smooth adaption without having to add too much infra code to support the /dev/fdset/NNN trick.
> I think fdset has extra flexibility in e.g hot-plug to allow the file
> descriptor to be passed with SCM_RIGHTS.
Yes, that's exactly the use case we'd like to support. Though the 
difference in our mgmt software stack from libvirt is that any dynamic 
path in /dev (like /dev/fdset/ABC or /dev/vhost-vdpa-XYZ) can't be 
allowed to get passed through to the container running QEMU on the fly 
for security reasons. fd passing is allowed, though, with very strict 
security checks. That's the main motivation for this direct vhostfd 
passing support (noted fdset doesn't need to be used along with 
/dev/fdset node).

Having it said, I found there's also nuance in the 
vhostdev=/dev/fdset/XyZ interface besides the /dev node limitation: the 
fd to open has to be dup'ed from the original one passed via SCM_RIGHTS. 
This also has implication on security that any ioctl call from QEMU 
can't be audited through the original fd. With this regard, I think 
vhostfd offers more flexibility than work around those qemu_open() 
specifics. Would these justify the use case of concern?

Thanks,
-Siwei

>   It would still be good to add
> the support.
>
>> On the other hand, the other vhost backends, e.g. tap (via vhost-net), vhost-scsi and vhost-vsock all accept vhostfd as parameter to instantiate device, although the /dev/fdset trick also works there. I think vhost-vdpa is not  unprecedented in this case?
> Yes.
>
> Thanks
>
>> Thanks,
>> -Siwei
>>
>>
>>
>> Thanks
>>
>> Signed-off-by: Si-Wei Liu<si-wei.liu@oracle.com>
>> Acked-by: Eugenio Pérez<eperezma@redhat.com>
>>
>> ---
>> v2:
>>    - fixed typo in commit message
>>    - s/fd's/file descriptors/
>> ---
>>   net/vhost-vdpa.c | 25 ++++++++++++++++++++-----
>>   qapi/net.json    |  3 +++
>>   qemu-options.hx  |  6 ++++--
>>   3 files changed, 27 insertions(+), 7 deletions(-)
>>
>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>> index 182b3a1..366b070 100644
>> --- a/net/vhost-vdpa.c
>> +++ b/net/vhost-vdpa.c
>> @@ -683,14 +683,29 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
>>
>>       assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>>       opts = &netdev->u.vhost_vdpa;
>> -    if (!opts->vhostdev) {
>> -        error_setg(errp, "vdpa character device not specified with vhostdev");
>> +    if (!opts->has_vhostdev && !opts->has_vhostfd) {
>> +        error_setg(errp,
>> +                   "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
>>           return -1;
>>       }
>>
>> -    vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
>> -    if (vdpa_device_fd == -1) {
>> -        return -errno;
>> +    if (opts->has_vhostdev && opts->has_vhostfd) {
>> +        error_setg(errp,
>> +                   "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
>> +        return -1;
>> +    }
>> +
>> +    if (opts->has_vhostdev) {
>> +        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
>> +        if (vdpa_device_fd == -1) {
>> +            return -errno;
>> +        }
>> +    } else if (opts->has_vhostfd) {
>> +        vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
>> +        if (vdpa_device_fd == -1) {
>> +            error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
>> +            return -1;
>> +        }
>>       }
>>
>>       r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
>> diff --git a/qapi/net.json b/qapi/net.json
>> index dd088c0..926ecc8 100644
>> --- a/qapi/net.json
>> +++ b/qapi/net.json
>> @@ -442,6 +442,8 @@
>>   # @vhostdev: path of vhost-vdpa device
>>   #            (default:'/dev/vhost-vdpa-0')
>>   #
>> +# @vhostfd: file descriptor of an already opened vhost vdpa device
>> +#
>>   # @queues: number of queues to be created for multiqueue vhost-vdpa
>>   #          (default: 1)
>>   #
>> @@ -456,6 +458,7 @@
>>   { 'struct': 'NetdevVhostVDPAOptions',
>>     'data': {
>>       '*vhostdev':     'str',
>> +    '*vhostfd':      'str',
>>       '*queues':       'int',
>>       '*x-svq':        {'type': 'bool', 'features' : [ 'unstable'] } } }
>>
>> diff --git a/qemu-options.hx b/qemu-options.hx
>> index 913c71e..c040f74 100644
>> --- a/qemu-options.hx
>> +++ b/qemu-options.hx
>> @@ -2774,8 +2774,10 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
>>       "                configure a vhost-user network, backed by a chardev 'dev'\n"
>>   #endif
>>   #ifdef __linux__
>> -    "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
>> +    "-netdev vhost-vdpa,id=str[,vhostdev=/path/to/dev][,vhostfd=h]\n"
>>       "                configure a vhost-vdpa network,Establish a vhost-vdpa netdev\n"
>> +    "                use 'vhostdev=/path/to/dev' to open a vhost vdpa device\n"
>> +    "                use 'vhostfd=h' to connect to an already opened vhost vdpa device\n"
>>   #endif
>>   #ifdef CONFIG_VMNET
>>       "-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n"
>> @@ -3280,7 +3282,7 @@ SRST
>>                -netdev type=vhost-user,id=net0,chardev=chr0 \
>>                -device virtio-net-pci,netdev=net0
>>
>> -``-netdev vhost-vdpa,vhostdev=/path/to/dev``
>> +``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]``
>>       Establish a vhost-vdpa netdev.
>>
>>       vDPA device is a device that uses a datapath which complies with
>> --
>> 1.8.3.1
>>
>>
Re: [PATCH v2] vhost-vdpa: allow passing opened vhostfd to vhost-vdpa
Posted by Jason Wang 1 year, 6 months ago
在 2022/10/12 13:59, Si-Wei Liu 写道:
>
>
> On 10/11/2022 8:09 PM, Jason Wang wrote:
>> On Tue, Oct 11, 2022 at 1:18 AM Si-Wei Liu<si-wei.liu@oracle.com>  wrote:
>>> On 10/8/2022 10:43 PM, Jason Wang wrote:
>>>
>>> On Sat, Oct 8, 2022 at 5:04 PM Si-Wei Liu<si-wei.liu@oracle.com>  wrote:
>>>
>>> Similar to other vhost backends, vhostfd can be passed to vhost-vdpa
>>> backend as another parameter to instantiate vhost-vdpa net client.
>>> This would benefit the use case where only open file descriptors, as
>>> opposed to raw vhost-vdpa device paths, are accessible from the QEMU
>>> process.
>>>
>>> (qemu) netdev_add type=vhost-vdpa,vhostfd=61,id=vhost-vdpa1
>>>
>>> Adding Cindy.
>>>
>>> This has been discussed before, we've already had
>>> vhostdev=/dev/fdset/$fd which should be functional equivalent to what
>>> has been proposed here. (And this is how libvirt works if I understand
>>> correctly).
>>>
>>> Yes, I was aware of that discussion. However, our implementation of the management software is a bit different from libvirt, in which the paths in /dev/fdset/NNN can't be dynamically passed to the container where QEMU is running. By using a specific vhostfd property with existing code, it would allow our mgmt software smooth adaption without having to add too much infra code to support the /dev/fdset/NNN trick.
>> I think fdset has extra flexibility in e.g hot-plug to allow the file
>> descriptor to be passed with SCM_RIGHTS.
> Yes, that's exactly the use case we'd like to support. Though the 
> difference in our mgmt software stack from libvirt is that any dynamic 
> path in /dev (like /dev/fdset/ABC or /dev/vhost-vdpa-XYZ) can't be 
> allowed to get passed through to the container running QEMU on the fly 
> for security reasons. fd passing is allowed, though, with very strict 
> security checks.


Interesting, any reason for disallowing fd passing? I'm asking since 
it's the way that libvirt work and it seems to me we don't get any 
complaints in the past.


> That's the main motivation for this direct vhostfd passing support 
> (noted fdset doesn't need to be used along with /dev/fdset node).
>
> Having it said, I found there's also nuance in the 
> vhostdev=/dev/fdset/XyZ interface besides the /dev node limitation: 
> the fd to open has to be dup'ed from the original one passed via 
> SCM_RIGHTS. This also has implication on security that any ioctl call 
> from QEMU can't be audited through the original fd.


I'm not sure I get this, but management layer can enforce a ioctl 
whiltelist for safety.

Thanks


> With this regard, I think vhostfd offers more flexibility than work 
> around those qemu_open() specifics. Would these justify the use case 
> of concern?
>
> Thanks,
> -Siwei
>
>>   It would still be good to add
>> the support.
>>
>>> On the other hand, the other vhost backends, e.g. tap (via vhost-net), vhost-scsi and vhost-vsock all accept vhostfd as parameter to instantiate device, although the /dev/fdset trick also works there. I think vhost-vdpa is not  unprecedented in this case?
>> Yes.
>>
>> Thanks
>>
>>> Thanks,
>>> -Siwei
>>>
>>>
>>>
>>> Thanks
>>>
>>> Signed-off-by: Si-Wei Liu<si-wei.liu@oracle.com>
>>> Acked-by: Eugenio Pérez<eperezma@redhat.com>
>>>
>>> ---
>>> v2:
>>>    - fixed typo in commit message
>>>    - s/fd's/file descriptors/
>>> ---
>>>   net/vhost-vdpa.c | 25 ++++++++++++++++++++-----
>>>   qapi/net.json    |  3 +++
>>>   qemu-options.hx  |  6 ++++--
>>>   3 files changed, 27 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>>> index 182b3a1..366b070 100644
>>> --- a/net/vhost-vdpa.c
>>> +++ b/net/vhost-vdpa.c
>>> @@ -683,14 +683,29 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
>>>
>>>       assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>>>       opts = &netdev->u.vhost_vdpa;
>>> -    if (!opts->vhostdev) {
>>> -        error_setg(errp, "vdpa character device not specified with vhostdev");
>>> +    if (!opts->has_vhostdev && !opts->has_vhostfd) {
>>> +        error_setg(errp,
>>> +                   "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
>>>           return -1;
>>>       }
>>>
>>> -    vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
>>> -    if (vdpa_device_fd == -1) {
>>> -        return -errno;
>>> +    if (opts->has_vhostdev && opts->has_vhostfd) {
>>> +        error_setg(errp,
>>> +                   "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
>>> +        return -1;
>>> +    }
>>> +
>>> +    if (opts->has_vhostdev) {
>>> +        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
>>> +        if (vdpa_device_fd == -1) {
>>> +            return -errno;
>>> +        }
>>> +    } else if (opts->has_vhostfd) {
>>> +        vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
>>> +        if (vdpa_device_fd == -1) {
>>> +            error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
>>> +            return -1;
>>> +        }
>>>       }
>>>
>>>       r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
>>> diff --git a/qapi/net.json b/qapi/net.json
>>> index dd088c0..926ecc8 100644
>>> --- a/qapi/net.json
>>> +++ b/qapi/net.json
>>> @@ -442,6 +442,8 @@
>>>   # @vhostdev: path of vhost-vdpa device
>>>   #            (default:'/dev/vhost-vdpa-0')
>>>   #
>>> +# @vhostfd: file descriptor of an already opened vhost vdpa device
>>> +#
>>>   # @queues: number of queues to be created for multiqueue vhost-vdpa
>>>   #          (default: 1)
>>>   #
>>> @@ -456,6 +458,7 @@
>>>   { 'struct': 'NetdevVhostVDPAOptions',
>>>     'data': {
>>>       '*vhostdev':     'str',
>>> +    '*vhostfd':      'str',
>>>       '*queues':       'int',
>>>       '*x-svq':        {'type': 'bool', 'features' : [ 'unstable'] } } }
>>>
>>> diff --git a/qemu-options.hx b/qemu-options.hx
>>> index 913c71e..c040f74 100644
>>> --- a/qemu-options.hx
>>> +++ b/qemu-options.hx
>>> @@ -2774,8 +2774,10 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
>>>       "                configure a vhost-user network, backed by a chardev 'dev'\n"
>>>   #endif
>>>   #ifdef __linux__
>>> -    "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
>>> +    "-netdev vhost-vdpa,id=str[,vhostdev=/path/to/dev][,vhostfd=h]\n"
>>>       "                configure a vhost-vdpa network,Establish a vhost-vdpa netdev\n"
>>> +    "                use 'vhostdev=/path/to/dev' to open a vhost vdpa device\n"
>>> +    "                use 'vhostfd=h' to connect to an already opened vhost vdpa device\n"
>>>   #endif
>>>   #ifdef CONFIG_VMNET
>>>       "-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n"
>>> @@ -3280,7 +3282,7 @@ SRST
>>>                -netdev type=vhost-user,id=net0,chardev=chr0 \
>>>                -device virtio-net-pci,netdev=net0
>>>
>>> -``-netdev vhost-vdpa,vhostdev=/path/to/dev``
>>> +``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]``
>>>       Establish a vhost-vdpa netdev.
>>>
>>>       vDPA device is a device that uses a datapath which complies with
>>> --
>>> 1.8.3.1
>>>
>>>
>


Re: [PATCH v2] vhost-vdpa: allow passing opened vhostfd to vhost-vdpa
Posted by Si-Wei Liu 1 year, 6 months ago
Jason,

On 10/12/2022 10:02 PM, Jason Wang wrote:
>
> 在 2022/10/12 13:59, Si-Wei Liu 写道:
>>
>>
>> On 10/11/2022 8:09 PM, Jason Wang wrote:
>>> On Tue, Oct 11, 2022 at 1:18 AM Si-Wei Liu<si-wei.liu@oracle.com>  
>>> wrote:
>>>> On 10/8/2022 10:43 PM, Jason Wang wrote:
>>>>
>>>> On Sat, Oct 8, 2022 at 5:04 PM Si-Wei Liu<si-wei.liu@oracle.com>  
>>>> wrote:
>>>>
>>>> Similar to other vhost backends, vhostfd can be passed to vhost-vdpa
>>>> backend as another parameter to instantiate vhost-vdpa net client.
>>>> This would benefit the use case where only open file descriptors, as
>>>> opposed to raw vhost-vdpa device paths, are accessible from the QEMU
>>>> process.
>>>>
>>>> (qemu) netdev_add type=vhost-vdpa,vhostfd=61,id=vhost-vdpa1
>>>>
>>>> Adding Cindy.
>>>>
>>>> This has been discussed before, we've already had
>>>> vhostdev=/dev/fdset/$fd which should be functional equivalent to what
>>>> has been proposed here. (And this is how libvirt works if I understand
>>>> correctly).
>>>>
>>>> Yes, I was aware of that discussion. However, our implementation of 
>>>> the management software is a bit different from libvirt, in which 
>>>> the paths in /dev/fdset/NNN can't be dynamically passed to the 
>>>> container where QEMU is running. By using a specific vhostfd 
>>>> property with existing code, it would allow our mgmt software 
>>>> smooth adaption without having to add too much infra code to 
>>>> support the /dev/fdset/NNN trick.
>>> I think fdset has extra flexibility in e.g hot-plug to allow the file
>>> descriptor to be passed with SCM_RIGHTS.
>> Yes, that's exactly the use case we'd like to support. Though the 
>> difference in our mgmt software stack from libvirt is that any 
>> dynamic path in /dev (like /dev/fdset/ABC or /dev/vhost-vdpa-XYZ) 
>> can't be allowed to get passed through to the container running QEMU 
>> on the fly for security reasons. fd passing is allowed, though, with 
>> very strict security checks.
>
>
> Interesting, any reason for disallowing fd passing?
For our mgmt software stack, QEMU is running in a secured container with 
its own namespace(s) with minimally well known and trusted devices from 
root ns exposed (only) at the time when QEMU is being started.  Direct 
fd passing via SCM_RIGHTS is allowed, but fdset device node exposure is 
not allowed and not even considered useful to us, as it adds an 
unwarranted attack surface to the QEMU's secured container 
unnecessarily. This has been the case and our security model for a while 
now w.r.t hot plugging vhost-net/tap and vhost-scsi devices, so will do 
for vhost-vdpa with vhostfd. It's not an open source project, though 
what I can share is that it's not a simple script that can be easily 
changed, and allow passing extra devices e.g. fdset especially on the 
fly is not even in consideration per suggested security guideline. I 
think we don't do anything special here as with other secured containers 
that disallow dynamic device injection on the fly.

> I'm asking since it's the way that libvirt work and it seems to me we 
> don't get any complaints in the past.
I guess it was because libvirt doesn't run QEMU in a container with very 
limited device exposure, otherwise this sort of constraints would pop 
up. Anyway the point and the way I see it is that passing vhostfd is 
proved to be working well and secure with other vhost devices, I don't 
see why vhost-vdpa is treated special here that would need to enforce 
the fdset usage. It's an edge case for libvirt maybe, but supporting 
QEMU's vhost-vdpa device to run in a securely contained environment with 
no dynamic device injection shouldn't be an odd or bizarre use case.


Thanks,
-Siwei

>
>
>> That's the main motivation for this direct vhostfd passing support 
>> (noted fdset doesn't need to be used along with /dev/fdset node).
>>
>> Having it said, I found there's also nuance in the 
>> vhostdev=/dev/fdset/XyZ interface besides the /dev node limitation: 
>> the fd to open has to be dup'ed from the original one passed via 
>> SCM_RIGHTS. This also has implication on security that any ioctl call 
>> from QEMU can't be audited through the original fd.
>
>
> I'm not sure I get this, but management layer can enforce a ioctl 
> whiltelist for safety.
>
> Thanks
>
>
>> With this regard, I think vhostfd offers more flexibility than work 
>> around those qemu_open() specifics. Would these justify the use case 
>> of concern?
>>
>> Thanks,
>> -Siwei
>>
>>>   It would still be good to add
>>> the support.
>>>
>>>> On the other hand, the other vhost backends, e.g. tap (via 
>>>> vhost-net), vhost-scsi and vhost-vsock all accept vhostfd as 
>>>> parameter to instantiate device, although the /dev/fdset trick also 
>>>> works there. I think vhost-vdpa is not  unprecedented in this case?
>>> Yes.
>>>
>>> Thanks
>>>
>>>> Thanks,
>>>> -Siwei
>>>>
>>>>
>>>>
>>>> Thanks
>>>>
>>>> Signed-off-by: Si-Wei Liu<si-wei.liu@oracle.com>
>>>> Acked-by: Eugenio Pérez<eperezma@redhat.com>
>>>>
>>>> ---
>>>> v2:
>>>>    - fixed typo in commit message
>>>>    - s/fd's/file descriptors/
>>>> ---
>>>>   net/vhost-vdpa.c | 25 ++++++++++++++++++++-----
>>>>   qapi/net.json    |  3 +++
>>>>   qemu-options.hx  |  6 ++++--
>>>>   3 files changed, 27 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>>>> index 182b3a1..366b070 100644
>>>> --- a/net/vhost-vdpa.c
>>>> +++ b/net/vhost-vdpa.c
>>>> @@ -683,14 +683,29 @@ int net_init_vhost_vdpa(const Netdev *netdev, 
>>>> const char *name,
>>>>
>>>>       assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>>>>       opts = &netdev->u.vhost_vdpa;
>>>> -    if (!opts->vhostdev) {
>>>> -        error_setg(errp, "vdpa character device not specified with 
>>>> vhostdev");
>>>> +    if (!opts->has_vhostdev && !opts->has_vhostfd) {
>>>> +        error_setg(errp,
>>>> +                   "vhost-vdpa: neither vhostdev= nor vhostfd= was 
>>>> specified");
>>>>           return -1;
>>>>       }
>>>>
>>>> -    vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
>>>> -    if (vdpa_device_fd == -1) {
>>>> -        return -errno;
>>>> +    if (opts->has_vhostdev && opts->has_vhostfd) {
>>>> +        error_setg(errp,
>>>> +                   "vhost-vdpa: vhostdev= and vhostfd= are 
>>>> mutually exclusive");
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    if (opts->has_vhostdev) {
>>>> +        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
>>>> +        if (vdpa_device_fd == -1) {
>>>> +            return -errno;
>>>> +        }
>>>> +    } else if (opts->has_vhostfd) {
>>>> +        vdpa_device_fd = monitor_fd_param(monitor_cur(), 
>>>> opts->vhostfd, errp);
>>>> +        if (vdpa_device_fd == -1) {
>>>> +            error_prepend(errp, "vhost-vdpa: unable to parse 
>>>> vhostfd: ");
>>>> +            return -1;
>>>> +        }
>>>>       }
>>>>
>>>>       r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
>>>> diff --git a/qapi/net.json b/qapi/net.json
>>>> index dd088c0..926ecc8 100644
>>>> --- a/qapi/net.json
>>>> +++ b/qapi/net.json
>>>> @@ -442,6 +442,8 @@
>>>>   # @vhostdev: path of vhost-vdpa device
>>>>   #            (default:'/dev/vhost-vdpa-0')
>>>>   #
>>>> +# @vhostfd: file descriptor of an already opened vhost vdpa device
>>>> +#
>>>>   # @queues: number of queues to be created for multiqueue vhost-vdpa
>>>>   #          (default: 1)
>>>>   #
>>>> @@ -456,6 +458,7 @@
>>>>   { 'struct': 'NetdevVhostVDPAOptions',
>>>>     'data': {
>>>>       '*vhostdev':     'str',
>>>> +    '*vhostfd':      'str',
>>>>       '*queues':       'int',
>>>>       '*x-svq':        {'type': 'bool', 'features' : [ 'unstable'] 
>>>> } } }
>>>>
>>>> diff --git a/qemu-options.hx b/qemu-options.hx
>>>> index 913c71e..c040f74 100644
>>>> --- a/qemu-options.hx
>>>> +++ b/qemu-options.hx
>>>> @@ -2774,8 +2774,10 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
>>>>       "                configure a vhost-user network, backed by a 
>>>> chardev 'dev'\n"
>>>>   #endif
>>>>   #ifdef __linux__
>>>> -    "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
>>>> +    "-netdev vhost-vdpa,id=str[,vhostdev=/path/to/dev][,vhostfd=h]\n"
>>>>       "                configure a vhost-vdpa network,Establish a 
>>>> vhost-vdpa netdev\n"
>>>> +    "                use 'vhostdev=/path/to/dev' to open a vhost 
>>>> vdpa device\n"
>>>> +    "                use 'vhostfd=h' to connect to an already 
>>>> opened vhost vdpa device\n"
>>>>   #endif
>>>>   #ifdef CONFIG_VMNET
>>>>       "-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n"
>>>> @@ -3280,7 +3282,7 @@ SRST
>>>>                -netdev type=vhost-user,id=net0,chardev=chr0 \
>>>>                -device virtio-net-pci,netdev=net0
>>>>
>>>> -``-netdev vhost-vdpa,vhostdev=/path/to/dev``
>>>> +``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]``
>>>>       Establish a vhost-vdpa netdev.
>>>>
>>>>       vDPA device is a device that uses a datapath which complies with
>>>> -- 
>>>> 1.8.3.1
>>>>
>>>>
>>
>
Re: [PATCH v2] vhost-vdpa: allow passing opened vhostfd to vhost-vdpa
Posted by Si-Wei Liu 1 year, 6 months ago
Hi Jason,

Sorry for top posting, but are you going to queue this patch? It looks 
like the discussion has been settled and no further comment I got for 2 
weeks for this patch.

Thanks,
-Siwei

On 10/13/2022 4:12 PM, Si-Wei Liu wrote:
> Jason,
>
> On 10/12/2022 10:02 PM, Jason Wang wrote:
>>
>> 在 2022/10/12 13:59, Si-Wei Liu 写道:
>>>
>>>
>>> On 10/11/2022 8:09 PM, Jason Wang wrote:
>>>> On Tue, Oct 11, 2022 at 1:18 AM Si-Wei Liu<si-wei.liu@oracle.com> 
>>>> wrote:
>>>>> On 10/8/2022 10:43 PM, Jason Wang wrote:
>>>>>
>>>>> On Sat, Oct 8, 2022 at 5:04 PM Si-Wei Liu<si-wei.liu@oracle.com> 
>>>>> wrote:
>>>>>
>>>>> Similar to other vhost backends, vhostfd can be passed to vhost-vdpa
>>>>> backend as another parameter to instantiate vhost-vdpa net client.
>>>>> This would benefit the use case where only open file descriptors, as
>>>>> opposed to raw vhost-vdpa device paths, are accessible from the QEMU
>>>>> process.
>>>>>
>>>>> (qemu) netdev_add type=vhost-vdpa,vhostfd=61,id=vhost-vdpa1
>>>>>
>>>>> Adding Cindy.
>>>>>
>>>>> This has been discussed before, we've already had
>>>>> vhostdev=/dev/fdset/$fd which should be functional equivalent to what
>>>>> has been proposed here. (And this is how libvirt works if I 
>>>>> understand
>>>>> correctly).
>>>>>
>>>>> Yes, I was aware of that discussion. However, our implementation 
>>>>> of the management software is a bit different from libvirt, in 
>>>>> which the paths in /dev/fdset/NNN can't be dynamically passed to 
>>>>> the container where QEMU is running. By using a specific vhostfd 
>>>>> property with existing code, it would allow our mgmt software 
>>>>> smooth adaption without having to add too much infra code to 
>>>>> support the /dev/fdset/NNN trick.
>>>> I think fdset has extra flexibility in e.g hot-plug to allow the file
>>>> descriptor to be passed with SCM_RIGHTS.
>>> Yes, that's exactly the use case we'd like to support. Though the 
>>> difference in our mgmt software stack from libvirt is that any 
>>> dynamic path in /dev (like /dev/fdset/ABC or /dev/vhost-vdpa-XYZ) 
>>> can't be allowed to get passed through to the container running QEMU 
>>> on the fly for security reasons. fd passing is allowed, though, with 
>>> very strict security checks.
>>
>>
>> Interesting, any reason for disallowing fd passing?
> For our mgmt software stack, QEMU is running in a secured container 
> with its own namespace(s) with minimally well known and trusted 
> devices from root ns exposed (only) at the time when QEMU is being 
> started.  Direct fd passing via SCM_RIGHTS is allowed, but fdset 
> device node exposure is not allowed and not even considered useful to 
> us, as it adds an unwarranted attack surface to the QEMU's secured 
> container unnecessarily. This has been the case and our security model 
> for a while now w.r.t hot plugging vhost-net/tap and vhost-scsi 
> devices, so will do for vhost-vdpa with vhostfd. It's not an open 
> source project, though what I can share is that it's not a simple 
> script that can be easily changed, and allow passing extra devices 
> e.g. fdset especially on the fly is not even in consideration per 
> suggested security guideline. I think we don't do anything special 
> here as with other secured containers that disallow dynamic device 
> injection on the fly.
>
>> I'm asking since it's the way that libvirt work and it seems to me we 
>> don't get any complaints in the past.
> I guess it was because libvirt doesn't run QEMU in a container with 
> very limited device exposure, otherwise this sort of constraints would 
> pop up. Anyway the point and the way I see it is that passing vhostfd 
> is proved to be working well and secure with other vhost devices, I 
> don't see why vhost-vdpa is treated special here that would need to 
> enforce the fdset usage. It's an edge case for libvirt maybe, but 
> supporting QEMU's vhost-vdpa device to run in a securely contained 
> environment with no dynamic device injection shouldn't be an odd or 
> bizarre use case.
>
>
> Thanks,
> -Siwei
>
>>
>>
>>> That's the main motivation for this direct vhostfd passing support 
>>> (noted fdset doesn't need to be used along with /dev/fdset node).
>>>
>>> Having it said, I found there's also nuance in the 
>>> vhostdev=/dev/fdset/XyZ interface besides the /dev node limitation: 
>>> the fd to open has to be dup'ed from the original one passed via 
>>> SCM_RIGHTS. This also has implication on security that any ioctl 
>>> call from QEMU can't be audited through the original fd.
>>
>>
>> I'm not sure I get this, but management layer can enforce a ioctl 
>> whiltelist for safety.
>>
>> Thanks
>>
>>
>>> With this regard, I think vhostfd offers more flexibility than work 
>>> around those qemu_open() specifics. Would these justify the use case 
>>> of concern?
>>>
>>> Thanks,
>>> -Siwei
>>>
>>>>   It would still be good to add
>>>> the support.
>>>>
>>>>> On the other hand, the other vhost backends, e.g. tap (via 
>>>>> vhost-net), vhost-scsi and vhost-vsock all accept vhostfd as 
>>>>> parameter to instantiate device, although the /dev/fdset trick 
>>>>> also works there. I think vhost-vdpa is not  unprecedented in this 
>>>>> case?
>>>> Yes.
>>>>
>>>> Thanks
>>>>
>>>>> Thanks,
>>>>> -Siwei
>>>>>
>>>>>
>>>>>
>>>>> Thanks
>>>>>
>>>>> Signed-off-by: Si-Wei Liu<si-wei.liu@oracle.com>
>>>>> Acked-by: Eugenio Pérez<eperezma@redhat.com>
>>>>>
>>>>> ---
>>>>> v2:
>>>>>    - fixed typo in commit message
>>>>>    - s/fd's/file descriptors/
>>>>> ---
>>>>>   net/vhost-vdpa.c | 25 ++++++++++++++++++++-----
>>>>>   qapi/net.json    |  3 +++
>>>>>   qemu-options.hx  |  6 ++++--
>>>>>   3 files changed, 27 insertions(+), 7 deletions(-)
>>>>>
>>>>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>>>>> index 182b3a1..366b070 100644
>>>>> --- a/net/vhost-vdpa.c
>>>>> +++ b/net/vhost-vdpa.c
>>>>> @@ -683,14 +683,29 @@ int net_init_vhost_vdpa(const Netdev 
>>>>> *netdev, const char *name,
>>>>>
>>>>>       assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>>>>>       opts = &netdev->u.vhost_vdpa;
>>>>> -    if (!opts->vhostdev) {
>>>>> -        error_setg(errp, "vdpa character device not specified 
>>>>> with vhostdev");
>>>>> +    if (!opts->has_vhostdev && !opts->has_vhostfd) {
>>>>> +        error_setg(errp,
>>>>> +                   "vhost-vdpa: neither vhostdev= nor vhostfd= 
>>>>> was specified");
>>>>>           return -1;
>>>>>       }
>>>>>
>>>>> -    vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
>>>>> -    if (vdpa_device_fd == -1) {
>>>>> -        return -errno;
>>>>> +    if (opts->has_vhostdev && opts->has_vhostfd) {
>>>>> +        error_setg(errp,
>>>>> +                   "vhost-vdpa: vhostdev= and vhostfd= are 
>>>>> mutually exclusive");
>>>>> +        return -1;
>>>>> +    }
>>>>> +
>>>>> +    if (opts->has_vhostdev) {
>>>>> +        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
>>>>> +        if (vdpa_device_fd == -1) {
>>>>> +            return -errno;
>>>>> +        }
>>>>> +    } else if (opts->has_vhostfd) {
>>>>> +        vdpa_device_fd = monitor_fd_param(monitor_cur(), 
>>>>> opts->vhostfd, errp);
>>>>> +        if (vdpa_device_fd == -1) {
>>>>> +            error_prepend(errp, "vhost-vdpa: unable to parse 
>>>>> vhostfd: ");
>>>>> +            return -1;
>>>>> +        }
>>>>>       }
>>>>>
>>>>>       r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
>>>>> diff --git a/qapi/net.json b/qapi/net.json
>>>>> index dd088c0..926ecc8 100644
>>>>> --- a/qapi/net.json
>>>>> +++ b/qapi/net.json
>>>>> @@ -442,6 +442,8 @@
>>>>>   # @vhostdev: path of vhost-vdpa device
>>>>>   #            (default:'/dev/vhost-vdpa-0')
>>>>>   #
>>>>> +# @vhostfd: file descriptor of an already opened vhost vdpa device
>>>>> +#
>>>>>   # @queues: number of queues to be created for multiqueue vhost-vdpa
>>>>>   #          (default: 1)
>>>>>   #
>>>>> @@ -456,6 +458,7 @@
>>>>>   { 'struct': 'NetdevVhostVDPAOptions',
>>>>>     'data': {
>>>>>       '*vhostdev':     'str',
>>>>> +    '*vhostfd':      'str',
>>>>>       '*queues':       'int',
>>>>>       '*x-svq':        {'type': 'bool', 'features' : [ 'unstable'] 
>>>>> } } }
>>>>>
>>>>> diff --git a/qemu-options.hx b/qemu-options.hx
>>>>> index 913c71e..c040f74 100644
>>>>> --- a/qemu-options.hx
>>>>> +++ b/qemu-options.hx
>>>>> @@ -2774,8 +2774,10 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
>>>>>       "                configure a vhost-user network, backed by a 
>>>>> chardev 'dev'\n"
>>>>>   #endif
>>>>>   #ifdef __linux__
>>>>> -    "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
>>>>> +    "-netdev 
>>>>> vhost-vdpa,id=str[,vhostdev=/path/to/dev][,vhostfd=h]\n"
>>>>>       "                configure a vhost-vdpa network,Establish a 
>>>>> vhost-vdpa netdev\n"
>>>>> +    "                use 'vhostdev=/path/to/dev' to open a vhost 
>>>>> vdpa device\n"
>>>>> +    "                use 'vhostfd=h' to connect to an already 
>>>>> opened vhost vdpa device\n"
>>>>>   #endif
>>>>>   #ifdef CONFIG_VMNET
>>>>>       "-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n"
>>>>> @@ -3280,7 +3282,7 @@ SRST
>>>>>                -netdev type=vhost-user,id=net0,chardev=chr0 \
>>>>>                -device virtio-net-pci,netdev=net0
>>>>>
>>>>> -``-netdev vhost-vdpa,vhostdev=/path/to/dev``
>>>>> +``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]``
>>>>>       Establish a vhost-vdpa netdev.
>>>>>
>>>>>       vDPA device is a device that uses a datapath which complies 
>>>>> with
>>>>> -- 
>>>>> 1.8.3.1
>>>>>
>>>>>
>>>
>>
>


Re: [PATCH v2] vhost-vdpa: allow passing opened vhostfd to vhost-vdpa
Posted by Jason Wang 1 year, 6 months ago
On Fri, Oct 28, 2022 at 5:56 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
> Hi Jason,
>
> Sorry for top posting, but are you going to queue this patch? It looks
> like the discussion has been settled and no further comment I got for 2
> weeks for this patch.

Yes, I've queued this.

Thanks

>
> Thanks,
> -Siwei
>
> On 10/13/2022 4:12 PM, Si-Wei Liu wrote:
> > Jason,
> >
> > On 10/12/2022 10:02 PM, Jason Wang wrote:
> >>
> >> 在 2022/10/12 13:59, Si-Wei Liu 写道:
> >>>
> >>>
> >>> On 10/11/2022 8:09 PM, Jason Wang wrote:
> >>>> On Tue, Oct 11, 2022 at 1:18 AM Si-Wei Liu<si-wei.liu@oracle.com>
> >>>> wrote:
> >>>>> On 10/8/2022 10:43 PM, Jason Wang wrote:
> >>>>>
> >>>>> On Sat, Oct 8, 2022 at 5:04 PM Si-Wei Liu<si-wei.liu@oracle.com>
> >>>>> wrote:
> >>>>>
> >>>>> Similar to other vhost backends, vhostfd can be passed to vhost-vdpa
> >>>>> backend as another parameter to instantiate vhost-vdpa net client.
> >>>>> This would benefit the use case where only open file descriptors, as
> >>>>> opposed to raw vhost-vdpa device paths, are accessible from the QEMU
> >>>>> process.
> >>>>>
> >>>>> (qemu) netdev_add type=vhost-vdpa,vhostfd=61,id=vhost-vdpa1
> >>>>>
> >>>>> Adding Cindy.
> >>>>>
> >>>>> This has been discussed before, we've already had
> >>>>> vhostdev=/dev/fdset/$fd which should be functional equivalent to what
> >>>>> has been proposed here. (And this is how libvirt works if I
> >>>>> understand
> >>>>> correctly).
> >>>>>
> >>>>> Yes, I was aware of that discussion. However, our implementation
> >>>>> of the management software is a bit different from libvirt, in
> >>>>> which the paths in /dev/fdset/NNN can't be dynamically passed to
> >>>>> the container where QEMU is running. By using a specific vhostfd
> >>>>> property with existing code, it would allow our mgmt software
> >>>>> smooth adaption without having to add too much infra code to
> >>>>> support the /dev/fdset/NNN trick.
> >>>> I think fdset has extra flexibility in e.g hot-plug to allow the file
> >>>> descriptor to be passed with SCM_RIGHTS.
> >>> Yes, that's exactly the use case we'd like to support. Though the
> >>> difference in our mgmt software stack from libvirt is that any
> >>> dynamic path in /dev (like /dev/fdset/ABC or /dev/vhost-vdpa-XYZ)
> >>> can't be allowed to get passed through to the container running QEMU
> >>> on the fly for security reasons. fd passing is allowed, though, with
> >>> very strict security checks.
> >>
> >>
> >> Interesting, any reason for disallowing fd passing?
> > For our mgmt software stack, QEMU is running in a secured container
> > with its own namespace(s) with minimally well known and trusted
> > devices from root ns exposed (only) at the time when QEMU is being
> > started.  Direct fd passing via SCM_RIGHTS is allowed, but fdset
> > device node exposure is not allowed and not even considered useful to
> > us, as it adds an unwarranted attack surface to the QEMU's secured
> > container unnecessarily. This has been the case and our security model
> > for a while now w.r.t hot plugging vhost-net/tap and vhost-scsi
> > devices, so will do for vhost-vdpa with vhostfd. It's not an open
> > source project, though what I can share is that it's not a simple
> > script that can be easily changed, and allow passing extra devices
> > e.g. fdset especially on the fly is not even in consideration per
> > suggested security guideline. I think we don't do anything special
> > here as with other secured containers that disallow dynamic device
> > injection on the fly.
> >
> >> I'm asking since it's the way that libvirt work and it seems to me we
> >> don't get any complaints in the past.
> > I guess it was because libvirt doesn't run QEMU in a container with
> > very limited device exposure, otherwise this sort of constraints would
> > pop up. Anyway the point and the way I see it is that passing vhostfd
> > is proved to be working well and secure with other vhost devices, I
> > don't see why vhost-vdpa is treated special here that would need to
> > enforce the fdset usage. It's an edge case for libvirt maybe, but
> > supporting QEMU's vhost-vdpa device to run in a securely contained
> > environment with no dynamic device injection shouldn't be an odd or
> > bizarre use case.
> >
> >
> > Thanks,
> > -Siwei
> >
> >>
> >>
> >>> That's the main motivation for this direct vhostfd passing support
> >>> (noted fdset doesn't need to be used along with /dev/fdset node).
> >>>
> >>> Having it said, I found there's also nuance in the
> >>> vhostdev=/dev/fdset/XyZ interface besides the /dev node limitation:
> >>> the fd to open has to be dup'ed from the original one passed via
> >>> SCM_RIGHTS. This also has implication on security that any ioctl
> >>> call from QEMU can't be audited through the original fd.
> >>
> >>
> >> I'm not sure I get this, but management layer can enforce a ioctl
> >> whiltelist for safety.
> >>
> >> Thanks
> >>
> >>
> >>> With this regard, I think vhostfd offers more flexibility than work
> >>> around those qemu_open() specifics. Would these justify the use case
> >>> of concern?
> >>>
> >>> Thanks,
> >>> -Siwei
> >>>
> >>>>   It would still be good to add
> >>>> the support.
> >>>>
> >>>>> On the other hand, the other vhost backends, e.g. tap (via
> >>>>> vhost-net), vhost-scsi and vhost-vsock all accept vhostfd as
> >>>>> parameter to instantiate device, although the /dev/fdset trick
> >>>>> also works there. I think vhost-vdpa is not  unprecedented in this
> >>>>> case?
> >>>> Yes.
> >>>>
> >>>> Thanks
> >>>>
> >>>>> Thanks,
> >>>>> -Siwei
> >>>>>
> >>>>>
> >>>>>
> >>>>> Thanks
> >>>>>
> >>>>> Signed-off-by: Si-Wei Liu<si-wei.liu@oracle.com>
> >>>>> Acked-by: Eugenio Pérez<eperezma@redhat.com>
> >>>>>
> >>>>> ---
> >>>>> v2:
> >>>>>    - fixed typo in commit message
> >>>>>    - s/fd's/file descriptors/
> >>>>> ---
> >>>>>   net/vhost-vdpa.c | 25 ++++++++++++++++++++-----
> >>>>>   qapi/net.json    |  3 +++
> >>>>>   qemu-options.hx  |  6 ++++--
> >>>>>   3 files changed, 27 insertions(+), 7 deletions(-)
> >>>>>
> >>>>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> >>>>> index 182b3a1..366b070 100644
> >>>>> --- a/net/vhost-vdpa.c
> >>>>> +++ b/net/vhost-vdpa.c
> >>>>> @@ -683,14 +683,29 @@ int net_init_vhost_vdpa(const Netdev
> >>>>> *netdev, const char *name,
> >>>>>
> >>>>>       assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
> >>>>>       opts = &netdev->u.vhost_vdpa;
> >>>>> -    if (!opts->vhostdev) {
> >>>>> -        error_setg(errp, "vdpa character device not specified
> >>>>> with vhostdev");
> >>>>> +    if (!opts->has_vhostdev && !opts->has_vhostfd) {
> >>>>> +        error_setg(errp,
> >>>>> +                   "vhost-vdpa: neither vhostdev= nor vhostfd=
> >>>>> was specified");
> >>>>>           return -1;
> >>>>>       }
> >>>>>
> >>>>> -    vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
> >>>>> -    if (vdpa_device_fd == -1) {
> >>>>> -        return -errno;
> >>>>> +    if (opts->has_vhostdev && opts->has_vhostfd) {
> >>>>> +        error_setg(errp,
> >>>>> +                   "vhost-vdpa: vhostdev= and vhostfd= are
> >>>>> mutually exclusive");
> >>>>> +        return -1;
> >>>>> +    }
> >>>>> +
> >>>>> +    if (opts->has_vhostdev) {
> >>>>> +        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
> >>>>> +        if (vdpa_device_fd == -1) {
> >>>>> +            return -errno;
> >>>>> +        }
> >>>>> +    } else if (opts->has_vhostfd) {
> >>>>> +        vdpa_device_fd = monitor_fd_param(monitor_cur(),
> >>>>> opts->vhostfd, errp);
> >>>>> +        if (vdpa_device_fd == -1) {
> >>>>> +            error_prepend(errp, "vhost-vdpa: unable to parse
> >>>>> vhostfd: ");
> >>>>> +            return -1;
> >>>>> +        }
> >>>>>       }
> >>>>>
> >>>>>       r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
> >>>>> diff --git a/qapi/net.json b/qapi/net.json
> >>>>> index dd088c0..926ecc8 100644
> >>>>> --- a/qapi/net.json
> >>>>> +++ b/qapi/net.json
> >>>>> @@ -442,6 +442,8 @@
> >>>>>   # @vhostdev: path of vhost-vdpa device
> >>>>>   #            (default:'/dev/vhost-vdpa-0')
> >>>>>   #
> >>>>> +# @vhostfd: file descriptor of an already opened vhost vdpa device
> >>>>> +#
> >>>>>   # @queues: number of queues to be created for multiqueue vhost-vdpa
> >>>>>   #          (default: 1)
> >>>>>   #
> >>>>> @@ -456,6 +458,7 @@
> >>>>>   { 'struct': 'NetdevVhostVDPAOptions',
> >>>>>     'data': {
> >>>>>       '*vhostdev':     'str',
> >>>>> +    '*vhostfd':      'str',
> >>>>>       '*queues':       'int',
> >>>>>       '*x-svq':        {'type': 'bool', 'features' : [ 'unstable']
> >>>>> } } }
> >>>>>
> >>>>> diff --git a/qemu-options.hx b/qemu-options.hx
> >>>>> index 913c71e..c040f74 100644
> >>>>> --- a/qemu-options.hx
> >>>>> +++ b/qemu-options.hx
> >>>>> @@ -2774,8 +2774,10 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
> >>>>>       "                configure a vhost-user network, backed by a
> >>>>> chardev 'dev'\n"
> >>>>>   #endif
> >>>>>   #ifdef __linux__
> >>>>> -    "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
> >>>>> +    "-netdev
> >>>>> vhost-vdpa,id=str[,vhostdev=/path/to/dev][,vhostfd=h]\n"
> >>>>>       "                configure a vhost-vdpa network,Establish a
> >>>>> vhost-vdpa netdev\n"
> >>>>> +    "                use 'vhostdev=/path/to/dev' to open a vhost
> >>>>> vdpa device\n"
> >>>>> +    "                use 'vhostfd=h' to connect to an already
> >>>>> opened vhost vdpa device\n"
> >>>>>   #endif
> >>>>>   #ifdef CONFIG_VMNET
> >>>>>       "-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n"
> >>>>> @@ -3280,7 +3282,7 @@ SRST
> >>>>>                -netdev type=vhost-user,id=net0,chardev=chr0 \
> >>>>>                -device virtio-net-pci,netdev=net0
> >>>>>
> >>>>> -``-netdev vhost-vdpa,vhostdev=/path/to/dev``
> >>>>> +``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]``
> >>>>>       Establish a vhost-vdpa netdev.
> >>>>>
> >>>>>       vDPA device is a device that uses a datapath which complies
> >>>>> with
> >>>>> --
> >>>>> 1.8.3.1
> >>>>>
> >>>>>
> >>>
> >>
> >
>
Re: [PATCH v2] vhost-vdpa: allow passing opened vhostfd to vhost-vdpa
Posted by Si-Wei Liu 1 year, 6 months ago

On 10/27/2022 6:50 PM, Jason Wang wrote:
> On Fri, Oct 28, 2022 at 5:56 AM Si-Wei Liu<si-wei.liu@oracle.com>  wrote:
>> Hi Jason,
>>
>> Sorry for top posting, but are you going to queue this patch? It looks
>> like the discussion has been settled and no further comment I got for 2
>> weeks for this patch.
> Yes, I've queued this.
Excellent, thanks Jason. I see it gets pulled.

-Siwei
>
> Thanks
>
>> Thanks,
>> -Siwei
>>
>> On 10/13/2022 4:12 PM, Si-Wei Liu wrote:
>>> Jason,
>>>
>>> On 10/12/2022 10:02 PM, Jason Wang wrote:
>>>> 在 2022/10/12 13:59, Si-Wei Liu 写道:
>>>>>
>>>>> On 10/11/2022 8:09 PM, Jason Wang wrote:
>>>>>> On Tue, Oct 11, 2022 at 1:18 AM Si-Wei Liu<si-wei.liu@oracle.com>
>>>>>> wrote:
>>>>>>> On 10/8/2022 10:43 PM, Jason Wang wrote:
>>>>>>>
>>>>>>> On Sat, Oct 8, 2022 at 5:04 PM Si-Wei Liu<si-wei.liu@oracle.com>
>>>>>>> wrote:
>>>>>>>
>>>>>>> Similar to other vhost backends, vhostfd can be passed to vhost-vdpa
>>>>>>> backend as another parameter to instantiate vhost-vdpa net client.
>>>>>>> This would benefit the use case where only open file descriptors, as
>>>>>>> opposed to raw vhost-vdpa device paths, are accessible from the QEMU
>>>>>>> process.
>>>>>>>
>>>>>>> (qemu) netdev_add type=vhost-vdpa,vhostfd=61,id=vhost-vdpa1
>>>>>>>
>>>>>>> Adding Cindy.
>>>>>>>
>>>>>>> This has been discussed before, we've already had
>>>>>>> vhostdev=/dev/fdset/$fd which should be functional equivalent to what
>>>>>>> has been proposed here. (And this is how libvirt works if I
>>>>>>> understand
>>>>>>> correctly).
>>>>>>>
>>>>>>> Yes, I was aware of that discussion. However, our implementation
>>>>>>> of the management software is a bit different from libvirt, in
>>>>>>> which the paths in /dev/fdset/NNN can't be dynamically passed to
>>>>>>> the container where QEMU is running. By using a specific vhostfd
>>>>>>> property with existing code, it would allow our mgmt software
>>>>>>> smooth adaption without having to add too much infra code to
>>>>>>> support the /dev/fdset/NNN trick.
>>>>>> I think fdset has extra flexibility in e.g hot-plug to allow the file
>>>>>> descriptor to be passed with SCM_RIGHTS.
>>>>> Yes, that's exactly the use case we'd like to support. Though the
>>>>> difference in our mgmt software stack from libvirt is that any
>>>>> dynamic path in /dev (like /dev/fdset/ABC or /dev/vhost-vdpa-XYZ)
>>>>> can't be allowed to get passed through to the container running QEMU
>>>>> on the fly for security reasons. fd passing is allowed, though, with
>>>>> very strict security checks.
>>>>
>>>> Interesting, any reason for disallowing fd passing?
>>> For our mgmt software stack, QEMU is running in a secured container
>>> with its own namespace(s) with minimally well known and trusted
>>> devices from root ns exposed (only) at the time when QEMU is being
>>> started.  Direct fd passing via SCM_RIGHTS is allowed, but fdset
>>> device node exposure is not allowed and not even considered useful to
>>> us, as it adds an unwarranted attack surface to the QEMU's secured
>>> container unnecessarily. This has been the case and our security model
>>> for a while now w.r.t hot plugging vhost-net/tap and vhost-scsi
>>> devices, so will do for vhost-vdpa with vhostfd. It's not an open
>>> source project, though what I can share is that it's not a simple
>>> script that can be easily changed, and allow passing extra devices
>>> e.g. fdset especially on the fly is not even in consideration per
>>> suggested security guideline. I think we don't do anything special
>>> here as with other secured containers that disallow dynamic device
>>> injection on the fly.
>>>
>>>> I'm asking since it's the way that libvirt work and it seems to me we
>>>> don't get any complaints in the past.
>>> I guess it was because libvirt doesn't run QEMU in a container with
>>> very limited device exposure, otherwise this sort of constraints would
>>> pop up. Anyway the point and the way I see it is that passing vhostfd
>>> is proved to be working well and secure with other vhost devices, I
>>> don't see why vhost-vdpa is treated special here that would need to
>>> enforce the fdset usage. It's an edge case for libvirt maybe, but
>>> supporting QEMU's vhost-vdpa device to run in a securely contained
>>> environment with no dynamic device injection shouldn't be an odd or
>>> bizarre use case.
>>>
>>>
>>> Thanks,
>>> -Siwei
>>>
>>>>
>>>>> That's the main motivation for this direct vhostfd passing support
>>>>> (noted fdset doesn't need to be used along with /dev/fdset node).
>>>>>
>>>>> Having it said, I found there's also nuance in the
>>>>> vhostdev=/dev/fdset/XyZ interface besides the /dev node limitation:
>>>>> the fd to open has to be dup'ed from the original one passed via
>>>>> SCM_RIGHTS. This also has implication on security that any ioctl
>>>>> call from QEMU can't be audited through the original fd.
>>>>
>>>> I'm not sure I get this, but management layer can enforce a ioctl
>>>> whiltelist for safety.
>>>>
>>>> Thanks
>>>>
>>>>
>>>>> With this regard, I think vhostfd offers more flexibility than work
>>>>> around those qemu_open() specifics. Would these justify the use case
>>>>> of concern?
>>>>>
>>>>> Thanks,
>>>>> -Siwei
>>>>>
>>>>>>    It would still be good to add
>>>>>> the support.
>>>>>>
>>>>>>> On the other hand, the other vhost backends, e.g. tap (via
>>>>>>> vhost-net), vhost-scsi and vhost-vsock all accept vhostfd as
>>>>>>> parameter to instantiate device, although the /dev/fdset trick
>>>>>>> also works there. I think vhost-vdpa is not  unprecedented in this
>>>>>>> case?
>>>>>> Yes.
>>>>>>
>>>>>> Thanks
>>>>>>
>>>>>>> Thanks,
>>>>>>> -Siwei
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> Thanks
>>>>>>>
>>>>>>> Signed-off-by: Si-Wei Liu<si-wei.liu@oracle.com>
>>>>>>> Acked-by: Eugenio Pérez<eperezma@redhat.com>
>>>>>>>
>>>>>>> ---
>>>>>>> v2:
>>>>>>>     - fixed typo in commit message
>>>>>>>     - s/fd's/file descriptors/
>>>>>>> ---
>>>>>>>    net/vhost-vdpa.c | 25 ++++++++++++++++++++-----
>>>>>>>    qapi/net.json    |  3 +++
>>>>>>>    qemu-options.hx  |  6 ++++--
>>>>>>>    3 files changed, 27 insertions(+), 7 deletions(-)
>>>>>>>
>>>>>>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>>>>>>> index 182b3a1..366b070 100644
>>>>>>> --- a/net/vhost-vdpa.c
>>>>>>> +++ b/net/vhost-vdpa.c
>>>>>>> @@ -683,14 +683,29 @@ int net_init_vhost_vdpa(const Netdev
>>>>>>> *netdev, const char *name,
>>>>>>>
>>>>>>>        assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
>>>>>>>        opts = &netdev->u.vhost_vdpa;
>>>>>>> -    if (!opts->vhostdev) {
>>>>>>> -        error_setg(errp, "vdpa character device not specified
>>>>>>> with vhostdev");
>>>>>>> +    if (!opts->has_vhostdev && !opts->has_vhostfd) {
>>>>>>> +        error_setg(errp,
>>>>>>> +                   "vhost-vdpa: neither vhostdev= nor vhostfd=
>>>>>>> was specified");
>>>>>>>            return -1;
>>>>>>>        }
>>>>>>>
>>>>>>> -    vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
>>>>>>> -    if (vdpa_device_fd == -1) {
>>>>>>> -        return -errno;
>>>>>>> +    if (opts->has_vhostdev && opts->has_vhostfd) {
>>>>>>> +        error_setg(errp,
>>>>>>> +                   "vhost-vdpa: vhostdev= and vhostfd= are
>>>>>>> mutually exclusive");
>>>>>>> +        return -1;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    if (opts->has_vhostdev) {
>>>>>>> +        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
>>>>>>> +        if (vdpa_device_fd == -1) {
>>>>>>> +            return -errno;
>>>>>>> +        }
>>>>>>> +    } else if (opts->has_vhostfd) {
>>>>>>> +        vdpa_device_fd = monitor_fd_param(monitor_cur(),
>>>>>>> opts->vhostfd, errp);
>>>>>>> +        if (vdpa_device_fd == -1) {
>>>>>>> +            error_prepend(errp, "vhost-vdpa: unable to parse
>>>>>>> vhostfd:"); + return -1; + } } r = 
>>>>>>> vhost_vdpa_get_features(vdpa_device_fd, &features, errp); diff 
>>>>>>> --git a/qapi/net.json b/qapi/net.json index dd088c0..926ecc8 
>>>>>>> 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -442,6 +442,8 
>>>>>>> @@ # @vhostdev: path of vhost-vdpa device # 
>>>>>>> (default:'/dev/vhost-vdpa-0') # +# @vhostfd: file descriptor of 
>>>>>>> an already opened vhost vdpa device +# # @queues: number of 
>>>>>>> queues to be created for multiqueue vhost-vdpa # (default: 1) # 
>>>>>>> @@ -456,6 +458,7 @@ { 'struct': 'NetdevVhostVDPAOptions', 
>>>>>>> 'data': { '*vhostdev': 'str', + '*vhostfd': 'str', '*queues': 
>>>>>>> 'int', '*x-svq': {'type': 'bool', 'features' : [ 'unstable'] } } 
>>>>>>> } diff --git a/qemu-options.hx b/qemu-options.hx index 
>>>>>>> 913c71e..c040f74 100644 --- a/qemu-options.hx +++ 
>>>>>>> b/qemu-options.hx @@ -2774,8 +2774,10 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
>>>>>>>        "                configure a vhost-user network, backed by a
>>>>>>> chardev 'dev'\n"
>>>>>>>    #endif
>>>>>>>    #ifdef __linux__
>>>>>>> -    "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
>>>>>>> +    "-netdev
>>>>>>> vhost-vdpa,id=str[,vhostdev=/path/to/dev][,vhostfd=h]\n"
>>>>>>>        "                configure a vhost-vdpa network,Establish a
>>>>>>> vhost-vdpa netdev\n"
>>>>>>> +    "                use 'vhostdev=/path/to/dev' to open a vhost
>>>>>>> vdpa device\n"
>>>>>>> +    "                use 'vhostfd=h' to connect to an already
>>>>>>> opened vhost vdpa device\n"
>>>>>>>    #endif
>>>>>>>    #ifdef CONFIG_VMNET
>>>>>>>        "-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n"
>>>>>>> @@ -3280,7 +3282,7 @@ SRST
>>>>>>>                 -netdev type=vhost-user,id=net0,chardev=chr0 \
>>>>>>>                 -device virtio-net-pci,netdev=net0
>>>>>>>
>>>>>>> -``-netdev vhost-vdpa,vhostdev=/path/to/dev``
>>>>>>> +``-netdev vhost-vdpa[,vhostdev=/path/to/dev][,vhostfd=h]``
>>>>>>>        Establish a vhost-vdpa netdev.
>>>>>>>
>>>>>>>        vDPA device is a device that uses a datapath which complies
>>>>>>> with
>>>>>>> --
>>>>>>> 1.8.3.1
>>>>>>>
>>>>>>>