[PATCH 07/24] DAX: virtio-fs: Add vhost-user slave commands for mapping

Dr. David Alan Gilbert (git) posted 24 patches 4 years, 12 months ago
Maintainers: Gerd Hoffmann <kraxel@redhat.com>, Coiby Xu <Coiby.Xu@gmail.com>, "Michael S. Tsirkin" <mst@redhat.com>, Stefan Hajnoczi <stefanha@redhat.com>, Raphael Norwitz <raphael.norwitz@nutanix.com>, "Dr. David Alan Gilbert" <dgilbert@redhat.com>, Kevin Wolf <kwolf@redhat.com>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, Max Reitz <mreitz@redhat.com>
There is a newer version of this series
[PATCH 07/24] DAX: virtio-fs: Add vhost-user slave commands for mapping
Posted by Dr. David Alan Gilbert (git) 4 years, 12 months ago
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

The daemon may request that fds be mapped into the virtio-fs cache
visible to the guest.
These mappings are triggered by commands sent over the slave fd
from the daemon.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 docs/interop/vhost-user.rst               | 20 +++++++++++++++++++
 hw/virtio/vhost-user-fs.c                 | 14 +++++++++++++
 hw/virtio/vhost-user.c                    | 14 +++++++++++++
 include/hw/virtio/vhost-user-fs.h         | 24 +++++++++++++++++++++++
 subprojects/libvhost-user/libvhost-user.h |  2 ++
 5 files changed, 74 insertions(+)

diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index d6085f7045..1deedd3407 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -1432,6 +1432,26 @@ Slave message types
 
   The state.num field is currently reserved and must be set to 0.
 
+``VHOST_USER_SLAVE_FS_MAP``
+  :id: 6
+  :equivalent ioctl: N/A
+  :slave payload: fd + n * (offset + address + len)
+  :master payload: N/A
+
+  Requests that the QEMU mmap the given fd into the virtio-fs cache;
+  multiple chunks can be mapped in one command.
+  A reply is generated indicating whether mapping succeeded.
+
+``VHOST_USER_SLAVE_FS_UNMAP``
+  :id: 7
+  :equivalent ioctl: N/A
+  :slave payload: n * (address + len)
+  :master payload: N/A
+
+  Requests that the QEMU un-mmap the given range in the virtio-fs cache;
+  multiple chunks can be unmapped in one command.
+  A reply is generated indicating whether unmapping succeeded.
+
 .. _reply_ack:
 
 VHOST_USER_PROTOCOL_F_REPLY_ACK
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index b077d8e705..78401d2ff1 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -34,6 +34,20 @@
 #define DAX_WINDOW_PROT PROT_NONE
 #endif
 
+uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
+                                 int fd)
+{
+    /* TODO */
+    return (uint64_t)-1;
+}
+
+uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev,
+                                   VhostUserFSSlaveMsg *sm)
+{
+    /* TODO */
+    return (uint64_t)-1;
+}
+
 static void vuf_get_config(VirtIODevice *vdev, uint8_t *config)
 {
     VHostUserFS *fs = VHOST_USER_FS(vdev);
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 13789cc55e..21e40ff91a 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -12,6 +12,7 @@
 #include "qapi/error.h"
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-user.h"
+#include "hw/virtio/vhost-user-fs.h"
 #include "hw/virtio/vhost-backend.h"
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-net.h"
@@ -132,6 +133,10 @@ typedef enum VhostUserSlaveRequest {
     VHOST_USER_SLAVE_IOTLB_MSG = 1,
     VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2,
     VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
+    VHOST_USER_SLAVE_VRING_CALL = 4,
+    VHOST_USER_SLAVE_VRING_ERR = 5,
+    VHOST_USER_SLAVE_FS_MAP = 6,
+    VHOST_USER_SLAVE_FS_UNMAP = 7,
     VHOST_USER_SLAVE_MAX
 }  VhostUserSlaveRequest;
 
@@ -218,6 +223,7 @@ typedef union {
         VhostUserCryptoSession session;
         VhostUserVringArea area;
         VhostUserInflight inflight;
+        VhostUserFSSlaveMsg fs;
 } VhostUserPayload;
 
 typedef struct VhostUserMsg {
@@ -1472,6 +1478,14 @@ static void slave_read(void *opaque)
         ret = vhost_user_slave_handle_vring_host_notifier(dev, &payload.area,
                                                           fd[0]);
         break;
+#ifdef CONFIG_VHOST_USER_FS
+    case VHOST_USER_SLAVE_FS_MAP:
+        ret = vhost_user_fs_slave_map(dev, &payload.fs, fd[0]);
+        break;
+    case VHOST_USER_SLAVE_FS_UNMAP:
+        ret = vhost_user_fs_slave_unmap(dev, &payload.fs);
+        break;
+#endif
     default:
         error_report("Received unexpected msg type: %d.", hdr.request);
         ret = (uint64_t)-EINVAL;
diff --git a/include/hw/virtio/vhost-user-fs.h b/include/hw/virtio/vhost-user-fs.h
index 04596799e3..25e14ab17a 100644
--- a/include/hw/virtio/vhost-user-fs.h
+++ b/include/hw/virtio/vhost-user-fs.h
@@ -23,6 +23,24 @@
 #define TYPE_VHOST_USER_FS "vhost-user-fs-device"
 OBJECT_DECLARE_SIMPLE_TYPE(VHostUserFS, VHOST_USER_FS)
 
+/* Structures carried over the slave channel back to QEMU */
+#define VHOST_USER_FS_SLAVE_ENTRIES 8
+
+/* For the flags field of VhostUserFSSlaveMsg */
+#define VHOST_USER_FS_FLAG_MAP_R (1ull << 0)
+#define VHOST_USER_FS_FLAG_MAP_W (1ull << 1)
+
+typedef struct {
+    /* Offsets within the file being mapped */
+    uint64_t fd_offset[VHOST_USER_FS_SLAVE_ENTRIES];
+    /* Offsets within the cache */
+    uint64_t c_offset[VHOST_USER_FS_SLAVE_ENTRIES];
+    /* Lengths of sections */
+    uint64_t len[VHOST_USER_FS_SLAVE_ENTRIES];
+    /* Flags, from VHOST_USER_FS_FLAG_* */
+    uint64_t flags[VHOST_USER_FS_SLAVE_ENTRIES];
+} VhostUserFSSlaveMsg;
+
 typedef struct {
     CharBackend chardev;
     char *tag;
@@ -46,4 +64,10 @@ struct VHostUserFS {
     MemoryRegion cache;
 };
 
+/* Callbacks from the vhost-user code for slave commands */
+uint64_t vhost_user_fs_slave_map(struct vhost_dev *dev, VhostUserFSSlaveMsg *sm,
+                                 int fd);
+uint64_t vhost_user_fs_slave_unmap(struct vhost_dev *dev,
+                                   VhostUserFSSlaveMsg *sm);
+
 #endif /* _QEMU_VHOST_USER_FS_H */
diff --git a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h
index e12e9c1532..150b1121cc 100644
--- a/subprojects/libvhost-user/libvhost-user.h
+++ b/subprojects/libvhost-user/libvhost-user.h
@@ -119,6 +119,8 @@ typedef enum VhostUserSlaveRequest {
     VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3,
     VHOST_USER_SLAVE_VRING_CALL = 4,
     VHOST_USER_SLAVE_VRING_ERR = 5,
+    VHOST_USER_SLAVE_FS_MAP = 6,
+    VHOST_USER_SLAVE_FS_UNMAP = 7,
     VHOST_USER_SLAVE_MAX
 }  VhostUserSlaveRequest;
 
-- 
2.29.2


Re: [PATCH 07/24] DAX: virtio-fs: Add vhost-user slave commands for mapping
Posted by Stefan Hajnoczi 4 years, 12 months ago
On Tue, Feb 09, 2021 at 07:02:07PM +0000, Dr. David Alan Gilbert (git) wrote:
> diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
> index d6085f7045..1deedd3407 100644
> --- a/docs/interop/vhost-user.rst
> +++ b/docs/interop/vhost-user.rst
> @@ -1432,6 +1432,26 @@ Slave message types
>  
>    The state.num field is currently reserved and must be set to 0.
>  
> +``VHOST_USER_SLAVE_FS_MAP``
> +  :id: 6
> +  :equivalent ioctl: N/A
> +  :slave payload: fd + n * (offset + address + len)

I'm not sure I understand this notation. '+' means field concatenation?
Is 'fd' a field or does it indicate file descriptor passing?

I suggest using a struct name instead of informal notation so that the
payload size and representation is clear.

The same applies for VHOST_USER_SLAVE_FS_UNMAP.

> +  :master payload: N/A
> +
> +  Requests that the QEMU mmap the given fd into the virtio-fs cache;

s/QEMU mmap the given fd/given fd be mmapped/

Please avoid mentioning QEMU specifically. Any VMM should be able to
implement this spec.

The same applies for VHOST_USER_SLAVE_FS_UNMAP.

> +  multiple chunks can be mapped in one command.
> +  A reply is generated indicating whether mapping succeeded.
> +
> +``VHOST_USER_SLAVE_FS_UNMAP``
> +  :id: 7
> +  :equivalent ioctl: N/A
> +  :slave payload: n * (address + len)
> +  :master payload: N/A
> +
> +  Requests that the QEMU un-mmap the given range in the virtio-fs cache;
> +  multiple chunks can be unmapped in one command.
> +  A reply is generated indicating whether unmapping succeeded.
Re: [PATCH 07/24] DAX: virtio-fs: Add vhost-user slave commands for mapping
Posted by Dr. David Alan Gilbert 4 years, 11 months ago
* Stefan Hajnoczi (stefanha@redhat.com) wrote:
> On Tue, Feb 09, 2021 at 07:02:07PM +0000, Dr. David Alan Gilbert (git) wrote:
> > diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
> > index d6085f7045..1deedd3407 100644
> > --- a/docs/interop/vhost-user.rst
> > +++ b/docs/interop/vhost-user.rst
> > @@ -1432,6 +1432,26 @@ Slave message types
> >  
> >    The state.num field is currently reserved and must be set to 0.
> >  
> > +``VHOST_USER_SLAVE_FS_MAP``
> > +  :id: 6
> > +  :equivalent ioctl: N/A
> > +  :slave payload: fd + n * (offset + address + len)
> 
> I'm not sure I understand this notation. '+' means field concatenation?
> Is 'fd' a field or does it indicate file descriptor passing?
> 
> I suggest using a struct name instead of informal notation so that the
> payload size and representation is clear.
> 
> The same applies for VHOST_USER_SLAVE_FS_UNMAP.
> 
> > +  :master payload: N/A
> > +
> > +  Requests that the QEMU mmap the given fd into the virtio-fs cache;
> 
> s/QEMU mmap the given fd/given fd be mmapped/
> 
> Please avoid mentioning QEMU specifically. Any VMM should be able to
> implement this spec.
> 
> The same applies for VHOST_USER_SLAVE_FS_UNMAP.

OK, I've changed this to:

+``VHOST_USER_SLAVE_FS_MAP``
+  :id: 6
+  :equivalent ioctl: N/A
+  :slave payload: ``struct VhostUserFSSlaveMsg``
+  :master payload: N/A
+
+  Requests that an fd, provided in the ancillary data, be mmapped
+  into the virtio-fs cache; multiple chunks can be mapped in one
+  command.
+  A reply is generated indicating whether mapping succeeded.
+
+``VHOST_USER_SLAVE_FS_UNMAP``
+  :id: 7
+  :equivalent ioctl: N/A
+  :slave payload: ``struct VhostUserFSSlaveMsg``
+  :master payload: N/A
+
+  Requests that the range in the virtio-fs cache is unmapped;
+  multiple chunks can be unmapped in one command.
+  A reply is generated indicating whether unmapping succeeded.
+

(Although it'll get a little more complicated as I rework for
Chirantan's comment)

Dave

> > +  multiple chunks can be mapped in one command.
> > +  A reply is generated indicating whether mapping succeeded.
> > +
> > +``VHOST_USER_SLAVE_FS_UNMAP``
> > +  :id: 7
> > +  :equivalent ioctl: N/A
> > +  :slave payload: n * (address + len)
> > +  :master payload: N/A
> > +
> > +  Requests that the QEMU un-mmap the given range in the virtio-fs cache;
> > +  multiple chunks can be unmapped in one command.
> > +  A reply is generated indicating whether unmapping succeeded.


-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK


Re: [Virtio-fs] [PATCH 07/24] DAX: virtio-fs: Add vhost-user slave commands for mapping
Posted by Chirantan Ekbote 4 years, 11 months ago
On Wed, Feb 10, 2021 at 4:04 AM Dr. David Alan Gilbert (git)
<dgilbert@redhat.com> wrote:
>
> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> +
> +typedef struct {
> +    /* Offsets within the file being mapped */
> +    uint64_t fd_offset[VHOST_USER_FS_SLAVE_ENTRIES];
> +    /* Offsets within the cache */
> +    uint64_t c_offset[VHOST_USER_FS_SLAVE_ENTRIES];
> +    /* Lengths of sections */
> +    uint64_t len[VHOST_USER_FS_SLAVE_ENTRIES];
> +    /* Flags, from VHOST_USER_FS_FLAG_* */
> +    uint64_t flags[VHOST_USER_FS_SLAVE_ENTRIES];
> +} VhostUserFSSlaveMsg;
> +

Is it too late to change this?  This struct allocates space for up to
8 entries but most of the time the server will only try to set up one
mapping at a time so only 32 out of the 256 bytes in the message are
actually being used.  We're just wasting time memcpy'ing bytes that
will never be used.  Is there some reason this can't be dynamically
sized?  Something like:

typedef struct {
    /* Number of mapping requests */
    uint16_t num_requests;
    /* `num_requests` mapping requests */
    MappingRequest requests[];
} VhostUserFSSlaveMsg;

typedef struct {
    /* Offset within the file being mapped */
    uint64_t fd_offset;
    /* Offset within the cache */
    uint64_t c_offset;
    /* Length of section */
    uint64_t len;
    /* Flags, from VHOST_USER_FS_FLAG_* */
    uint64_t flags;
} MappingRequest;

The current pre-allocated structure both wastes space when there are
fewer than 8 requests and requires extra messages to be sent when
there are more than 8 requests.  I realize that in the grand scheme of
things copying 224 extra bytes is basically not noticeable but it just
irks me that we could fix this really easily before it gets propagated
to too many other places.

Chirantan

> --
> 2.29.2
>
> _______________________________________________
> Virtio-fs mailing list
> Virtio-fs@redhat.com
> https://www.redhat.com/mailman/listinfo/virtio-fs
>

Re: [Virtio-fs] [PATCH 07/24] DAX: virtio-fs: Add vhost-user slave commands for mapping
Posted by Dr. David Alan Gilbert 4 years, 11 months ago
* Chirantan Ekbote (chirantan@chromium.org) wrote:
> On Wed, Feb 10, 2021 at 4:04 AM Dr. David Alan Gilbert (git)
> <dgilbert@redhat.com> wrote:
> >
> > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > +
> > +typedef struct {
> > +    /* Offsets within the file being mapped */
> > +    uint64_t fd_offset[VHOST_USER_FS_SLAVE_ENTRIES];
> > +    /* Offsets within the cache */
> > +    uint64_t c_offset[VHOST_USER_FS_SLAVE_ENTRIES];
> > +    /* Lengths of sections */
> > +    uint64_t len[VHOST_USER_FS_SLAVE_ENTRIES];
> > +    /* Flags, from VHOST_USER_FS_FLAG_* */
> > +    uint64_t flags[VHOST_USER_FS_SLAVE_ENTRIES];
> > +} VhostUserFSSlaveMsg;
> > +
> 
> Is it too late to change this?

No; this is a message defined as part of this series so still up for
review.  It's not guest visible; just on the vhost-user pipe.

> This struct allocates space for up to
> 8 entries but most of the time the server will only try to set up one
> mapping at a time so only 32 out of the 256 bytes in the message are
> actually being used.  We're just wasting time memcpy'ing bytes that
> will never be used.  Is there some reason this can't be dynamically
> sized?  Something like:
> 
> typedef struct {
>     /* Number of mapping requests */
>     uint16_t num_requests;
>     /* `num_requests` mapping requests */
>    MappingRequest requests[];
> } VhostUserFSSlaveMsg;
> 
> typedef struct {
>     /* Offset within the file being mapped */
>     uint64_t fd_offset;
>     /* Offset within the cache */
>     uint64_t c_offset;
>     /* Length of section */
>     uint64_t len;
>     /* Flags, from VHOST_USER_FS_FLAG_* */
>     uint64_t flags;
> } MappingRequest;
> 
> The current pre-allocated structure both wastes space when there are
> fewer than 8 requests and requires extra messages to be sent when
> there are more than 8 requests.  I realize that in the grand scheme of
> things copying 224 extra bytes is basically not noticeable but it just
> irks me that we could fix this really easily before it gets propagated
> to too many other places.

Sure; I'll have a look.  I think at the moment the only
more-than-one-entry case is the remove mapping case.

Dave

> Chirantan
> 
> > --
> > 2.29.2
> >
> > _______________________________________________
> > Virtio-fs mailing list
> > Virtio-fs@redhat.com
> > https://www.redhat.com/mailman/listinfo/virtio-fs
> >
> 
-- 
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK


Re: [Virtio-fs] [PATCH 07/24] DAX: virtio-fs: Add vhost-user slave commands for mapping
Posted by Vivek Goyal 4 years, 11 months ago
On Mon, Feb 15, 2021 at 07:35:53PM +0900, Chirantan Ekbote wrote:
> On Wed, Feb 10, 2021 at 4:04 AM Dr. David Alan Gilbert (git)
> <dgilbert@redhat.com> wrote:
> >
> > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > +
> > +typedef struct {
> > +    /* Offsets within the file being mapped */
> > +    uint64_t fd_offset[VHOST_USER_FS_SLAVE_ENTRIES];
> > +    /* Offsets within the cache */
> > +    uint64_t c_offset[VHOST_USER_FS_SLAVE_ENTRIES];
> > +    /* Lengths of sections */
> > +    uint64_t len[VHOST_USER_FS_SLAVE_ENTRIES];
> > +    /* Flags, from VHOST_USER_FS_FLAG_* */
> > +    uint64_t flags[VHOST_USER_FS_SLAVE_ENTRIES];
> > +} VhostUserFSSlaveMsg;
> > +
> 
> Is it too late to change this?  This struct allocates space for up to
> 8 entries but most of the time the server will only try to set up one
> mapping at a time so only 32 out of the 256 bytes in the message are
> actually being used.  We're just wasting time memcpy'ing bytes that
> will never be used.  Is there some reason this can't be dynamically
> sized?  Something like:
> 
> typedef struct {
>     /* Number of mapping requests */
>     uint16_t num_requests;
>     /* `num_requests` mapping requests */
>    MappingRequest requests[];
> } VhostUserFSSlaveMsg;
> 
> typedef struct {
>     /* Offset within the file being mapped */
>     uint64_t fd_offset;
>     /* Offset within the cache */
>     uint64_t c_offset;
>     /* Length of section */
>     uint64_t len;
>     /* Flags, from VHOST_USER_FS_FLAG_* */
>     uint64_t flags;
> } MappingRequest;
> 
> The current pre-allocated structure both wastes space when there are
> fewer than 8 requests and requires extra messages to be sent when
> there are more than 8 requests.  I realize that in the grand scheme of
> things copying 224 extra bytes is basically not noticeable but it just
> irks me that we could fix this really easily before it gets propagated
> to too many other places.

Sounds like a reasonable idea. We probably will have to dynamically
allocate memory for removemapping, hopefully that does not have a
performance impact.

Vivek