[v15] block: Image locking series

[Qemu-devel] [PATCH v15 20/21] file-posix: Add image locking to perm operations

Posted by Fam Zheng 8 years, 9 months ago

This extends the permission bits of op blocker API to external using
Linux OFD locks.

Each permission in @perm and @shared_perm is represented by a locked
byte in the image file.  Requesting a permission in @perm is translated
to a shared lock of the corresponding byte; rejecting to share the same
permission is translated to a shared lock of a separate byte. With that,
we use 2x number of bytes of distinct permission types.

virtlockd in libvirt locks the first byte, so we do locking from a
higher offset.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/file-posix.c | 267 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 264 insertions(+), 3 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 2114720..b92fdc3 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -129,12 +129,28 @@ do { \
 
 #define MAX_BLOCKSIZE	4096
 
+/* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes,
+ * leaving a few more bytes for its future use. */
+#define RAW_LOCK_PERM_BASE             100
+#define RAW_LOCK_SHARED_BASE           200
+#ifdef F_OFD_SETLK
+#define RAW_LOCK_SUPPORTED 1
+#else
+#define RAW_LOCK_SUPPORTED 0
+#endif
+
 typedef struct BDRVRawState {
     int fd;
+    int lock_fd;
+    bool use_lock;
     int type;
     int open_flags;
     size_t buf_align;
 
+    /* The current permissions. */
+    uint64_t perm;
+    uint64_t shared_perm;
+
 #ifdef CONFIG_XFS
     bool is_xfs:1;
 #endif
@@ -440,6 +456,8 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
     }
     s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE);
 
+    s->use_lock = qemu_opt_get_bool(opts, "locking", true);
+
     s->open_flags = open_flags;
     raw_parse_flags(bdrv_flags, &s->open_flags);
 
@@ -455,6 +473,21 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
     }
     s->fd = fd;
 
+    s->lock_fd = -1;
+    fd = qemu_open(filename, O_RDONLY);
+    if (fd < 0) {
+        if (RAW_LOCK_SUPPORTED) {
+            ret = -errno;
+            error_setg_errno(errp, errno, "Could not open '%s' for locking",
+                             filename);
+            qemu_close(s->fd);
+            goto fail;
+        }
+    }
+    s->lock_fd = fd;
+    s->perm = 0;
+    s->shared_perm = BLK_PERM_ALL;
+
 #ifdef CONFIG_LINUX_AIO
      /* Currently Linux does AIO only for files opened with O_DIRECT */
     if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
@@ -542,6 +575,156 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
     return raw_open_common(bs, options, flags, 0, errp);
 }
 
+typedef enum {
+    RAW_PL_PREPARE,
+    RAW_PL_COMMIT,
+    RAW_PL_ABORT,
+} RawPermLockOp;
+
+/* Lock wanted bytes by @perm and ~@shared_perm in the file; if @unlock ==
+ * true, also unlock the unneeded bytes. */
+static int raw_apply_lock_bytes(BDRVRawState *s,
+                                uint64_t perm_lock_bits,
+                                uint64_t shared_perm_lock_bits,
+                                bool unlock, Error **errp)
+{
+    int ret;
+    int i;
+
+    for (i = 0; i < BLK_PERM_MAX; ++i) {
+        int off = RAW_LOCK_PERM_BASE + i;
+        if (perm_lock_bits & (1ULL << i)) {
+            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
+            if (ret) {
+                error_setg(errp, "Failed to lock byte %d", off);
+                return ret;
+            }
+        } else if (unlock) {
+            ret = qemu_unlock_fd(s->lock_fd, off, 1);
+            if (ret) {
+                error_setg(errp, "Failed to unlock byte %d", off);
+                return ret;
+            }
+        }
+    }
+    for (i = 0; i < BLK_PERM_MAX; ++i) {
+        int off = RAW_LOCK_SHARED_BASE + i;
+        if (shared_perm_lock_bits & (1ULL << i)) {
+            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
+            if (ret) {
+                error_setg(errp, "Failed to lock byte %d", off);
+                return ret;
+            }
+        } else if (unlock) {
+            ret = qemu_unlock_fd(s->lock_fd, off, 1);
+            if (ret) {
+                error_setg(errp, "Failed to unlock byte %d", off);
+                return ret;
+            }
+        }
+    }
+    return 0;
+}
+
+/* Check "unshared" bytes implied by @perm and ~@shared_perm in the file. */
+static int raw_check_lock_bytes(BDRVRawState *s,
+                                uint64_t perm, uint64_t shared_perm,
+                                Error **errp)
+{
+    int ret;
+    int i;
+
+    for (i = 0; i < BLK_PERM_MAX; ++i) {
+        int off = RAW_LOCK_SHARED_BASE + i;
+        uint64_t p = 1ULL << i;
+        if (perm & p) {
+            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
+            if (ret) {
+                error_setg(errp,
+                           "Failed to check byte %d for \"%s\" permission",
+                           off, bdrv_perm_names(p));
+                error_append_hint(errp,
+                                  "Is another process using the image?\n");
+                return ret;
+            }
+        }
+    }
+    for (i = 0; i < BLK_PERM_MAX; ++i) {
+        int off = RAW_LOCK_PERM_BASE + i;
+        uint64_t p = 1ULL << i;
+        if (!(shared_perm & p)) {
+            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
+            if (ret) {
+                error_setg(errp,
+                           "Failed to check byte %d for shared \"%s\" permission",
+                           off, bdrv_perm_names(p));
+                error_append_hint(errp,
+                                  "Is another process using the image?\n");
+                return ret;
+            }
+        }
+    }
+    return 0;
+}
+
+static int raw_handle_perm_lock(BlockDriverState *bs,
+                                RawPermLockOp op,
+                                uint64_t new_perm, uint64_t new_shared,
+                                Error **errp)
+{
+    BDRVRawState *s = bs->opaque;
+    int ret = 0;
+    Error *local_err = NULL;
+
+    if (!RAW_LOCK_SUPPORTED) {
+        return 0;
+    }
+
+    if (!s->use_lock) {
+        return 0;
+    }
+
+    if (bdrv_get_flags(bs) & BDRV_O_INACTIVE) {
+        return 0;
+    }
+
+    assert(s->lock_fd > 0);
+
+    switch (op) {
+    case RAW_PL_PREPARE:
+        ret = raw_apply_lock_bytes(s, s->perm | new_perm,
+                                   ~s->shared_perm | ~new_shared,
+                                   false, errp);
+        if (!ret) {
+            ret = raw_check_lock_bytes(s, new_perm, new_shared, errp);
+            if (!ret) {
+                return 0;
+            }
+        }
+        op = RAW_PL_ABORT;
+        /* fall through to unlock bytes. */
+    case RAW_PL_ABORT:
+        raw_apply_lock_bytes(s, s->perm, ~s->shared_perm, true, &local_err);
+        if (local_err) {
+            /* Theoretically the above call only unlocks bytes and it cannot
+             * fail. Something weird happened, report it.
+             */
+            error_report_err(local_err);
+        }
+        break;
+    case RAW_PL_COMMIT:
+        raw_apply_lock_bytes(s, new_perm, ~new_shared, true, &local_err);
+        if (local_err) {
+            /* Theoretically the above call only unlocks bytes and it cannot
+             * fail. Something weird happened, report it.
+             */
+            error_report_err(local_err);
+        }
+        break;
+    }
+    return ret;
+}
+
 static int raw_reopen_prepare(BDRVReopenState *state,
                               BlockReopenQueue *queue, Error **errp)
 {
@@ -549,6 +732,8 @@ static int raw_reopen_prepare(BDRVReopenState *state,
     BDRVRawReopenState *rs;
     int ret = 0;
     Error *local_err = NULL;
+    uint64_t clear_perms = state->flags & BDRV_O_RDWR ? 0 :
+        BLK_PERM_WRITE | BLK_PERM_RESIZE | BLK_PERM_WRITE_UNCHANGED;
 
     assert(state != NULL);
     assert(state->bs != NULL);
@@ -613,13 +798,22 @@ static int raw_reopen_prepare(BDRVReopenState *state,
     if (rs->fd != -1) {
         raw_probe_alignment(state->bs, rs->fd, &local_err);
         if (local_err) {
-            qemu_close(rs->fd);
-            rs->fd = -1;
             error_propagate(errp, local_err);
             ret = -EINVAL;
+            goto fail;
         }
     }
 
+    ret = raw_handle_perm_lock(state->bs, RAW_PL_PREPARE,
+                               s->perm & ~clear_perms,
+                               s->shared_perm, errp);
+    if (ret) {
+        goto fail;
+    }
+    return 0;
+fail:
+    qemu_close(rs->fd);
+    rs->fd = -1;
     return ret;
 }
 
@@ -627,6 +821,8 @@ static void raw_reopen_commit(BDRVReopenState *state)
 {
     BDRVRawReopenState *rs = state->opaque;
     BDRVRawState *s = state->bs->opaque;
+    uint64_t clear_perms = state->flags & BDRV_O_RDWR ? 0 :
+        BLK_PERM_WRITE | BLK_PERM_RESIZE | BLK_PERM_WRITE_UNCHANGED;
 
     s->open_flags = rs->open_flags;
 
@@ -635,12 +831,17 @@ static void raw_reopen_commit(BDRVReopenState *state)
 
     g_free(state->opaque);
     state->opaque = NULL;
+    raw_handle_perm_lock(state->bs, RAW_PL_COMMIT, s->perm & ~clear_perms,
+                         s->shared_perm, NULL);
 }
 
 
 static void raw_reopen_abort(BDRVReopenState *state)
 {
+    BDRVRawState *s = state->bs->opaque;
     BDRVRawReopenState *rs = state->opaque;
+    uint64_t clear_perms = state->flags & BDRV_O_RDWR ? 0 :
+        BLK_PERM_WRITE | BLK_PERM_RESIZE | BLK_PERM_WRITE_UNCHANGED;
 
      /* nothing to do if NULL, we didn't get far enough */
     if (rs == NULL) {
@@ -653,6 +854,8 @@ static void raw_reopen_abort(BDRVReopenState *state)
     }
     g_free(state->opaque);
     state->opaque = NULL;
+    raw_handle_perm_lock(state->bs, RAW_PL_ABORT, s->perm & ~clear_perms,
+                         s->shared_perm, NULL);
 }
 
 static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
@@ -1410,6 +1613,10 @@ static void raw_close(BlockDriverState *bs)
         qemu_close(s->fd);
         s->fd = -1;
     }
+    if (s->lock_fd >= 0) {
+        qemu_close(s->lock_fd);
+        s->lock_fd = -1;
+    }
 }
 
 static int raw_truncate(BlockDriverState *bs, int64_t offset)
@@ -1947,6 +2154,56 @@ static QemuOptsList raw_create_opts = {
     }
 };
 
+static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
+                          Error **errp)
+{
+    return raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp);
+}
+
+static void raw_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared)
+{
+    BDRVRawState *s = bs->opaque;
+    raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL);
+    s->perm = perm;
+    s->shared_perm = shared;
+}
+
+static void raw_abort_perm_update(BlockDriverState *bs)
+{
+    BDRVRawState *s = bs->opaque;
+
+    raw_handle_perm_lock(bs, RAW_PL_ABORT, s->perm, s->shared_perm, NULL);
+}
+
+static int raw_inactivate(BlockDriverState *bs)
+{
+    int ret;
+    uint64_t perm = 0;
+    uint64_t shared = BLK_PERM_ALL;
+
+    ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, NULL);
+    if (ret) {
+        return ret;
+    }
+    raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL);
+    return 0;
+}
+
+
+static void raw_invalidate_cache(BlockDriverState *bs, Error **errp)
+{
+    BDRVRawState *s = bs->opaque;
+    int ret;
+
+    assert(!(bdrv_get_flags(bs) & BDRV_O_INACTIVE));
+    ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, s->perm, s->shared_perm,
+                               errp);
+    if (ret) {
+        return;
+    }
+    raw_handle_perm_lock(bs, RAW_PL_COMMIT, s->perm, s->shared_perm, NULL);
+}
+
 BlockDriver bdrv_file = {
     .format_name = "file",
     .protocol_name = "file",
@@ -1977,7 +2234,11 @@ BlockDriver bdrv_file = {
     .bdrv_get_info = raw_get_info,
     .bdrv_get_allocated_file_size
                         = raw_get_allocated_file_size,
-
+    .bdrv_inactivate = raw_inactivate,
+    .bdrv_invalidate_cache = raw_invalidate_cache,
+    .bdrv_check_perm = raw_check_perm,
+    .bdrv_set_perm   = raw_set_perm,
+    .bdrv_abort_perm_update = raw_abort_perm_update,
     .create_opts = &raw_create_opts,
 };
 
-- 
2.9.3

Re: [Qemu-devel] [PATCH v15 20/21] file-posix: Add image locking to perm operations

Posted by Kevin Wolf 8 years, 9 months ago

Am 26.04.2017 um 05:34 hat Fam Zheng geschrieben:
> This extends the permission bits of op blocker API to external using
> Linux OFD locks.
> 
> Each permission in @perm and @shared_perm is represented by a locked
> byte in the image file.  Requesting a permission in @perm is translated
> to a shared lock of the corresponding byte; rejecting to share the same
> permission is translated to a shared lock of a separate byte. With that,
> we use 2x number of bytes of distinct permission types.
> 
> virtlockd in libvirt locks the first byte, so we do locking from a
> higher offset.
> 
> Suggested-by: Kevin Wolf <kwolf@redhat.com>
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>  block/file-posix.c | 267 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 264 insertions(+), 3 deletions(-)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 2114720..b92fdc3 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -129,12 +129,28 @@ do { \
>  
>  #define MAX_BLOCKSIZE	4096
>  
> +/* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes,
> + * leaving a few more bytes for its future use. */
> +#define RAW_LOCK_PERM_BASE             100
> +#define RAW_LOCK_SHARED_BASE           200
> +#ifdef F_OFD_SETLK
> +#define RAW_LOCK_SUPPORTED 1
> +#else
> +#define RAW_LOCK_SUPPORTED 0
> +#endif
> +
>  typedef struct BDRVRawState {
>      int fd;
> +    int lock_fd;
> +    bool use_lock;
>      int type;
>      int open_flags;
>      size_t buf_align;
>  
> +    /* The current permissions. */
> +    uint64_t perm;
> +    uint64_t shared_perm;
> +
>  #ifdef CONFIG_XFS
>      bool is_xfs:1;
>  #endif
> @@ -440,6 +456,8 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
>      }
>      s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE);
>  
> +    s->use_lock = qemu_opt_get_bool(opts, "locking", true);
> +
>      s->open_flags = open_flags;
>      raw_parse_flags(bdrv_flags, &s->open_flags);
>  
> @@ -455,6 +473,21 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
>      }
>      s->fd = fd;
>  
> +    s->lock_fd = -1;
> +    fd = qemu_open(filename, O_RDONLY);

Note that with /dev/fdset there can be cases where we can open a file
O_RDWR, but not O_RDONLY. Should we better just use the same flags as
for the s->fd?

> +    if (fd < 0) {
> +        if (RAW_LOCK_SUPPORTED) {
> +            ret = -errno;
> +            error_setg_errno(errp, errno, "Could not open '%s' for locking",
> +                             filename);
> +            qemu_close(s->fd);
> +            goto fail;
> +        }
> +    }

You still open the fd and possibly error out even with s->use_lock ==
false. Should the code starting from qemu_open() to here be conditional
on s->use_lock?

> +    s->lock_fd = fd;
> +    s->perm = 0;
> +    s->shared_perm = BLK_PERM_ALL;
> +
>  #ifdef CONFIG_LINUX_AIO
>       /* Currently Linux does AIO only for files opened with O_DIRECT */
>      if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
> @@ -542,6 +575,156 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
>      return raw_open_common(bs, options, flags, 0, errp);
>  }
>  
> +typedef enum {
> +    RAW_PL_PREPARE,
> +    RAW_PL_COMMIT,
> +    RAW_PL_ABORT,
> +} RawPermLockOp;
> +
> +/* Lock wanted bytes by @perm and ~@shared_perm in the file; if @unlock ==

This function doesn't take @perm or @shared_perm. This comment is
especially confusing because shared_perm_lock_bits == ~shared_perm. We
should be explicit here that shared_perm_lock_bits is the mask of all
permissions that _cannot_ be shared.

> + * true, also unlock the unneeded bytes. */
> +static int raw_apply_lock_bytes(BDRVRawState *s,
> +                                uint64_t perm_lock_bits,
> +                                uint64_t shared_perm_lock_bits,
> +                                bool unlock, Error **errp)
> +{
> +    int ret;
> +    int i;
> +
> +    for (i = 0; i < BLK_PERM_MAX; ++i) {
> +        int off = RAW_LOCK_PERM_BASE + i;
> +        if (perm_lock_bits & (1ULL << i)) {
> +            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
> +            if (ret) {
> +                error_setg(errp, "Failed to lock byte %d", off);

Should bdrv_perm_names() be used in this function, too? Maybe not that
important because we never expect this to fail (we don't do any
exclusive locks).

> +                return ret;
> +            }
> +        } else if (unlock) {
> +            ret = qemu_unlock_fd(s->lock_fd, off, 1);
> +            if (ret) {
> +                error_setg(errp, "Failed to unlock byte %d", off);
> +                return ret;
> +            }
> +        }
> +    }
> +    for (i = 0; i < BLK_PERM_MAX; ++i) {
> +        int off = RAW_LOCK_SHARED_BASE + i;
> +        if (shared_perm_lock_bits & (1ULL << i)) {
> +            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
> +            if (ret) {
> +                error_setg(errp, "Failed to lock byte %d", off);
> +                return ret;
> +            }
> +        } else if (unlock) {
> +            ret = qemu_unlock_fd(s->lock_fd, off, 1);
> +            if (ret) {
> +                error_setg(errp, "Failed to unlock byte %d", off);
> +                return ret;
> +            }
> +        }
> +    }
> +    return 0;
> +}

Note: If this function returns an error, we may have acquired some of
the locks. The caller probably wants to call it again to reset the
permissions to the old mask.

> +/* Check "unshared" bytes implied by @perm and ~@shared_perm in the file. */
> +static int raw_check_lock_bytes(BDRVRawState *s,
> +                                uint64_t perm, uint64_t shared_perm,
> +                                Error **errp)
> +{
> +    int ret;
> +    int i;
> +
> +    for (i = 0; i < BLK_PERM_MAX; ++i) {
> +        int off = RAW_LOCK_SHARED_BASE + i;
> +        uint64_t p = 1ULL << i;
> +        if (perm & p) {
> +            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
> +            if (ret) {
> +                error_setg(errp,
> +                           "Failed to check byte %d for \"%s\" permission",
> +                           off, bdrv_perm_names(p));

bdrv_perm_names() returns a malloced string, which is leaked here.

> +                error_append_hint(errp,
> +                                  "Is another process using the image?\n");

We probably need to tweak the error messages a bit. When I saw this one
this morning, I felt that if I didn't know how you were going to
implement the locking, it would make zero sense to me:

$ ./qemu-img snapshot -l /tmp/test.qcow2
qemu-img: Could not open '/tmp/test.qcow2': Failed to check byte 101 for shared "write" permission
Is another process using the image?

Something shorter and less technical like 'Failed to get "write" lock'
is probably the friendlier message.

> +                return ret;
> +            }
> +        }
> +    }
> +    for (i = 0; i < BLK_PERM_MAX; ++i) {
> +        int off = RAW_LOCK_PERM_BASE + i;
> +        uint64_t p = 1ULL << i;
> +        if (!(shared_perm & p)) {
> +            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
> +            if (ret) {
> +                error_setg(errp,
> +                           "Failed to check byte %d for shared \"%s\" permission",
> +                           off, bdrv_perm_names(p));
> +                error_append_hint(errp,
> +                                  "Is another process using the image?\n");
> +                return ret;
> +            }
> +        }
> +    }
> +    return 0;
> +}
> +
> +static int raw_handle_perm_lock(BlockDriverState *bs,
> +                                RawPermLockOp op,
> +                                uint64_t new_perm, uint64_t new_shared,
> +                                Error **errp)
> +{
> +    BDRVRawState *s = bs->opaque;
> +    int ret = 0;
> +    Error *local_err = NULL;
> +
> +    if (!RAW_LOCK_SUPPORTED) {
> +        return 0;
> +    }
> +
> +    if (!s->use_lock) {
> +        return 0;
> +    }
> +
> +    if (bdrv_get_flags(bs) & BDRV_O_INACTIVE) {
> +        return 0;
> +    }

I don't like the handling of BDRV_O_INACTIVE here in the file-posix
driver. In theory, the users of the node already shouldn't be requesting
any problematic permissions while the image is inactive.

What are the actual problems that we're solving with this and the
.bdrv_invalidate_cache/.bdrv_inactivate callbacks?

> +    assert(s->lock_fd > 0);
> +
> +    switch (op) {
> +    case RAW_PL_PREPARE:
> +        ret = raw_apply_lock_bytes(s, s->perm | new_perm,
> +                                   ~s->shared_perm | ~new_shared,
> +                                   false, errp);
> +        if (!ret) {
> +            ret = raw_check_lock_bytes(s, new_perm, new_shared, errp);
> +            if (!ret) {
> +                return 0;
> +            }
> +        }
> +        op = RAW_PL_ABORT;

op isn't used after this place, I wouldn't be surprised if some compiler
or static analyser complained about it.

> +        /* fall through to unlock bytes. */

Good, this undoes whatever raw_apply_lock_bytes() already has done.

> +    case RAW_PL_ABORT:
> +        raw_apply_lock_bytes(s, s->perm, ~s->shared_perm, true, &local_err);
> +        if (local_err) {
> +            /* Theoretically the above call only unlocks bytes and it cannot
> +             * fail. Something weird happened, report it.
> +             */
> +            error_report_err(local_err);
> +        }
> +        break;
> +    case RAW_PL_COMMIT:
> +        raw_apply_lock_bytes(s, new_perm, ~new_shared, true, &local_err);
> +        if (local_err) {
> +            /* Theoretically the above call only unlocks bytes and it cannot
> +             * fail. Something weird happened, report it.
> +             */
> +            error_report_err(local_err);
> +        }
> +        break;
> +    }
> +    return ret;
> +}
> +
>  static int raw_reopen_prepare(BDRVReopenState *state,
>                                BlockReopenQueue *queue, Error **errp)
>  {
> @@ -549,6 +732,8 @@ static int raw_reopen_prepare(BDRVReopenState *state,
>      BDRVRawReopenState *rs;
>      int ret = 0;
>      Error *local_err = NULL;
> +    uint64_t clear_perms = state->flags & BDRV_O_RDWR ? 0 :
> +        BLK_PERM_WRITE | BLK_PERM_RESIZE | BLK_PERM_WRITE_UNCHANGED;
>  
>      assert(state != NULL);
>      assert(state->bs != NULL);
> @@ -613,13 +798,22 @@ static int raw_reopen_prepare(BDRVReopenState *state,
>      if (rs->fd != -1) {
>          raw_probe_alignment(state->bs, rs->fd, &local_err);
>          if (local_err) {
> -            qemu_close(rs->fd);
> -            rs->fd = -1;
>              error_propagate(errp, local_err);
>              ret = -EINVAL;
> +            goto fail;
>          }
>      }
>  
> +    ret = raw_handle_perm_lock(state->bs, RAW_PL_PREPARE,
> +                               s->perm & ~clear_perms,
> +                               s->shared_perm, errp);

Is this a workaround for bdrv_can_set_read_only() not checking whether
there are still writers attached? Because I think in theory, we should
be able to assert() that clear_perms are already clear.

> +    if (ret) {
> +        goto fail;
> +    }
> +    return 0;
> +fail:
> +    qemu_close(rs->fd);
> +    rs->fd = -1;
>      return ret;
>  }
>  
> @@ -627,6 +821,8 @@ static void raw_reopen_commit(BDRVReopenState *state)
>  {
>      BDRVRawReopenState *rs = state->opaque;
>      BDRVRawState *s = state->bs->opaque;
> +    uint64_t clear_perms = state->flags & BDRV_O_RDWR ? 0 :
> +        BLK_PERM_WRITE | BLK_PERM_RESIZE | BLK_PERM_WRITE_UNCHANGED;
>  
>      s->open_flags = rs->open_flags;
>  
> @@ -635,12 +831,17 @@ static void raw_reopen_commit(BDRVReopenState *state)
>  
>      g_free(state->opaque);
>      state->opaque = NULL;
> +    raw_handle_perm_lock(state->bs, RAW_PL_COMMIT, s->perm & ~clear_perms,
> +                         s->shared_perm, NULL);

Shouldn't we update s->perm here? Or probably move the update into
raw_handle_perm_lock(). Or even more probably, as said above, replace
the permission handling in raw_reopen_* by checking permissions in the
generic block layer beforehand.

>  }
>  
>  
>  static void raw_reopen_abort(BDRVReopenState *state)
>  {
> +    BDRVRawState *s = state->bs->opaque;
>      BDRVRawReopenState *rs = state->opaque;
> +    uint64_t clear_perms = state->flags & BDRV_O_RDWR ? 0 :
> +        BLK_PERM_WRITE | BLK_PERM_RESIZE | BLK_PERM_WRITE_UNCHANGED;
>  
>       /* nothing to do if NULL, we didn't get far enough */
>      if (rs == NULL) {
> @@ -653,6 +854,8 @@ static void raw_reopen_abort(BDRVReopenState *state)
>      }
>      g_free(state->opaque);
>      state->opaque = NULL;
> +    raw_handle_perm_lock(state->bs, RAW_PL_ABORT, s->perm & ~clear_perms,
> +                         s->shared_perm, NULL);
>  }
>  
>  static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
> @@ -1410,6 +1613,10 @@ static void raw_close(BlockDriverState *bs)
>          qemu_close(s->fd);
>          s->fd = -1;
>      }
> +    if (s->lock_fd >= 0) {
> +        qemu_close(s->lock_fd);
> +        s->lock_fd = -1;
> +    }
>  }
>  
>  static int raw_truncate(BlockDriverState *bs, int64_t offset)
> @@ -1947,6 +2154,56 @@ static QemuOptsList raw_create_opts = {
>      }
>  };
>  
> +static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
> +                          Error **errp)
> +{
> +    return raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp);
> +}
> +
> +static void raw_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared)
> +{
> +    BDRVRawState *s = bs->opaque;
> +    raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL);
> +    s->perm = perm;
> +    s->shared_perm = shared;
> +}
> +
> +static void raw_abort_perm_update(BlockDriverState *bs)
> +{
> +    BDRVRawState *s = bs->opaque;
> +
> +    raw_handle_perm_lock(bs, RAW_PL_ABORT, s->perm, s->shared_perm, NULL);

The parameters are supposed to be the new permissions, which are
obviously ignored in the case of RAW_PL_ABORT. Would passing 0 for
both be more obvious?

> +}
> +
> +static int raw_inactivate(BlockDriverState *bs)
> +{
> +    int ret;
> +    uint64_t perm = 0;
> +    uint64_t shared = BLK_PERM_ALL;
> +
> +    ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, NULL);
> +    if (ret) {
> +        return ret;
> +    }
> +    raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL);
> +    return 0;
> +}
> +
> +
> +static void raw_invalidate_cache(BlockDriverState *bs, Error **errp)
> +{
> +    BDRVRawState *s = bs->opaque;
> +    int ret;
> +
> +    assert(!(bdrv_get_flags(bs) & BDRV_O_INACTIVE));
> +    ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, s->perm, s->shared_perm,
> +                               errp);
> +    if (ret) {
> +        return;
> +    }
> +    raw_handle_perm_lock(bs, RAW_PL_COMMIT, s->perm, s->shared_perm, NULL);
> +}

Looks okay if we really need BDRV_O_INACTIVE handling in file-posix.

Kevin

Re: [Qemu-devel] [PATCH v15 20/21] file-posix: Add image locking to perm operations

Posted by Fam Zheng 8 years, 9 months ago

On Wed, 04/26 16:22, Kevin Wolf wrote:
> > @@ -455,6 +473,21 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
> >      }
> >      s->fd = fd;
> >  
> > +    s->lock_fd = -1;
> > +    fd = qemu_open(filename, O_RDONLY);
> 
> Note that with /dev/fdset there can be cases where we can open a file
> O_RDWR, but not O_RDONLY. Should we better just use the same flags as
> for the s->fd?

Makes sense.

> 
> > +    if (fd < 0) {
> > +        if (RAW_LOCK_SUPPORTED) {
> > +            ret = -errno;
> > +            error_setg_errno(errp, errno, "Could not open '%s' for locking",
> > +                             filename);
> > +            qemu_close(s->fd);
> > +            goto fail;
> > +        }
> > +    }
> 
> You still open the fd and possibly error out even with s->use_lock ==
> false. Should the code starting from qemu_open() to here be conditional
> on s->use_lock?

Yes.

> 
> > +    s->lock_fd = fd;
> > +    s->perm = 0;
> > +    s->shared_perm = BLK_PERM_ALL;
> > +
> >  #ifdef CONFIG_LINUX_AIO
> >       /* Currently Linux does AIO only for files opened with O_DIRECT */
> >      if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
> > @@ -542,6 +575,156 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
> >      return raw_open_common(bs, options, flags, 0, errp);
> >  }
> >  
> > +typedef enum {
> > +    RAW_PL_PREPARE,
> > +    RAW_PL_COMMIT,
> > +    RAW_PL_ABORT,
> > +} RawPermLockOp;
> > +
> > +/* Lock wanted bytes by @perm and ~@shared_perm in the file; if @unlock ==
> 
> This function doesn't take @perm or @shared_perm. This comment is
> especially confusing because shared_perm_lock_bits == ~shared_perm. We
> should be explicit here that shared_perm_lock_bits is the mask of all
> permissions that _cannot_ be shared.

Will update the comment.

> 
> > + * true, also unlock the unneeded bytes. */
> > +static int raw_apply_lock_bytes(BDRVRawState *s,
> > +                                uint64_t perm_lock_bits,
> > +                                uint64_t shared_perm_lock_bits,
> > +                                bool unlock, Error **errp)
> > +{
> > +    int ret;
> > +    int i;
> > +
> > +    for (i = 0; i < BLK_PERM_MAX; ++i) {
> > +        int off = RAW_LOCK_PERM_BASE + i;
> > +        if (perm_lock_bits & (1ULL << i)) {
> > +            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
> > +            if (ret) {
> > +                error_setg(errp, "Failed to lock byte %d", off);
> 
> Should bdrv_perm_names() be used in this function, too? Maybe not that
> important because we never expect this to fail (we don't do any
> exclusive locks).

Ineed, so going a bit of low level is probably better when it does fail. :)

> 
> > +                return ret;
> > +            }
> > +        } else if (unlock) {
> > +            ret = qemu_unlock_fd(s->lock_fd, off, 1);
> > +            if (ret) {
> > +                error_setg(errp, "Failed to unlock byte %d", off);
> > +                return ret;
> > +            }
> > +        }
> > +    }
> > +    for (i = 0; i < BLK_PERM_MAX; ++i) {
> > +        int off = RAW_LOCK_SHARED_BASE + i;
> > +        if (shared_perm_lock_bits & (1ULL << i)) {
> > +            ret = qemu_lock_fd(s->lock_fd, off, 1, false);
> > +            if (ret) {
> > +                error_setg(errp, "Failed to lock byte %d", off);
> > +                return ret;
> > +            }
> > +        } else if (unlock) {
> > +            ret = qemu_unlock_fd(s->lock_fd, off, 1);
> > +            if (ret) {
> > +                error_setg(errp, "Failed to unlock byte %d", off);
> > +                return ret;
> > +            }
> > +        }
> > +    }
> > +    return 0;
> > +}
> 
> Note: If this function returns an error, we may have acquired some of
> the locks. The caller probably wants to call it again to reset the
> permissions to the old mask.

I think callers do.

> 
> > +/* Check "unshared" bytes implied by @perm and ~@shared_perm in the file. */
> > +static int raw_check_lock_bytes(BDRVRawState *s,
> > +                                uint64_t perm, uint64_t shared_perm,
> > +                                Error **errp)
> > +{
> > +    int ret;
> > +    int i;
> > +
> > +    for (i = 0; i < BLK_PERM_MAX; ++i) {
> > +        int off = RAW_LOCK_SHARED_BASE + i;
> > +        uint64_t p = 1ULL << i;
> > +        if (perm & p) {
> > +            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
> > +            if (ret) {
> > +                error_setg(errp,
> > +                           "Failed to check byte %d for \"%s\" permission",
> > +                           off, bdrv_perm_names(p));
> 
> bdrv_perm_names() returns a malloced string, which is leaked here.

Neglected that, will fix.

> 
> > +                error_append_hint(errp,
> > +                                  "Is another process using the image?\n");
> 
> We probably need to tweak the error messages a bit. When I saw this one
> this morning, I felt that if I didn't know how you were going to
> implement the locking, it would make zero sense to me:
> 
> $ ./qemu-img snapshot -l /tmp/test.qcow2
> qemu-img: Could not open '/tmp/test.qcow2': Failed to check byte 101 for shared "write" permission
> Is another process using the image?
> 
> Something shorter and less technical like 'Failed to get "write" lock'
> is probably the friendlier message.

OK, it's shorter and friendlier.

> 
> > +                return ret;
> > +            }
> > +        }
> > +    }
> > +    for (i = 0; i < BLK_PERM_MAX; ++i) {
> > +        int off = RAW_LOCK_PERM_BASE + i;
> > +        uint64_t p = 1ULL << i;
> > +        if (!(shared_perm & p)) {
> > +            ret = qemu_lock_fd_test(s->lock_fd, off, 1, true);
> > +            if (ret) {
> > +                error_setg(errp,
> > +                           "Failed to check byte %d for shared \"%s\" permission",
> > +                           off, bdrv_perm_names(p));
> > +                error_append_hint(errp,
> > +                                  "Is another process using the image?\n");
> > +                return ret;
> > +            }
> > +        }
> > +    }
> > +    return 0;
> > +}
> > +
> > +static int raw_handle_perm_lock(BlockDriverState *bs,
> > +                                RawPermLockOp op,
> > +                                uint64_t new_perm, uint64_t new_shared,
> > +                                Error **errp)
> > +{
> > +    BDRVRawState *s = bs->opaque;
> > +    int ret = 0;
> > +    Error *local_err = NULL;
> > +
> > +    if (!RAW_LOCK_SUPPORTED) {
> > +        return 0;
> > +    }
> > +
> > +    if (!s->use_lock) {
> > +        return 0;
> > +    }
> > +
> > +    if (bdrv_get_flags(bs) & BDRV_O_INACTIVE) {
> > +        return 0;
> > +    }
> 
> I don't like the handling of BDRV_O_INACTIVE here in the file-posix
> driver. In theory, the users of the node already shouldn't be requesting
> any problematic permissions while the image is inactive.
> 
> What are the actual problems that we're solving with this and the
> .bdrv_invalidate_cache/.bdrv_inactivate callbacks?

To handle locks in "-incoming" case. Removing this check will result in an
early locking error:

    (gdb) bt
    #0  0x0000555555ba40e7 in raw_check_lock_bytes 
    #1  0x0000555555ba4351 in raw_handle_perm_lock 
        at /stor/work/qemu/block/file-posix.c:729
    #2  0x0000555555ba6984 in raw_check_perm 
    #3  0x0000555555b4b3b2 in bdrv_check_perm 
        at /stor/work/qemu/block.c:1480
    #4  0x0000555555b4baae in bdrv_check_update_perm 
        at /stor/work/qemu/block.c:1665
    #5  0x0000555555b4c0da in bdrv_root_attach_child 
    #6  0x0000555555b4c299 in bdrv_attach_child 
    #7  0x0000555555b4cbc1 in bdrv_open_child 
    #8  0x0000555555b74703 in qcow2_open 
    #9  0x0000555555b4a362 in bdrv_open_driver 
    #10 0x0000555555b4acc2 in bdrv_open_common 
    #11 0x0000555555b4d592 in bdrv_open_inherit 
    #12 0x0000555555b4d9a9 in bdrv_open 
        at /stor/work/qemu/block.c:2538
    #13 0x0000555555b9c185 in blk_new_open 
        at /stor/work/qemu/block/block-backend.c:212
    #14 0x00005555558dcc0c in blockdev_init 
    #15 0x00005555558ddcee in drive_new 
    #16 0x00005555558ed6fd in drive_init_func 
    #17 0x0000555555c58fa0 in qemu_opts_foreach 
        at /stor/work/qemu/util/qemu-option.c:1114
    #18 0x00005555558f620f in main 

> 
> > +    assert(s->lock_fd > 0);
> > +
> > +    switch (op) {
> > +    case RAW_PL_PREPARE:
> > +        ret = raw_apply_lock_bytes(s, s->perm | new_perm,
> > +                                   ~s->shared_perm | ~new_shared,
> > +                                   false, errp);
> > +        if (!ret) {
> > +            ret = raw_check_lock_bytes(s, new_perm, new_shared, errp);
> > +            if (!ret) {
> > +                return 0;
> > +            }
> > +        }
> > +        op = RAW_PL_ABORT;
> 
> op isn't used after this place, I wouldn't be surprised if some compiler
> or static analyser complained about it.

I just don't want to surprise the "case RAW_PL_ABORT:" code. :)

> 
> > +        /* fall through to unlock bytes. */
> 
> Good, this undoes whatever raw_apply_lock_bytes() already has done.
> 
> > +    case RAW_PL_ABORT:
> > +        raw_apply_lock_bytes(s, s->perm, ~s->shared_perm, true, &local_err);
> > +        if (local_err) {
> > +            /* Theoretically the above call only unlocks bytes and it cannot
> > +             * fail. Something weird happened, report it.
> > +             */
> > +            error_report_err(local_err);
> > +        }
> > +        break;
> > +    case RAW_PL_COMMIT:
> > +        raw_apply_lock_bytes(s, new_perm, ~new_shared, true, &local_err);
> > +        if (local_err) {
> > +            /* Theoretically the above call only unlocks bytes and it cannot
> > +             * fail. Something weird happened, report it.
> > +             */
> > +            error_report_err(local_err);
> > +        }
> > +        break;
> > +    }
> > +    return ret;
> > +}
> > +
> >  static int raw_reopen_prepare(BDRVReopenState *state,
> >                                BlockReopenQueue *queue, Error **errp)
> >  {
> > @@ -549,6 +732,8 @@ static int raw_reopen_prepare(BDRVReopenState *state,
> >      BDRVRawReopenState *rs;
> >      int ret = 0;
> >      Error *local_err = NULL;
> > +    uint64_t clear_perms = state->flags & BDRV_O_RDWR ? 0 :
> > +        BLK_PERM_WRITE | BLK_PERM_RESIZE | BLK_PERM_WRITE_UNCHANGED;
> >  
> >      assert(state != NULL);
> >      assert(state->bs != NULL);
> > @@ -613,13 +798,22 @@ static int raw_reopen_prepare(BDRVReopenState *state,
> >      if (rs->fd != -1) {
> >          raw_probe_alignment(state->bs, rs->fd, &local_err);
> >          if (local_err) {
> > -            qemu_close(rs->fd);
> > -            rs->fd = -1;
> >              error_propagate(errp, local_err);
> >              ret = -EINVAL;
> > +            goto fail;
> >          }
> >      }
> >  
> > +    ret = raw_handle_perm_lock(state->bs, RAW_PL_PREPARE,
> > +                               s->perm & ~clear_perms,
> > +                               s->shared_perm, errp);
> 
> Is this a workaround for bdrv_can_set_read_only() not checking whether
> there are still writers attached? Because I think in theory, we should
> be able to assert() that clear_perms are already clear.

You are right, it seems these reopen hunks are superfluous. I will drop them.

> 
> > +    if (ret) {
> > +        goto fail;
> > +    }
> > +    return 0;
> > +fail:
> > +    qemu_close(rs->fd);
> > +    rs->fd = -1;
> >      return ret;
> >  }
> >  
> > @@ -627,6 +821,8 @@ static void raw_reopen_commit(BDRVReopenState *state)
> >  {
> >      BDRVRawReopenState *rs = state->opaque;
> >      BDRVRawState *s = state->bs->opaque;
> > +    uint64_t clear_perms = state->flags & BDRV_O_RDWR ? 0 :
> > +        BLK_PERM_WRITE | BLK_PERM_RESIZE | BLK_PERM_WRITE_UNCHANGED;
> >  
> >      s->open_flags = rs->open_flags;
> >  
> > @@ -635,12 +831,17 @@ static void raw_reopen_commit(BDRVReopenState *state)
> >  
> >      g_free(state->opaque);
> >      state->opaque = NULL;
> > +    raw_handle_perm_lock(state->bs, RAW_PL_COMMIT, s->perm & ~clear_perms,
> > +                         s->shared_perm, NULL);
> 
> Shouldn't we update s->perm here? Or probably move the update into
> raw_handle_perm_lock(). Or even more probably, as said above, replace
> the permission handling in raw_reopen_* by checking permissions in the
> generic block layer beforehand.
> 
> >  }
> >  
> >  
> >  static void raw_reopen_abort(BDRVReopenState *state)
> >  {
> > +    BDRVRawState *s = state->bs->opaque;
> >      BDRVRawReopenState *rs = state->opaque;
> > +    uint64_t clear_perms = state->flags & BDRV_O_RDWR ? 0 :
> > +        BLK_PERM_WRITE | BLK_PERM_RESIZE | BLK_PERM_WRITE_UNCHANGED;
> >  
> >       /* nothing to do if NULL, we didn't get far enough */
> >      if (rs == NULL) {
> > @@ -653,6 +854,8 @@ static void raw_reopen_abort(BDRVReopenState *state)
> >      }
> >      g_free(state->opaque);
> >      state->opaque = NULL;
> > +    raw_handle_perm_lock(state->bs, RAW_PL_ABORT, s->perm & ~clear_perms,
> > +                         s->shared_perm, NULL);
> >  }
> >  
> >  static int hdev_get_max_transfer_length(BlockDriverState *bs, int fd)
> > @@ -1410,6 +1613,10 @@ static void raw_close(BlockDriverState *bs)
> >          qemu_close(s->fd);
> >          s->fd = -1;
> >      }
> > +    if (s->lock_fd >= 0) {
> > +        qemu_close(s->lock_fd);
> > +        s->lock_fd = -1;
> > +    }
> >  }
> >  
> >  static int raw_truncate(BlockDriverState *bs, int64_t offset)
> > @@ -1947,6 +2154,56 @@ static QemuOptsList raw_create_opts = {
> >      }
> >  };
> >  
> > +static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
> > +                          Error **errp)
> > +{
> > +    return raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, errp);
> > +}
> > +
> > +static void raw_set_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared)
> > +{
> > +    BDRVRawState *s = bs->opaque;
> > +    raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL);
> > +    s->perm = perm;
> > +    s->shared_perm = shared;
> > +}
> > +
> > +static void raw_abort_perm_update(BlockDriverState *bs)
> > +{
> > +    BDRVRawState *s = bs->opaque;
> > +
> > +    raw_handle_perm_lock(bs, RAW_PL_ABORT, s->perm, s->shared_perm, NULL);
> 
> The parameters are supposed to be the new permissions, which are
> obviously ignored in the case of RAW_PL_ABORT. Would passing 0 for
> both be more obvious?

OK, I'll change that.

> 
> > +}
> > +
> > +static int raw_inactivate(BlockDriverState *bs)
> > +{
> > +    int ret;
> > +    uint64_t perm = 0;
> > +    uint64_t shared = BLK_PERM_ALL;
> > +
> > +    ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, perm, shared, NULL);
> > +    if (ret) {
> > +        return ret;
> > +    }
> > +    raw_handle_perm_lock(bs, RAW_PL_COMMIT, perm, shared, NULL);
> > +    return 0;
> > +}
> > +
> > +
> > +static void raw_invalidate_cache(BlockDriverState *bs, Error **errp)
> > +{
> > +    BDRVRawState *s = bs->opaque;
> > +    int ret;
> > +
> > +    assert(!(bdrv_get_flags(bs) & BDRV_O_INACTIVE));
> > +    ret = raw_handle_perm_lock(bs, RAW_PL_PREPARE, s->perm, s->shared_perm,
> > +                               errp);
> > +    if (ret) {
> > +        return;
> > +    }
> > +    raw_handle_perm_lock(bs, RAW_PL_COMMIT, s->perm, s->shared_perm, NULL);
> > +}
> 
> Looks okay if we really need BDRV_O_INACTIVE handling in file-posix.
> 
> Kevin

Fam

Re: [Qemu-devel] [PATCH v15 20/21] file-posix: Add image locking to perm operations

Posted by Kevin Wolf 8 years, 9 months ago

Am 26.04.2017 um 05:34 hat Fam Zheng geschrieben:
> This extends the permission bits of op blocker API to external using
> Linux OFD locks.
> 
> Each permission in @perm and @shared_perm is represented by a locked
> byte in the image file.  Requesting a permission in @perm is translated
> to a shared lock of the corresponding byte; rejecting to share the same
> permission is translated to a shared lock of a separate byte. With that,
> we use 2x number of bytes of distinct permission types.
> 
> virtlockd in libvirt locks the first byte, so we do locking from a
> higher offset.
> 
> Suggested-by: Kevin Wolf <kwolf@redhat.com>
> Signed-off-by: Fam Zheng <famz@redhat.com>

>  BlockDriver bdrv_file = {
>      .format_name = "file",
>      .protocol_name = "file",
> @@ -1977,7 +2234,11 @@ BlockDriver bdrv_file = {
>      .bdrv_get_info = raw_get_info,
>      .bdrv_get_allocated_file_size
>                          = raw_get_allocated_file_size,
> -
> +    .bdrv_inactivate = raw_inactivate,
> +    .bdrv_invalidate_cache = raw_invalidate_cache,
> +    .bdrv_check_perm = raw_check_perm,
> +    .bdrv_set_perm   = raw_set_perm,
> +    .bdrv_abort_perm_update = raw_abort_perm_update,
>      .create_opts = &raw_create_opts,
>  };

By the way, is it intentional that we apply locking only to bdrv_file,
but not to bdrv_host_device or bdrv_host_cdrom?

Kevin

Re: [Qemu-devel] [PATCH v15 20/21] file-posix: Add image locking to perm operations

Posted by Fam Zheng 8 years, 9 months ago

On Fri, 04/28 15:45, Kevin Wolf wrote:
> Am 26.04.2017 um 05:34 hat Fam Zheng geschrieben:
> > This extends the permission bits of op blocker API to external using
> > Linux OFD locks.
> > 
> > Each permission in @perm and @shared_perm is represented by a locked
> > byte in the image file.  Requesting a permission in @perm is translated
> > to a shared lock of the corresponding byte; rejecting to share the same
> > permission is translated to a shared lock of a separate byte. With that,
> > we use 2x number of bytes of distinct permission types.
> > 
> > virtlockd in libvirt locks the first byte, so we do locking from a
> > higher offset.
> > 
> > Suggested-by: Kevin Wolf <kwolf@redhat.com>
> > Signed-off-by: Fam Zheng <famz@redhat.com>
> 
> >  BlockDriver bdrv_file = {
> >      .format_name = "file",
> >      .protocol_name = "file",
> > @@ -1977,7 +2234,11 @@ BlockDriver bdrv_file = {
> >      .bdrv_get_info = raw_get_info,
> >      .bdrv_get_allocated_file_size
> >                          = raw_get_allocated_file_size,
> > -
> > +    .bdrv_inactivate = raw_inactivate,
> > +    .bdrv_invalidate_cache = raw_invalidate_cache,
> > +    .bdrv_check_perm = raw_check_perm,
> > +    .bdrv_set_perm   = raw_set_perm,
> > +    .bdrv_abort_perm_update = raw_abort_perm_update,
> >      .create_opts = &raw_create_opts,
> >  };
> 
> By the way, is it intentional that we apply locking only to bdrv_file,
> but not to bdrv_host_device or bdrv_host_cdrom?

Good question.

Though CDROM is not very interesting, I am not sure about bdrv_host_device:

1) On the one hand, a host device can contain a qcow2 image so maybe locking is
useful.  Another reason to lock is that they share the same QAPI option,
BlockdevOptionsFile. That last reason is, it should be very easy to add it.

2) On the other hand, unlike files, host devices get pretty high chances in
being accesses by multiple readers/writers, outside of QEMU, such as partition
detection, mount, fsck, etc.

What is your opinion?

Fam

Re: [Qemu-devel] [PATCH v15 20/21] file-posix: Add image locking to perm operations

Posted by Kevin Wolf 8 years, 9 months ago

Am 28.04.2017 um 17:30 hat Fam Zheng geschrieben:
> On Fri, 04/28 15:45, Kevin Wolf wrote:
> > Am 26.04.2017 um 05:34 hat Fam Zheng geschrieben:
> > > This extends the permission bits of op blocker API to external using
> > > Linux OFD locks.
> > > 
> > > Each permission in @perm and @shared_perm is represented by a locked
> > > byte in the image file.  Requesting a permission in @perm is translated
> > > to a shared lock of the corresponding byte; rejecting to share the same
> > > permission is translated to a shared lock of a separate byte. With that,
> > > we use 2x number of bytes of distinct permission types.
> > > 
> > > virtlockd in libvirt locks the first byte, so we do locking from a
> > > higher offset.
> > > 
> > > Suggested-by: Kevin Wolf <kwolf@redhat.com>
> > > Signed-off-by: Fam Zheng <famz@redhat.com>
> > 
> > >  BlockDriver bdrv_file = {
> > >      .format_name = "file",
> > >      .protocol_name = "file",
> > > @@ -1977,7 +2234,11 @@ BlockDriver bdrv_file = {
> > >      .bdrv_get_info = raw_get_info,
> > >      .bdrv_get_allocated_file_size
> > >                          = raw_get_allocated_file_size,
> > > -
> > > +    .bdrv_inactivate = raw_inactivate,
> > > +    .bdrv_invalidate_cache = raw_invalidate_cache,
> > > +    .bdrv_check_perm = raw_check_perm,
> > > +    .bdrv_set_perm   = raw_set_perm,
> > > +    .bdrv_abort_perm_update = raw_abort_perm_update,
> > >      .create_opts = &raw_create_opts,
> > >  };
> > 
> > By the way, is it intentional that we apply locking only to bdrv_file,
> > but not to bdrv_host_device or bdrv_host_cdrom?
> 
> Good question.
> 
> Though CDROM is not very interesting, I am not sure about bdrv_host_device:
> 
> 1) On the one hand, a host device can contain a qcow2 image so maybe locking is
> useful.  Another reason to lock is that they share the same QAPI option,
> BlockdevOptionsFile. That last reason is, it should be very easy to add it.
> 
> 2) On the other hand, unlike files, host devices get pretty high chances in
> being accesses by multiple readers/writers, outside of QEMU, such as partition
> detection, mount, fsck, etc.
> 
> What is your opinion?

I would add it, if nothing else to be consistent with regular files. You
don't even need qcow2 on a block device for it to be useful, even for
raw images the guest may not be able to tolerate a second writer (in
this case, share-rw of the qdev device will directly control the locking
mode).

As for 2), I don't think these other users are a problem because we're
only taking shared locks. We'll prevent other users from taking an
exclusive lock, but that's exactly right because it's true that we're
using the image.

Kevin