[RFC PATCH v2 15/21] physmem: extract ram_block_discard_range_fd() from ram_block_discard_range()

Xiaoyao Li posted 21 patches 1 year, 1 month ago
[RFC PATCH v2 15/21] physmem: extract ram_block_discard_range_fd() from ram_block_discard_range()
Posted by Xiaoyao Li 1 year, 1 month ago
Split ram_block_discard_range() in two: the alignment check and the
sanity (overrun) check stay in ram_block_discard_range(), while the
actual discard logic (fallocate/madvise) moves into a separate function,
ram_block_discard_range_fd(), which takes an explicit fd as an input
parameter.

A later patch will use ram_block_discard_range_fd() to discard a private
memory range from the gmem fd. Private memory <-> shared memory
conversion requires 4KB alignment instead of RamBlock.page_size, which
is why the new function does not perform the RamBlock.page_size
alignment checks itself.

Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
 softmmu/physmem.c | 192 ++++++++++++++++++++++++----------------------
 1 file changed, 100 insertions(+), 92 deletions(-)

diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 34d580ec0d39..6ee6bc794f44 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -3425,117 +3425,125 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
     return ret;
 }
 
+static int ram_block_discard_range_fd(RAMBlock *rb, uint64_t start,
+                                      size_t length, int fd)
+{
+    uint8_t *host_startaddr = rb->host + start;
+    bool need_madvise, need_fallocate;
+    int ret = -1;
+
+    errno = ENOTSUP; /* If we are missing MADVISE etc */
+
+    /* The logic here is messy;
+     *    madvise DONTNEED fails for hugepages
+     *    fallocate works on hugepages and shmem
+     *    shared anonymous memory requires madvise REMOVE
+     */
+    need_madvise = (rb->page_size == qemu_host_page_size) && (rb->fd == fd);
+    need_fallocate = fd != -1;
+
+    if (need_fallocate) {
+        /* For a file, this causes the area of the file to be zero'd
+         * if read, and for hugetlbfs also causes it to be unmapped
+         * so a userfault will trigger.
+         */
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+        /*
+         * We'll discard data from the actual file, even though we only
+         * have a MAP_PRIVATE mapping, possibly messing with other
+         * MAP_PRIVATE/MAP_SHARED mappings. There is no easy way to
+         * change that behavior whithout violating the promised
+         * semantics of ram_block_discard_range().
+         *
+         * Only warn, because it works as long as nobody else uses that
+         * file.
+         */
+        if (!qemu_ram_is_shared(rb)) {
+            warn_report_once("%s: Discarding RAM"
+                                " in private file mappings is possibly"
+                                " dangerous, because it will modify the"
+                                " underlying file and will affect other"
+                                " users of the file", __func__);
+        }
+
+        ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                        start, length);
+        if (ret) {
+            ret = -errno;
+            error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
+                            __func__, rb->idstr, start, length, ret);
+            return ret;
+        }
+#else
+        ret = -ENOSYS;
+        error_report("%s: fallocate not available/file "
+                     "%s:%" PRIx64 " +%zx (%d)",
+                     __func__, rb->idstr, start, length, ret);
+        return ret;
+#endif
+    }
+
+    if (need_madvise) {
+        /* For normal RAM this causes it to be unmapped,
+         * for shared memory it causes the local mapping to disappear
+         * and to fall back on the file contents (which we just
+         * fallocate'd away).
+         */
+#if defined(CONFIG_MADVISE)
+        if (qemu_ram_is_shared(rb) && fd < 0) {
+            ret = madvise(host_startaddr, length, QEMU_MADV_REMOVE);
+        } else {
+            ret = madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
+        }
+        if (ret) {
+            ret = -errno;
+            error_report("%s: Failed to discard range %s:%" PRIx64 " +%zx (%d)",
+                         __func__, rb->idstr, start, length, ret);
+            return ret;
+        }
+#else
+        ret = -ENOSYS;
+        error_report("%s: MADVISE not available %s:%" PRIx64 " +%zx (%d)",
+                        __func__, rb->idstr, start, length, ret);
+        return ret;
+#endif
+    }
+
+    trace_ram_block_discard_range(rb->idstr, host_startaddr, length,
+                                  need_madvise, need_fallocate, ret);
+    return ret;
+}
+
 /*
  * Unmap pages of memory from start to start+length such that
  * they a) read as 0, b) Trigger whatever fault mechanism
  * the OS provides for postcopy.
+ *
  * The pages must be unmapped by the end of the function.
- * Returns: 0 on success, none-0 on failure
- *
+ * Returns: 0 on success, non-zero on failure.
  */
 int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
 {
-    int ret = -1;
-
     uint8_t *host_startaddr = rb->host + start;
 
     if (!QEMU_PTR_IS_ALIGNED(host_startaddr, rb->page_size)) {
         error_report("%s: Unaligned start address: %p",
                      __func__, host_startaddr);
-        goto err;
+        return -1;
     }
 
-    if ((start + length) <= rb->max_length) {
-        bool need_madvise, need_fallocate;
-        if (!QEMU_IS_ALIGNED(length, rb->page_size)) {
-            error_report("%s: Unaligned length: %zx", __func__, length);
-            goto err;
-        }
-
-        errno = ENOTSUP; /* If we are missing MADVISE etc */
-
-        /* The logic here is messy;
-         *    madvise DONTNEED fails for hugepages
-         *    fallocate works on hugepages and shmem
-         *    shared anonymous memory requires madvise REMOVE
-         */
-        need_madvise = (rb->page_size == qemu_host_page_size);
-        need_fallocate = rb->fd != -1;
-        if (need_fallocate) {
-            /* For a file, this causes the area of the file to be zero'd
-             * if read, and for hugetlbfs also causes it to be unmapped
-             * so a userfault will trigger.
-             */
-#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
-            /*
-             * We'll discard data from the actual file, even though we only
-             * have a MAP_PRIVATE mapping, possibly messing with other
-             * MAP_PRIVATE/MAP_SHARED mappings. There is no easy way to
-             * change that behavior whithout violating the promised
-             * semantics of ram_block_discard_range().
-             *
-             * Only warn, because it works as long as nobody else uses that
-             * file.
-             */
-            if (!qemu_ram_is_shared(rb)) {
-                warn_report_once("%s: Discarding RAM"
-                                 " in private file mappings is possibly"
-                                 " dangerous, because it will modify the"
-                                 " underlying file and will affect other"
-                                 " users of the file", __func__);
-            }
-
-            ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-                            start, length);
-            if (ret) {
-                ret = -errno;
-                error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
-                             __func__, rb->idstr, start, length, ret);
-                goto err;
-            }
-#else
-            ret = -ENOSYS;
-            error_report("%s: fallocate not available/file"
-                         "%s:%" PRIx64 " +%zx (%d)",
-                         __func__, rb->idstr, start, length, ret);
-            goto err;
-#endif
-        }
-        if (need_madvise) {
-            /* For normal RAM this causes it to be unmapped,
-             * for shared memory it causes the local mapping to disappear
-             * and to fall back on the file contents (which we just
-             * fallocate'd away).
-             */
-#if defined(CONFIG_MADVISE)
-            if (qemu_ram_is_shared(rb) && rb->fd < 0) {
-                ret = madvise(host_startaddr, length, QEMU_MADV_REMOVE);
-            } else {
-                ret = madvise(host_startaddr, length, QEMU_MADV_DONTNEED);
-            }
-            if (ret) {
-                ret = -errno;
-                error_report("%s: Failed to discard range "
-                             "%s:%" PRIx64 " +%zx (%d)",
-                             __func__, rb->idstr, start, length, ret);
-                goto err;
-            }
-#else
-            ret = -ENOSYS;
-            error_report("%s: MADVISE not available %s:%" PRIx64 " +%zx (%d)",
-                         __func__, rb->idstr, start, length, ret);
-            goto err;
-#endif
-        }
-        trace_ram_block_discard_range(rb->idstr, host_startaddr, length,
-                                      need_madvise, need_fallocate, ret);
-    } else {
+    if ((start + length) > rb->max_length) {
         error_report("%s: Overrun block '%s' (%" PRIu64 "/%zx/" RAM_ADDR_FMT")",
                      __func__, rb->idstr, start, length, rb->max_length);
+        return -1;
     }
 
-err:
-    return ret;
+    if (!QEMU_IS_ALIGNED(length, rb->page_size)) {
+        error_report("%s: Unaligned length: %zx", __func__, length);
+        return -1;
+    }
+
+    return ram_block_discard_range_fd(rb, start, length, rb->fd);
 }
 
 bool ramblock_is_pmem(RAMBlock *rb)
-- 
2.34.1