Hi,
Regarding the subject (and the commit message): should we still be calling
these "private" memslots, or should they be called guest_memfd memslots?
On Sun, Nov 5, 2023 at 4:34 PM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> From: Sean Christopherson <seanjc@google.com>
>
> Add support for creating "private" memslots via KVM_CREATE_GUEST_MEMFD and
> KVM_SET_USER_MEMORY_REGION2. Make vm_userspace_mem_region_add() a wrapper
> to its effective replacement, vm_mem_add(), so that private memslots are
> fully opt-in, i.e. don't require update all tests that add memory regions.
nit: "don't require update" -> "don't require updating"
>
> Pivot on the KVM_MEM_PRIVATE flag instead of the validity of the "gmem"
nit: KVM_MEM_PRIVATE -> KVM_MEM_GUEST_MEMFD (the code below checks KVM_MEM_GUEST_MEMFD)
> file descriptor so that simple tests can let vm_mem_add() do the heavy
> lifting of creating the guest memfd, but also allow the caller to pass in
> an explicit fd+offset so that fancier tests can do things like back
> multiple memslots with a single file. If the caller passes in a fd, dup()
> the fd so that (a) __vm_mem_region_delete() can close the fd associated
> with the memory region without needing yet another flag, and (b) so that
> the caller can safely close its copy of the fd without having to first
> destroy memslots.
>
> Co-developed-by: Ackerley Tng <ackerleytng@google.com>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> Message-Id: <20231027182217.3615211-27-seanjc@google.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> .../selftests/kvm/include/kvm_util_base.h | 23 ++++++
> .../testing/selftests/kvm/include/test_util.h | 5 ++
> tools/testing/selftests/kvm/lib/kvm_util.c | 76 +++++++++++--------
> 3 files changed, 73 insertions(+), 31 deletions(-)
>
> diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
> index 9f144841c2ee..9f861182c02a 100644
> --- a/tools/testing/selftests/kvm/include/kvm_util_base.h
> +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
> @@ -431,6 +431,26 @@ static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
>
> void vm_create_irqchip(struct kvm_vm *vm);
>
> +static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
> + uint64_t flags)
> +{
> + struct kvm_create_guest_memfd guest_memfd = {
> + .size = size,
> + .flags = flags,
> + };
> +
> + return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
> +}
> +
> +static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
> + uint64_t flags)
> +{
> + int fd = __vm_create_guest_memfd(vm, size, flags);
> +
> + TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd));
> + return fd;
> +}
> +
> void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
> uint64_t gpa, uint64_t size, void *hva);
> int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
> @@ -439,6 +459,9 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
> enum vm_mem_backing_src_type src_type,
> uint64_t guest_paddr, uint32_t slot, uint64_t npages,
> uint32_t flags);
> +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
> + uint64_t guest_paddr, uint32_t slot, uint64_t npages,
> + uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
>
> void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
> void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
> diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
> index 7e614adc6cf4..7257f2243ab9 100644
> --- a/tools/testing/selftests/kvm/include/test_util.h
> +++ b/tools/testing/selftests/kvm/include/test_util.h
> @@ -142,6 +142,11 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t)
> return vm_mem_backing_src_alias(t)->flag & MAP_SHARED;
> }
>
> +static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t)
> +{
> + return t != VM_MEM_SRC_ANONYMOUS && t != VM_MEM_SRC_SHMEM;
> +}
> +
> /* Aligns x up to the next multiple of size. Size must be a power of 2. */
> static inline uint64_t align_up(uint64_t x, uint64_t size)
> {
> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
> index 3676b37bea38..b63500fca627 100644
> --- a/tools/testing/selftests/kvm/lib/kvm_util.c
> +++ b/tools/testing/selftests/kvm/lib/kvm_util.c
> @@ -669,6 +669,8 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
> TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
> close(region->fd);
> }
> + if (region->region.guest_memfd >= 0)
> + close(region->region.guest_memfd);
>
> free(region);
> }
> @@ -870,36 +872,15 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
> errno, strerror(errno));
> }
>
> -/*
> - * VM Userspace Memory Region Add
> - *
> - * Input Args:
> - * vm - Virtual Machine
> - * src_type - Storage source for this region.
> - * NULL to use anonymous memory.
"VM_MEM_SRC_ANONYMOUS to use anonymous memory"
> - * guest_paddr - Starting guest physical address
> - * slot - KVM region slot
> - * npages - Number of physical pages
> - * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
> - *
> - * Output Args: None
> - *
> - * Return: None
> - *
> - * Allocates a memory area of the number of pages specified by npages
> - * and maps it to the VM specified by vm, at a starting physical address
> - * given by guest_paddr. The region is created with a KVM region slot
> - * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The
> - * region is created with the flags given by flags.
> - */
> -void vm_userspace_mem_region_add(struct kvm_vm *vm,
> - enum vm_mem_backing_src_type src_type,
> - uint64_t guest_paddr, uint32_t slot, uint64_t npages,
> - uint32_t flags)
> +/* FIXME: This thing needs to be ripped apart and rewritten. */
It sure does :)
With these nits addressed:
Reviewed-by: Fuad Tabba <tabba@google.com>
Tested-by: Fuad Tabba <tabba@google.com>
Cheers,
/fuad
> +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
> + uint64_t guest_paddr, uint32_t slot, uint64_t npages,
> + uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset)
> {
> int ret;
> struct userspace_mem_region *region;
> size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
> + size_t mem_size = npages * vm->page_size;
> size_t alignment;
>
> TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
> @@ -952,7 +933,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
> /* Allocate and initialize new mem region structure. */
> region = calloc(1, sizeof(*region));
> TEST_ASSERT(region != NULL, "Insufficient Memory");
> - region->mmap_size = npages * vm->page_size;
> + region->mmap_size = mem_size;
>
> #ifdef __s390x__
> /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
> @@ -999,14 +980,38 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
> /* As needed perform madvise */
> if ((src_type == VM_MEM_SRC_ANONYMOUS ||
> src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
> - ret = madvise(region->host_mem, npages * vm->page_size,
> + ret = madvise(region->host_mem, mem_size,
> src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
> TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
> - region->host_mem, npages * vm->page_size,
> + region->host_mem, mem_size,
> vm_mem_backing_src_alias(src_type)->name);
> }
>
> region->backing_src_type = src_type;
> +
> + if (flags & KVM_MEM_GUEST_MEMFD) {
> + if (guest_memfd < 0) {
> + uint32_t guest_memfd_flags = 0;
> + TEST_ASSERT(!guest_memfd_offset,
> + "Offset must be zero when creating new guest_memfd");
> + guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
> + } else {
> + /*
> + * Install a unique fd for each memslot so that the fd
> + * can be closed when the region is deleted without
> + * needing to track if the fd is owned by the framework
> + * or by the caller.
> + */
> + guest_memfd = dup(guest_memfd);
> + TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
> + }
> +
> + region->region.guest_memfd = guest_memfd;
> + region->region.guest_memfd_offset = guest_memfd_offset;
> + } else {
> + region->region.guest_memfd = -1;
> + }
> +
> region->unused_phy_pages = sparsebit_alloc();
> sparsebit_set_num(region->unused_phy_pages,
> guest_paddr >> vm->page_shift, npages);
> @@ -1019,9 +1024,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
> TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
> " rc: %i errno: %i\n"
> " slot: %u flags: 0x%x\n"
> - " guest_phys_addr: 0x%lx size: 0x%lx",
> + " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d\n",
> ret, errno, slot, flags,
> - guest_paddr, (uint64_t) region->region.memory_size);
> + guest_paddr, (uint64_t) region->region.memory_size,
> + region->region.guest_memfd);
>
> /* Add to quick lookup data structures */
> vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
> @@ -1042,6 +1048,14 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
> }
> }
>
> +void vm_userspace_mem_region_add(struct kvm_vm *vm,
> + enum vm_mem_backing_src_type src_type,
> + uint64_t guest_paddr, uint32_t slot,
> + uint64_t npages, uint32_t flags)
> +{
> + vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0);
> +}
> +
> /*
> * Memslot to region
> *
> --
> 2.39.1
>
>