Use one of the 14 reserved u64s in struct kvm_userspace_memory_region2
for the user to provide `userfault_bitmap`.
The new KVM_MEM_USERFAULT memslot flag indicates whether KVM should read
the memslot's `userfault_bitmap` field. Userspace is permitted to provide
a bogus pointer; if the pointer cannot be read, KVM returns -EFAULT (with
no other information) to userspace.
Signed-off-by: James Houghton <jthoughton@google.com>
---
include/linux/kvm_host.h | 14 ++++++++++++++
include/uapi/linux/kvm.h | 4 +++-
virt/kvm/Kconfig | 3 +++
virt/kvm/kvm_main.c | 28 ++++++++++++++++++++++++++++
4 files changed, 48 insertions(+), 1 deletion(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 401439bb21e3..f7a3dfd5e224 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -590,6 +590,7 @@ struct kvm_memory_slot {
unsigned long *dirty_bitmap;
struct kvm_arch_memory_slot arch;
unsigned long userspace_addr;
+ unsigned long __user *userfault_bitmap;
u32 flags;
short id;
u16 as_id;
@@ -724,6 +725,11 @@ static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
}
#endif
+static inline bool kvm_has_userfault(struct kvm *kvm)
+{
+ return IS_ENABLED(CONFIG_HAVE_KVM_USERFAULT);
+}
+
struct kvm_memslots {
u64 generation;
atomic_long_t last_used_slot;
@@ -2553,4 +2559,12 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
struct kvm_pre_fault_memory *range);
#endif
+int kvm_gfn_userfault(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ gfn_t gfn);
+
+static inline bool kvm_memslot_userfault(struct kvm_memory_slot *memslot)
+{
+ return memslot->flags & KVM_MEM_USERFAULT;
+}
+
#endif
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 502ea63b5d2e..94be7e8b46a4 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -40,7 +40,8 @@ struct kvm_userspace_memory_region2 {
__u64 guest_memfd_offset;
__u32 guest_memfd;
__u32 pad1;
- __u64 pad2[14];
+ __u64 userfault_bitmap;
+ __u64 pad2[13];
};
/*
@@ -51,6 +52,7 @@ struct kvm_userspace_memory_region2 {
#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
#define KVM_MEM_READONLY (1UL << 1)
#define KVM_MEM_GUEST_MEMFD (1UL << 2)
+#define KVM_MEM_USERFAULT (1UL << 3)
/* for KVM_IRQ_LINE */
struct kvm_irq_level {
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 54e959e7d68f..9eb1fae238b1 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -124,3 +124,6 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
config HAVE_KVM_ARCH_GMEM_INVALIDATE
bool
depends on KVM_PRIVATE_MEM
+
+config HAVE_KVM_USERFAULT
+ bool
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de2c11dae231..23fa3e911c4e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1541,6 +1541,9 @@ static int check_memory_region_flags(struct kvm *kvm,
!(mem->flags & KVM_MEM_GUEST_MEMFD))
valid_flags |= KVM_MEM_READONLY;
+ if (kvm_has_userfault(kvm))
+ valid_flags |= KVM_MEM_USERFAULT;
+
if (mem->flags & ~valid_flags)
return -EINVAL;
@@ -2042,6 +2045,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (r)
goto out;
}
+ if (mem->flags & KVM_MEM_USERFAULT)
+ new->userfault_bitmap = (unsigned long *)mem->userfault_bitmap;
r = kvm_set_memslot(kvm, old, new, change);
if (r)
@@ -6426,3 +6431,26 @@ void kvm_exit(void)
kvm_irqfd_exit();
}
EXPORT_SYMBOL_GPL(kvm_exit);
+
+int kvm_gfn_userfault(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ gfn_t gfn)
+{
+ unsigned long bitmap_chunk = 0;
+ off_t offset;
+
+ if (!kvm_memslot_userfault(memslot))
+ return 0;
+
+ if (WARN_ON_ONCE(!memslot->userfault_bitmap))
+ return 0;
+
+ offset = gfn - memslot->base_gfn;
+
+ if (copy_from_user(&bitmap_chunk,
+ memslot->userfault_bitmap + offset / BITS_PER_LONG,
+ sizeof(bitmap_chunk)))
+ return -EFAULT;
+
+ /* Set in the bitmap means that the gfn is userfault */
+ return !!(bitmap_chunk & (1ul << (offset % BITS_PER_LONG)));
+}
--
2.47.0.338.g60cca15819-goog
Hi James,
kernel test robot noticed the following build warnings:
[auto build test WARNING on 4d911c7abee56771b0219a9fbf0120d06bdc9c14]
url: https://github.com/intel-lab-lkp/linux/commits/James-Houghton/KVM-Add-KVM_MEM_USERFAULT-memslot-flag-and-bitmap/20241205-032516
base: 4d911c7abee56771b0219a9fbf0120d06bdc9c14
patch link: https://lore.kernel.org/r/20241204191349.1730936-2-jthoughton%40google.com
patch subject: [PATCH v1 01/13] KVM: Add KVM_MEM_USERFAULT memslot flag and bitmap
config: i386-buildonly-randconfig-006 (https://download.01.org/0day-ci/archive/20241205/202412052133.pTg3UAQm-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241205/202412052133.pTg3UAQm-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202412052133.pTg3UAQm-lkp@intel.com/
All warnings (new ones prefixed by >>):
arch/x86/kvm/../../../virt/kvm/kvm_main.c: In function '__kvm_set_memory_region':
>> arch/x86/kvm/../../../virt/kvm/kvm_main.c:2049:41: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
2049 | new->userfault_bitmap = (unsigned long *)mem->userfault_bitmap;
| ^
vim +2049 arch/x86/kvm/../../../virt/kvm/kvm_main.c
1931
1932 /*
1933 * Allocate some memory and give it an address in the guest physical address
1934 * space.
1935 *
1936 * Discontiguous memory is allowed, mostly for framebuffers.
1937 *
1938 * Must be called holding kvm->slots_lock for write.
1939 */
1940 int __kvm_set_memory_region(struct kvm *kvm,
1941 const struct kvm_userspace_memory_region2 *mem)
1942 {
1943 struct kvm_memory_slot *old, *new;
1944 struct kvm_memslots *slots;
1945 enum kvm_mr_change change;
1946 unsigned long npages;
1947 gfn_t base_gfn;
1948 int as_id, id;
1949 int r;
1950
1951 r = check_memory_region_flags(kvm, mem);
1952 if (r)
1953 return r;
1954
1955 as_id = mem->slot >> 16;
1956 id = (u16)mem->slot;
1957
1958 /* General sanity checks */
1959 if ((mem->memory_size & (PAGE_SIZE - 1)) ||
1960 (mem->memory_size != (unsigned long)mem->memory_size))
1961 return -EINVAL;
1962 if (mem->guest_phys_addr & (PAGE_SIZE - 1))
1963 return -EINVAL;
1964 /* We can read the guest memory with __xxx_user() later on. */
1965 if ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
1966 (mem->userspace_addr != untagged_addr(mem->userspace_addr)) ||
1967 !access_ok((void __user *)(unsigned long)mem->userspace_addr,
1968 mem->memory_size))
1969 return -EINVAL;
1970 if (mem->flags & KVM_MEM_GUEST_MEMFD &&
1971 (mem->guest_memfd_offset & (PAGE_SIZE - 1) ||
1972 mem->guest_memfd_offset + mem->memory_size < mem->guest_memfd_offset))
1973 return -EINVAL;
1974 if (as_id >= kvm_arch_nr_memslot_as_ids(kvm) || id >= KVM_MEM_SLOTS_NUM)
1975 return -EINVAL;
1976 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
1977 return -EINVAL;
1978 if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES)
1979 return -EINVAL;
1980
1981 slots = __kvm_memslots(kvm, as_id);
1982
1983 /*
1984 * Note, the old memslot (and the pointer itself!) may be invalidated
1985 * and/or destroyed by kvm_set_memslot().
1986 */
1987 old = id_to_memslot(slots, id);
1988
1989 if (!mem->memory_size) {
1990 if (!old || !old->npages)
1991 return -EINVAL;
1992
1993 if (WARN_ON_ONCE(kvm->nr_memslot_pages < old->npages))
1994 return -EIO;
1995
1996 return kvm_set_memslot(kvm, old, NULL, KVM_MR_DELETE);
1997 }
1998
1999 base_gfn = (mem->guest_phys_addr >> PAGE_SHIFT);
2000 npages = (mem->memory_size >> PAGE_SHIFT);
2001
2002 if (!old || !old->npages) {
2003 change = KVM_MR_CREATE;
2004
2005 /*
2006 * To simplify KVM internals, the total number of pages across
2007 * all memslots must fit in an unsigned long.
2008 */
2009 if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages)
2010 return -EINVAL;
2011 } else { /* Modify an existing slot. */
2012 /* Private memslots are immutable, they can only be deleted. */
2013 if (mem->flags & KVM_MEM_GUEST_MEMFD)
2014 return -EINVAL;
2015 if ((mem->userspace_addr != old->userspace_addr) ||
2016 (npages != old->npages) ||
2017 ((mem->flags ^ old->flags) & KVM_MEM_READONLY))
2018 return -EINVAL;
2019
2020 if (base_gfn != old->base_gfn)
2021 change = KVM_MR_MOVE;
2022 else if (mem->flags != old->flags)
2023 change = KVM_MR_FLAGS_ONLY;
2024 else /* Nothing to change. */
2025 return 0;
2026 }
2027
2028 if ((change == KVM_MR_CREATE || change == KVM_MR_MOVE) &&
2029 kvm_check_memslot_overlap(slots, id, base_gfn, base_gfn + npages))
2030 return -EEXIST;
2031
2032 /* Allocate a slot that will persist in the memslot. */
2033 new = kzalloc(sizeof(*new), GFP_KERNEL_ACCOUNT);
2034 if (!new)
2035 return -ENOMEM;
2036
2037 new->as_id = as_id;
2038 new->id = id;
2039 new->base_gfn = base_gfn;
2040 new->npages = npages;
2041 new->flags = mem->flags;
2042 new->userspace_addr = mem->userspace_addr;
2043 if (mem->flags & KVM_MEM_GUEST_MEMFD) {
2044 r = kvm_gmem_bind(kvm, new, mem->guest_memfd, mem->guest_memfd_offset);
2045 if (r)
2046 goto out;
2047 }
2048 if (mem->flags & KVM_MEM_USERFAULT)
> 2049 new->userfault_bitmap = (unsigned long *)mem->userfault_bitmap;
2050
2051 r = kvm_set_memslot(kvm, old, new, change);
2052 if (r)
2053 goto out_unbind;
2054
2055 return 0;
2056
2057 out_unbind:
2058 if (mem->flags & KVM_MEM_GUEST_MEMFD)
2059 kvm_gmem_unbind(new);
2060 out:
2061 kfree(new);
2062 return r;
2063 }
2064 EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
2065
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
> arch/x86/kvm/../../../virt/kvm/kvm_main.c: In function '__kvm_set_memory_region':
> >> arch/x86/kvm/../../../virt/kvm/kvm_main.c:2049:41: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
> 2049 | new->userfault_bitmap = (unsigned long *)mem->userfault_bitmap;
> | ^

I realize that, not only have I done this cast slightly wrong, I'm missing a
few checks on userfault_bitmap that I should have. Applying this diff, or at
least something like it, to fix it:

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b552cdef2850..30f09141df64 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1977,6 +1977,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
return -EINVAL;
if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES)
return -EINVAL;
+ if (mem->flags & KVM_MEM_USERFAULT &&
+ ((mem->userfault_bitmap != untagged_addr(mem->userfault_bitmap)) ||
+ !access_ok((void __user *)(unsigned long)mem->userfault_bitmap,
+ DIV_ROUND_UP(mem->memory_size >> PAGE_SHIFT, BITS_PER_LONG)
+ * sizeof(long))))
+ return -EINVAL;
slots = __kvm_memslots(kvm, as_id);
@@ -2053,7 +2059,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out;
}
if (mem->flags & KVM_MEM_USERFAULT)
- new->userfault_bitmap = (unsigned long *)mem->userfault_bitmap;
+ new->userfault_bitmap =
+ (unsigned long __user *)(unsigned long)mem->userfault_bitmap;
r = kvm_set_memslot(kvm, old, new, change);
if (r)
Hi James,
kernel test robot noticed the following build warnings:
[auto build test WARNING on 4d911c7abee56771b0219a9fbf0120d06bdc9c14]
url: https://github.com/intel-lab-lkp/linux/commits/James-Houghton/KVM-Add-KVM_MEM_USERFAULT-memslot-flag-and-bitmap/20241205-032516
base: 4d911c7abee56771b0219a9fbf0120d06bdc9c14
patch link: https://lore.kernel.org/r/20241204191349.1730936-2-jthoughton%40google.com
patch subject: [PATCH v1 01/13] KVM: Add KVM_MEM_USERFAULT memslot flag and bitmap
config: x86_64-randconfig-121 (https://download.01.org/0day-ci/archive/20241205/202412051904.GNL7BE1X-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241205/202412051904.GNL7BE1X-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202412051904.GNL7BE1X-lkp@intel.com/
sparse warnings: (new ones prefixed by >>)
arch/x86/kvm/../../../virt/kvm/kvm_main.c: note: in included file:
include/linux/kvm_host.h:2080:54: sparse: sparse: array of flexible structures
include/linux/kvm_host.h:2082:56: sparse: sparse: array of flexible structures
>> arch/x86/kvm/../../../virt/kvm/kvm_main.c:2049:39: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected unsigned long [noderef] __user *userfault_bitmap @@ got unsigned long * @@
arch/x86/kvm/../../../virt/kvm/kvm_main.c:2049:39: sparse: expected unsigned long [noderef] __user *userfault_bitmap
arch/x86/kvm/../../../virt/kvm/kvm_main.c:2049:39: sparse: got unsigned long *
arch/x86/kvm/../../../virt/kvm/kvm_main.c:626:25: sparse: sparse: context imbalance in 'kvm_mmu_notifier_invalidate_range_start' - different lock contexts for basic block
arch/x86/kvm/../../../virt/kvm/kvm_main.c:626:25: sparse: sparse: context imbalance in 'kvm_mmu_notifier_invalidate_range_end' - different lock contexts for basic block
arch/x86/kvm/../../../virt/kvm/kvm_main.c:626:25: sparse: sparse: context imbalance in 'kvm_mmu_notifier_clear_flush_young' - different lock contexts for basic block
arch/x86/kvm/../../../virt/kvm/kvm_main.c:626:25: sparse: sparse: context imbalance in 'kvm_mmu_notifier_clear_young' - different lock contexts for basic block
arch/x86/kvm/../../../virt/kvm/kvm_main.c:626:25: sparse: sparse: context imbalance in 'kvm_mmu_notifier_test_young' - different lock contexts for basic block
arch/x86/kvm/../../../virt/kvm/kvm_main.c: note: in included file (through include/linux/mutex.h, include/linux/kvm_types.h, include/kvm/iodev.h):
include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
arch/x86/kvm/../../../virt/kvm/kvm_main.c:1960:49: sparse: sparse: self-comparison always evaluates to false
include/linux/list.h:83:21: sparse: sparse: self-comparison always evaluates to true
vim +2049 arch/x86/kvm/../../../virt/kvm/kvm_main.c
1931
1932 /*
1933 * Allocate some memory and give it an address in the guest physical address
1934 * space.
1935 *
1936 * Discontiguous memory is allowed, mostly for framebuffers.
1937 *
1938 * Must be called holding kvm->slots_lock for write.
1939 */
1940 int __kvm_set_memory_region(struct kvm *kvm,
1941 const struct kvm_userspace_memory_region2 *mem)
1942 {
1943 struct kvm_memory_slot *old, *new;
1944 struct kvm_memslots *slots;
1945 enum kvm_mr_change change;
1946 unsigned long npages;
1947 gfn_t base_gfn;
1948 int as_id, id;
1949 int r;
1950
1951 r = check_memory_region_flags(kvm, mem);
1952 if (r)
1953 return r;
1954
1955 as_id = mem->slot >> 16;
1956 id = (u16)mem->slot;
1957
1958 /* General sanity checks */
1959 if ((mem->memory_size & (PAGE_SIZE - 1)) ||
1960 (mem->memory_size != (unsigned long)mem->memory_size))
1961 return -EINVAL;
1962 if (mem->guest_phys_addr & (PAGE_SIZE - 1))
1963 return -EINVAL;
1964 /* We can read the guest memory with __xxx_user() later on. */
1965 if ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
1966 (mem->userspace_addr != untagged_addr(mem->userspace_addr)) ||
1967 !access_ok((void __user *)(unsigned long)mem->userspace_addr,
1968 mem->memory_size))
1969 return -EINVAL;
1970 if (mem->flags & KVM_MEM_GUEST_MEMFD &&
1971 (mem->guest_memfd_offset & (PAGE_SIZE - 1) ||
1972 mem->guest_memfd_offset + mem->memory_size < mem->guest_memfd_offset))
1973 return -EINVAL;
1974 if (as_id >= kvm_arch_nr_memslot_as_ids(kvm) || id >= KVM_MEM_SLOTS_NUM)
1975 return -EINVAL;
1976 if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
1977 return -EINVAL;
1978 if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES)
1979 return -EINVAL;
1980
1981 slots = __kvm_memslots(kvm, as_id);
1982
1983 /*
1984 * Note, the old memslot (and the pointer itself!) may be invalidated
1985 * and/or destroyed by kvm_set_memslot().
1986 */
1987 old = id_to_memslot(slots, id);
1988
1989 if (!mem->memory_size) {
1990 if (!old || !old->npages)
1991 return -EINVAL;
1992
1993 if (WARN_ON_ONCE(kvm->nr_memslot_pages < old->npages))
1994 return -EIO;
1995
1996 return kvm_set_memslot(kvm, old, NULL, KVM_MR_DELETE);
1997 }
1998
1999 base_gfn = (mem->guest_phys_addr >> PAGE_SHIFT);
2000 npages = (mem->memory_size >> PAGE_SHIFT);
2001
2002 if (!old || !old->npages) {
2003 change = KVM_MR_CREATE;
2004
2005 /*
2006 * To simplify KVM internals, the total number of pages across
2007 * all memslots must fit in an unsigned long.
2008 */
2009 if ((kvm->nr_memslot_pages + npages) < kvm->nr_memslot_pages)
2010 return -EINVAL;
2011 } else { /* Modify an existing slot. */
2012 /* Private memslots are immutable, they can only be deleted. */
2013 if (mem->flags & KVM_MEM_GUEST_MEMFD)
2014 return -EINVAL;
2015 if ((mem->userspace_addr != old->userspace_addr) ||
2016 (npages != old->npages) ||
2017 ((mem->flags ^ old->flags) & KVM_MEM_READONLY))
2018 return -EINVAL;
2019
2020 if (base_gfn != old->base_gfn)
2021 change = KVM_MR_MOVE;
2022 else if (mem->flags != old->flags)
2023 change = KVM_MR_FLAGS_ONLY;
2024 else /* Nothing to change. */
2025 return 0;
2026 }
2027
2028 if ((change == KVM_MR_CREATE || change == KVM_MR_MOVE) &&
2029 kvm_check_memslot_overlap(slots, id, base_gfn, base_gfn + npages))
2030 return -EEXIST;
2031
2032 /* Allocate a slot that will persist in the memslot. */
2033 new = kzalloc(sizeof(*new), GFP_KERNEL_ACCOUNT);
2034 if (!new)
2035 return -ENOMEM;
2036
2037 new->as_id = as_id;
2038 new->id = id;
2039 new->base_gfn = base_gfn;
2040 new->npages = npages;
2041 new->flags = mem->flags;
2042 new->userspace_addr = mem->userspace_addr;
2043 if (mem->flags & KVM_MEM_GUEST_MEMFD) {
2044 r = kvm_gmem_bind(kvm, new, mem->guest_memfd, mem->guest_memfd_offset);
2045 if (r)
2046 goto out;
2047 }
2048 if (mem->flags & KVM_MEM_USERFAULT)
> 2049 new->userfault_bitmap = (unsigned long *)mem->userfault_bitmap;
2050
2051 r = kvm_set_memslot(kvm, old, new, change);
2052 if (r)
2053 goto out_unbind;
2054
2055 return 0;
2056
2057 out_unbind:
2058 if (mem->flags & KVM_MEM_GUEST_MEMFD)
2059 kvm_gmem_unbind(new);
2060 out:
2061 kfree(new);
2062 return r;
2063 }
2064 EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
2065
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2026 Red Hat, Inc.