Introduce a "version 2" of KVM_SET_MEMORY_ATTRIBUTES to support returning
information back to userspace.
This new ioctl and structure will, in a later patch, be shared as a
guest_memfd ioctl, where the padding in the new kvm_memory_attributes2
structure will be for writing the response from the guest_memfd ioctl to
userspace.
A new ioctl is necessary for these reasons:
1. KVM_SET_MEMORY_ATTRIBUTES is currently a write-only ioctl and does not
allow userspace to read fields. There's nothing in code (yet?) that
validates this, but using _IOWR for consistency would be prudent.
2. KVM_SET_MEMORY_ATTRIBUTES, when used as a guest_memfd ioctl, will need
an additional field to provide userspace with more error details.
Alternatively, a completely new ioctl could be defined, unrelated to
KVM_SET_MEMORY_ATTRIBUTES, but using the same ioctl number and struct for
the vm and guest_memfd ioctls streamlines the interface for userspace. In
addition, any memory attributes, implemented on the vm or guest_memfd
ioctl, can be easily shared with the other.
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
Documentation/virt/kvm/api.rst | 32 +++++++++++++++++++++++++++++++
include/uapi/linux/kvm.h | 12 ++++++++++++
virt/kvm/kvm_main.c | 35 +++++++++++++++++++++++++++++++---
3 files changed, 76 insertions(+), 3 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 754b662a453c3..a812769d79bf6 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6355,6 +6355,8 @@ S390:
Returns -EINVAL if the VM has the KVM_VM_S390_UCONTROL flag set.
Returns -EINVAL if called on a protected VM.
+.. _KVM_SET_MEMORY_ATTRIBUTES:
+
4.141 KVM_SET_MEMORY_ATTRIBUTES
-------------------------------
@@ -6512,6 +6514,36 @@ the capability to be present.
`flags` must currently be zero.
+4.144 KVM_SET_MEMORY_ATTRIBUTES2
+---------------------------------
+
+:Capability: KVM_CAP_MEMORY_ATTRIBUTES2
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_memory_attributes2 (in/out)
+:Returns: 0 on success, <0 on error
+
+KVM_SET_MEMORY_ATTRIBUTES2 is an extension to
+KVM_SET_MEMORY_ATTRIBUTES that supports returning (writing) values to
+userspace. The original (pre-extension) fields are shared with
+KVM_SET_MEMORY_ATTRIBUTES identically.
+
+Attribute values are shared with KVM_SET_MEMORY_ATTRIBUTES.
+
+::
+
+ struct kvm_memory_attributes2 {
+ __u64 address;
+ __u64 size;
+ __u64 attributes;
+ __u64 flags;
+ __u64 reserved[4];
+ };
+
+ #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
+
+See also: :ref: `KVM_SET_MEMORY_ATTRIBUTES`.
+
.. _kvm_run:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 52f6000ab0208..c300e38c7c9cd 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -963,6 +963,7 @@ struct kvm_enable_cap {
#define KVM_CAP_RISCV_MP_STATE_RESET 242
#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
#define KVM_CAP_GUEST_MEMFD_FLAGS 244
+#define KVM_CAP_MEMORY_ATTRIBUTES2 245
struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1617,4 +1618,15 @@ struct kvm_pre_fault_memory {
__u64 padding[5];
};
+/* Available with KVM_CAP_MEMORY_ATTRIBUTES2 */
+#define KVM_SET_MEMORY_ATTRIBUTES2 _IOWR(KVMIO, 0xd6, struct kvm_memory_attributes2)
+
+struct kvm_memory_attributes2 {
+ __u64 address;
+ __u64 size;
+ __u64 attributes;
+ __u64 flags;
+ __u64 reserved[4];
+};
+
#endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 35166754a22b4..dd84b377e46db 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2621,7 +2621,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
return r;
}
static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
- struct kvm_memory_attributes *attrs)
+ struct kvm_memory_attributes2 *attrs)
{
gfn_t start, end;
@@ -4959,6 +4959,7 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_DEVICE_CTRL:
return 1;
#ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
+ case KVM_CAP_MEMORY_ATTRIBUTES2:
case KVM_CAP_MEMORY_ATTRIBUTES:
if (!vm_memory_attributes)
return 0;
@@ -5184,6 +5185,14 @@ do { \
sizeof_field(struct kvm_userspace_memory_region2, field)); \
} while (0)
+#define SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(field) \
+do { \
+ BUILD_BUG_ON(offsetof(struct kvm_set_memory_attributes, field) != \
+ offsetof(struct kvm_set_memory_attributes2, field)); \
+ BUILD_BUG_ON(sizeof_field(struct kvm_set_memory_attributes, field) != \
+ sizeof_field(struct kvm_set_memory_attributes2, field)); \
+} while (0)
+
static long kvm_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -5366,15 +5375,35 @@ static long kvm_vm_ioctl(struct file *filp,
}
#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
#ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
+ case KVM_SET_MEMORY_ATTRIBUTES2:
case KVM_SET_MEMORY_ATTRIBUTES: {
- struct kvm_memory_attributes attrs;
+ struct kvm_memory_attributes2 attrs;
+ unsigned long size;
+
+ if (ioctl == KVM_SET_MEMORY_ATTRIBUTES) {
+ /*
+ * Fields beyond struct kvm_userspace_memory_region shouldn't be
+ * accessed, but avoid leaking kernel memory in case of a bug.
+ */
+ memset(&mem, 0, sizeof(mem));
+ size = sizeof(struct kvm_set_memory_attributes);
+ } else {
+ size = sizeof(struct kvm_set_memory_attributes2);
+ }
+
+ /* Ensure the common parts of the two structs are identical. */
+ SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(slot);
+ SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(flags);
+ SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(guest_phys_addr);
+ SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(memory_size);
+ SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(userspace_addr);
r = -ENOTTY;
if (!vm_memory_attributes)
goto out;
r = -EFAULT;
- if (copy_from_user(&attrs, argp, sizeof(attrs)))
+ if (copy_from_user(&attrs, argp, size))
goto out;
r = kvm_vm_ioctl_set_mem_attributes(kvm, &attrs);
--
2.51.0.858.gf9c4a03a3a-goog
On 17/10/2025 21:11, Ackerley Tng wrote:
> Introduce a "version 2" of KVM_SET_MEMORY_ATTRIBUTES to support returning
> information back to userspace.
>
> This new ioctl and structure will, in a later patch, be shared as a
> guest_memfd ioctl, where the padding in the new kvm_memory_attributes2
> structure will be for writing the response from the guest_memfd ioctl to
> userspace.
>
> A new ioctl is necessary for these reasons:
>
> 1. KVM_SET_MEMORY_ATTRIBUTES is currently a write-only ioctl and does not
> allow userspace to read fields. There's nothing in code (yet?) that
> validates this, but using _IOWR for consistency would be prudent.
>
> 2. KVM_SET_MEMORY_ATTRIBUTES, when used as a guest_memfd ioctl, will need
> an additional field to provide userspace with more error details.
>
> Alternatively, a completely new ioctl could be defined, unrelated to
> KVM_SET_MEMORY_ATTRIBUTES, but using the same ioctl number and struct for
> the vm and guest_memfd ioctls streamlines the interface for userspace. In
> addition, any memory attributes, implemented on the vm or guest_memfd
> ioctl, can be easily shared with the other.
>
> Suggested-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> ---
> Documentation/virt/kvm/api.rst | 32 +++++++++++++++++++++++++++++++
> include/uapi/linux/kvm.h | 12 ++++++++++++
> virt/kvm/kvm_main.c | 35 +++++++++++++++++++++++++++++++---
> 3 files changed, 76 insertions(+), 3 deletions(-)
>
[...]
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 52f6000ab0208..c300e38c7c9cd 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
[...]
> @@ -5366,15 +5375,35 @@ static long kvm_vm_ioctl(struct file *filp,
> }
> #endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
> #ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
> + case KVM_SET_MEMORY_ATTRIBUTES2:
> case KVM_SET_MEMORY_ATTRIBUTES: {
> - struct kvm_memory_attributes attrs;
> + struct kvm_memory_attributes2 attrs;
> + unsigned long size;
> +
> + if (ioctl == KVM_SET_MEMORY_ATTRIBUTES) {
> + /*
> + * Fields beyond struct kvm_userspace_memory_region shouldn't be
> + * accessed, but avoid leaking kernel memory in case of a bug.
> + */
> + memset(&mem, 0, sizeof(mem));
s/mem/attrs/g
> + size = sizeof(struct kvm_set_memory_attributes);
> + } else {
> + size = sizeof(struct kvm_set_memory_attributes2);
s/kvm_set_memory_attributes/kvm_memory_attributes/ (on both sizeof lines
above and in the SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD macro).
> + }
> +
> + /* Ensure the common parts of the two structs are identical. */
> + SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(slot);
> + SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(flags);
> + SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(guest_phys_addr);
> + SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(memory_size);
> + SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(userspace_addr);
The fields are:
* address
* size
* attributes
* flags
The list you've got appears to match struct kvm_userspace_memory_region
- copy/paste error?
Thanks,
Steve
>
> r = -ENOTTY;
> if (!vm_memory_attributes)
> goto out;
>
> r = -EFAULT;
> - if (copy_from_user(&attrs, argp, sizeof(attrs)))
> + if (copy_from_user(&attrs, argp, size))
> goto out;
>
> r = kvm_vm_ioctl_set_mem_attributes(kvm, &attrs);
Steven Price <steven.price@arm.com> writes:
> On 17/10/2025 21:11, Ackerley Tng wrote:
>>
>> [...snip...]
>>
>> @@ -5366,15 +5375,35 @@ static long kvm_vm_ioctl(struct file *filp,
>> }
>> #endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
>> #ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
>> + case KVM_SET_MEMORY_ATTRIBUTES2:
>> case KVM_SET_MEMORY_ATTRIBUTES: {
>> - struct kvm_memory_attributes attrs;
>> + struct kvm_memory_attributes2 attrs;
>> + unsigned long size;
>> +
>> + if (ioctl == KVM_SET_MEMORY_ATTRIBUTES) {
>> + /*
>> + * Fields beyond struct kvm_userspace_memory_region shouldn't be
>> + * accessed, but avoid leaking kernel memory in case of a bug.
>> + */
>> + memset(&mem, 0, sizeof(mem));
>
> s/mem/attrs/g
>
>> + size = sizeof(struct kvm_set_memory_attributes);
>> + } else {
>> + size = sizeof(struct kvm_set_memory_attributes2);
>
> s/kvm_set_memory_attributes/kvm_memory_attributes/ (on both sizeof lines
> above and in the SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD macro).
>
>> + }
>> +
>> + /* Ensure the common parts of the two structs are identical. */
>> + SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(slot);
>> + SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(flags);
>> + SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(guest_phys_addr);
>> + SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(memory_size);
>> + SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(userspace_addr);
>
> The fields are:
> * address
> * size
> * attributes
> * flags
>
> The list you've got appears to match struct kvm_userspace_memory_region
> - copy/paste error?
>
Yes I did copy/paste this from KVM_SET_USER_MEMORY_REGION2.
Thanks for catching this! I missed out build-testing this with
CONFIG_KVM_VM_MEMORY_ATTRIBUTES.
I've done that and here's a replacement patch.
> Thanks,
> Steve
>
>>
>> [...snip...]
>>
From 31283972574bde2ffa1960d30c80286f8467c594 Mon Sep 17 00:00:00 2001
From: Ackerley Tng <ackerleytng@google.com>
Date: Thu, 16 Oct 2025 11:48:01 -0700
Subject: [PATCH] KVM: Introduce KVM_SET_MEMORY_ATTRIBUTES2
Introduce a "version 2" of KVM_SET_MEMORY_ATTRIBUTES to support returning
information back to userspace.
This new ioctl and structure will, in a later patch, be shared as a
guest_memfd ioctl, where the padding in the new kvm_memory_attributes2
structure will be for writing the response from the guest_memfd ioctl to
userspace.
A new ioctl is necessary for these reasons:
1. KVM_SET_MEMORY_ATTRIBUTES is currently a write-only ioctl and does not
allow userspace to read fields. There's nothing in code (yet?) that
validates this, but using _IOWR for consistency would be prudent.
2. KVM_SET_MEMORY_ATTRIBUTES, when used as a guest_memfd ioctl, will need
an additional field to provide userspace with more error details.
Alternatively, a completely new ioctl could be defined, unrelated to
KVM_SET_MEMORY_ATTRIBUTES, but using the same ioctl number and struct for
the vm and guest_memfd ioctls streamlines the interface for userspace. In
addition, any memory attributes, implemented on the vm or guest_memfd
ioctl, can be easily shared with the other.
Suggested-by: Sean Christopherson <seanjc@google.com>
Change-Id: I50cd506d9a28bf68a90e659015603de579569bc1
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
Documentation/virt/kvm/api.rst | 32 ++++++++++++++++++++++++++++++++
include/uapi/linux/kvm.h | 12 ++++++++++++
virt/kvm/kvm_main.c | 34 +++++++++++++++++++++++++++++++---
3 files changed, 75 insertions(+), 3 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 754b662a453c3..a812769d79bf6 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6355,6 +6355,8 @@ S390:
Returns -EINVAL if the VM has the KVM_VM_S390_UCONTROL flag set.
Returns -EINVAL if called on a protected VM.
+.. _KVM_SET_MEMORY_ATTRIBUTES:
+
4.141 KVM_SET_MEMORY_ATTRIBUTES
-------------------------------
@@ -6512,6 +6514,36 @@ the capability to be present.
`flags` must currently be zero.
+4.144 KVM_SET_MEMORY_ATTRIBUTES2
+---------------------------------
+
+:Capability: KVM_CAP_MEMORY_ATTRIBUTES2
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_memory_attributes2 (in/out)
+:Returns: 0 on success, <0 on error
+
+KVM_SET_MEMORY_ATTRIBUTES2 is an extension to
+KVM_SET_MEMORY_ATTRIBUTES that supports returning (writing) values to
+userspace. The original (pre-extension) fields are shared with
+KVM_SET_MEMORY_ATTRIBUTES identically.
+
+Attribute values are shared with KVM_SET_MEMORY_ATTRIBUTES.
+
+::
+
+ struct kvm_memory_attributes2 {
+ __u64 address;
+ __u64 size;
+ __u64 attributes;
+ __u64 flags;
+ __u64 reserved[4];
+ };
+
+ #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
+
+See also: :ref: `KVM_SET_MEMORY_ATTRIBUTES`.
+
.. _kvm_run:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 52f6000ab0208..c300e38c7c9cd 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -963,6 +963,7 @@ struct kvm_enable_cap {
#define KVM_CAP_RISCV_MP_STATE_RESET 242
#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
#define KVM_CAP_GUEST_MEMFD_FLAGS 244
+#define KVM_CAP_MEMORY_ATTRIBUTES2 245
struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1617,4 +1618,15 @@ struct kvm_pre_fault_memory {
__u64 padding[5];
};
+/* Available with KVM_CAP_MEMORY_ATTRIBUTES2 */
+#define KVM_SET_MEMORY_ATTRIBUTES2 _IOWR(KVMIO, 0xd6, struct kvm_memory_attributes2)
+
+struct kvm_memory_attributes2 {
+ __u64 address;
+ __u64 size;
+ __u64 attributes;
+ __u64 flags;
+ __u64 reserved[4];
+};
+
#endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 35166754a22b4..95aa51b334a70 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2621,7 +2621,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
return r;
}
static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
- struct kvm_memory_attributes *attrs)
+ struct kvm_memory_attributes2 *attrs)
{
gfn_t start, end;
@@ -4959,6 +4959,7 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_DEVICE_CTRL:
return 1;
#ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
+ case KVM_CAP_MEMORY_ATTRIBUTES2:
case KVM_CAP_MEMORY_ATTRIBUTES:
if (!vm_memory_attributes)
return 0;
@@ -5184,6 +5185,14 @@ do { \
sizeof_field(struct kvm_userspace_memory_region2, field)); \
} while (0)
+#define SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(field) \
+do { \
+ BUILD_BUG_ON(offsetof(struct kvm_memory_attributes, field) != \
+ offsetof(struct kvm_memory_attributes2, field)); \
+ BUILD_BUG_ON(sizeof_field(struct kvm_memory_attributes, field) != \
+ sizeof_field(struct kvm_memory_attributes2, field)); \
+} while (0)
+
static long kvm_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -5366,15 +5375,34 @@ static long kvm_vm_ioctl(struct file *filp,
}
#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
#ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
+ case KVM_SET_MEMORY_ATTRIBUTES2:
case KVM_SET_MEMORY_ATTRIBUTES: {
- struct kvm_memory_attributes attrs;
+ struct kvm_memory_attributes2 attrs;
+ unsigned long size;
+
+ if (ioctl == KVM_SET_MEMORY_ATTRIBUTES) {
+ /*
+ * Fields beyond struct kvm_userspace_memory_region shouldn't be
+ * accessed, but avoid leaking kernel memory in case of a bug.
+ */
+ memset(&attrs, 0, sizeof(attrs));
+ size = sizeof(struct kvm_memory_attributes);
+ } else {
+ size = sizeof(struct kvm_memory_attributes2);
+ }
+
+ /* Ensure the common parts of the two structs are identical. */
+ SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(address);
+ SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(size);
+ SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(attributes);
+ SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(flags);
r = -ENOTTY;
if (!vm_memory_attributes)
goto out;
r = -EFAULT;
- if (copy_from_user(&attrs, argp, sizeof(attrs)))
+ if (copy_from_user(&attrs, argp, size))
goto out;
r = kvm_vm_ioctl_set_mem_attributes(kvm, &attrs);
--
2.51.0.915.g61a8936c21-goog
Ackerley Tng <ackerleytng@google.com> writes:
Found another issue with KVM_CAP_MEMORY_ATTRIBUTES2.
KVM_CAP_MEMORY_ATTRIBUTES2 was defined to do the same thing as
KVM_CAP_MEMORY_ATTRIBUTES, but that's wrong since
KVM_CAP_MEMORY_ATTRIBUTES2 should indicate the presence of
KVM_SET_MEMORY_ATTRIBUTES2 and struct kvm_memory_attributes2.
Usage is kind of weird and I hope to get feedback on this as
well.
This describes the difference between the previous version of this patch
and the one attached below.
I also added this to the changelog
Add KVM_CAP_MEMORY_ATTRIBUTES2 to indicate that struct
kvm_memory_attributes2 exists and can be used either with
KVM_SET_MEMORY_ATTRIBUTES2 via the vm or guest_memfd ioctl.
Since KVM_SET_MEMORY_ATTRIBUTES2 is not limited to be used only with the vm
ioctl, return 1 for KVM_CAP_MEMORY_ATTRIBUTES2 as long as struct
kvm_memory_attributes2 and KVM_SET_MEMORY_ATTRIBUTES2 can be
used. KVM_CAP_MEMORY_ATTRIBUTES must still be used to actually get valid
attributes.
Handle KVM_CAP_MEMORY_ATTRIBUTES2 and return 1 regardless of
CONFIG_KVM_VM_MEMORY_ATTRIBUTES, since KVM_SET_MEMORY_ATTRIBUTES2 is not
limited to a vm ioctl and can also be used with the guest_memfd ioctl.
Here's the entire patch so hopefully it's easy to swap out this entire
patch over the original one.
From 8887ba58f6fd97c529c8152d6f18e5e26651dbec Mon Sep 17 00:00:00 2001
From: Ackerley Tng <ackerleytng@google.com>
Date: Thu, 16 Oct 2025 11:48:01 -0700
Subject: [PATCH] KVM: Introduce KVM_SET_MEMORY_ATTRIBUTES2
Introduce a "version 2" of KVM_SET_MEMORY_ATTRIBUTES to support returning
information back to userspace.
This new ioctl and structure will, in a later patch, be shared as a
guest_memfd ioctl, where the padding in the new kvm_memory_attributes2
structure will be for writing the response from the guest_memfd ioctl to
userspace.
A new ioctl is necessary for these reasons:
1. KVM_SET_MEMORY_ATTRIBUTES is currently a write-only ioctl and does not
allow userspace to read fields. There's nothing in code (yet?) that
validates this, but using _IOWR for consistency would be prudent.
2. KVM_SET_MEMORY_ATTRIBUTES, when used as a guest_memfd ioctl, will need
an additional field to provide userspace with more error details.
Alternatively, a completely new ioctl could be defined, unrelated to
KVM_SET_MEMORY_ATTRIBUTES, but using the same ioctl number and struct for
the vm and guest_memfd ioctls streamlines the interface for userspace. In
addition, any memory attributes, implemented on the vm or guest_memfd
ioctl, can be easily shared with the other.
Add KVM_CAP_MEMORY_ATTRIBUTES2 to indicate that struct
kvm_memory_attributes2 exists and can be used either with
KVM_SET_MEMORY_ATTRIBUTES2 via the vm or guest_memfd ioctl.
Since KVM_SET_MEMORY_ATTRIBUTES2 is not limited to be used only with the vm
ioctl, return 1 for KVM_CAP_MEMORY_ATTRIBUTES2 as long as struct
kvm_memory_attributes2 and KVM_SET_MEMORY_ATTRIBUTES2 can be
used. KVM_CAP_MEMORY_ATTRIBUTES must still be used to actually get valid
attributes.
Handle KVM_CAP_MEMORY_ATTRIBUTES2 and return 1 regardless of
CONFIG_KVM_VM_MEMORY_ATTRIBUTES, since KVM_SET_MEMORY_ATTRIBUTES2 is not
limited to a vm ioctl and can also be used with the guest_memfd ioctl.
Suggested-by: Sean Christopherson <seanjc@google.com>
Change-Id: I50cd506d9a28bf68a90e659015603de579569bc1
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
Documentation/virt/kvm/api.rst | 32 ++++++++++++++++++++++++++++++++
include/uapi/linux/kvm.h | 12 ++++++++++++
virt/kvm/kvm_main.c | 34 +++++++++++++++++++++++++++++++---
3 files changed, 75 insertions(+), 3 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 754b662a453c3..a812769d79bf6 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6355,6 +6355,8 @@ S390:
Returns -EINVAL if the VM has the KVM_VM_S390_UCONTROL flag set.
Returns -EINVAL if called on a protected VM.
+.. _KVM_SET_MEMORY_ATTRIBUTES:
+
4.141 KVM_SET_MEMORY_ATTRIBUTES
-------------------------------
@@ -6512,6 +6514,36 @@ the capability to be present.
`flags` must currently be zero.
+4.144 KVM_SET_MEMORY_ATTRIBUTES2
+---------------------------------
+
+:Capability: KVM_CAP_MEMORY_ATTRIBUTES2
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_memory_attributes2 (in/out)
+:Returns: 0 on success, <0 on error
+
+KVM_SET_MEMORY_ATTRIBUTES2 is an extension to
+KVM_SET_MEMORY_ATTRIBUTES that supports returning (writing) values to
+userspace. The original (pre-extension) fields are shared with
+KVM_SET_MEMORY_ATTRIBUTES identically.
+
+Attribute values are shared with KVM_SET_MEMORY_ATTRIBUTES.
+
+::
+
+ struct kvm_memory_attributes2 {
+ __u64 address;
+ __u64 size;
+ __u64 attributes;
+ __u64 flags;
+ __u64 reserved[4];
+ };
+
+ #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
+
+See also: :ref: `KVM_SET_MEMORY_ATTRIBUTES`.
+
.. _kvm_run:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 52f6000ab0208..c300e38c7c9cd 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -963,6 +963,7 @@ struct kvm_enable_cap {
#define KVM_CAP_RISCV_MP_STATE_RESET 242
#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
#define KVM_CAP_GUEST_MEMFD_FLAGS 244
+#define KVM_CAP_MEMORY_ATTRIBUTES2 245
struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1617,4 +1618,15 @@ struct kvm_pre_fault_memory {
__u64 padding[5];
};
+/* Available with KVM_CAP_MEMORY_ATTRIBUTES2 */
+#define KVM_SET_MEMORY_ATTRIBUTES2 _IOWR(KVMIO, 0xd6, struct kvm_memory_attributes2)
+
+struct kvm_memory_attributes2 {
+ __u64 address;
+ __u64 size;
+ __u64 attributes;
+ __u64 flags;
+ __u64 reserved[4];
+};
+
#endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 35166754a22b4..d083011744eba 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2621,7 +2621,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
return r;
}
static int kvm_vm_ioctl_set_mem_attributes(struct kvm *kvm,
- struct kvm_memory_attributes *attrs)
+ struct kvm_memory_attributes2 *attrs)
{
gfn_t start, end;
@@ -4957,6 +4957,7 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_BINARY_STATS_FD:
case KVM_CAP_SYSTEM_EVENT_DATA:
case KVM_CAP_DEVICE_CTRL:
+ case KVM_CAP_MEMORY_ATTRIBUTES2:
return 1;
#ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
case KVM_CAP_MEMORY_ATTRIBUTES:
@@ -5184,6 +5185,14 @@ do { \
sizeof_field(struct kvm_userspace_memory_region2, field)); \
} while (0)
+#define SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(field) \
+do { \
+ BUILD_BUG_ON(offsetof(struct kvm_memory_attributes, field) != \
+ offsetof(struct kvm_memory_attributes2, field)); \
+ BUILD_BUG_ON(sizeof_field(struct kvm_memory_attributes, field) != \
+ sizeof_field(struct kvm_memory_attributes2, field)); \
+} while (0)
+
static long kvm_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -5366,15 +5375,34 @@ static long kvm_vm_ioctl(struct file *filp,
}
#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
#ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
+ case KVM_SET_MEMORY_ATTRIBUTES2:
case KVM_SET_MEMORY_ATTRIBUTES: {
- struct kvm_memory_attributes attrs;
+ struct kvm_memory_attributes2 attrs;
+ unsigned long size;
+
+ if (ioctl == KVM_SET_MEMORY_ATTRIBUTES) {
+ /*
+ * Fields beyond struct kvm_userspace_memory_region shouldn't be
+ * accessed, but avoid leaking kernel memory in case of a bug.
+ */
+ memset(&attrs, 0, sizeof(attrs));
+ size = sizeof(struct kvm_memory_attributes);
+ } else {
+ size = sizeof(struct kvm_memory_attributes2);
+ }
+
+ /* Ensure the common parts of the two structs are identical. */
+ SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(address);
+ SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(size);
+ SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(attributes);
+ SANITY_CHECK_MEMORY_ATTRIBUTES_FIELD(flags);
r = -ENOTTY;
if (!vm_memory_attributes)
goto out;
r = -EFAULT;
- if (copy_from_user(&attrs, argp, sizeof(attrs)))
+ if (copy_from_user(&attrs, argp, size))
goto out;
r = kvm_vm_ioctl_set_mem_attributes(kvm, &attrs);
--
2.51.1.838.g19442a804e-goog
On Wed, Oct 22, 2025, Ackerley Tng wrote:
> Ackerley Tng <ackerleytng@google.com> writes:
>
> Found another issue with KVM_CAP_MEMORY_ATTRIBUTES2.
>
> KVM_CAP_MEMORY_ATTRIBUTES2 was defined to do the same thing as
> KVM_CAP_MEMORY_ATTRIBUTES, but that's wrong since
> KVM_CAP_MEMORY_ATTRIBUTES2 should indicate the presence of
> KVM_SET_MEMORY_ATTRIBUTES2 and struct kvm_memory_attributes2.
No? If no attributes are supported, whether or not KVM_SET_MEMORY_ATTRIBUTES2
exists is largely irrelevant. We can even provide the same -ENOTTY errno by
checking that _any_ attributes are supported, i.e. so that doing
KVM_SET_MEMORY_ATTRIBUTES2 on KVM without any support whatsoever fails in the
same way that KVM with code support but no attributes fails.
In other words, I don't see why it can't do both. Even if we can't massage the
right errno, I would much rather KVM_SET_MEMORY_ATTRIBUTES2 enumerate the set of
supported attributes than simply '1'. E.g. we have no plans to support
KVM_SET_MEMORY_ATTRIBUTES on guest_memfd, and so returning simply '1' creates an
unwanted and unnecessary dependency.
> @@ -1617,4 +1618,15 @@ struct kvm_pre_fault_memory {
> __u64 padding[5];
> };
>
> +/* Available with KVM_CAP_MEMORY_ATTRIBUTES2 */
> +#define KVM_SET_MEMORY_ATTRIBUTES2 _IOWR(KVMIO, 0xd6, struct kvm_memory_attributes2)
Please use the same literal number, 0xd2, as
#define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes)
The "final" ioctl number that userspace sees incorporates the directionality and
the size of the struct, i.e. KVM_SET_MEMORY_ATTRIBUTES and KVM_SET_MEMORY_ATTRIBUTES2
are guaranteed to be distinct even if they both use 0xd2 as the "minor" number.
> +
> +struct kvm_memory_attributes2 {
> + __u64 address;
> + __u64 size;
> + __u64 attributes;
> + __u64 flags;
> + __u64 reserved[4];
Maybe be paranoid and reserve 12 u64s?
Sean Christopherson <seanjc@google.com> writes:
> On Wed, Oct 22, 2025, Ackerley Tng wrote:
>> Ackerley Tng <ackerleytng@google.com> writes:
>>
>> Found another issue with KVM_CAP_MEMORY_ATTRIBUTES2.
>>
>> KVM_CAP_MEMORY_ATTRIBUTES2 was defined to do the same thing as
>> KVM_CAP_MEMORY_ATTRIBUTES, but that's wrong since
>> KVM_CAP_MEMORY_ATTRIBUTES2 should indicate the presence of
>> KVM_SET_MEMORY_ATTRIBUTES2 and struct kvm_memory_attributes2.
>
> No? If no attributes are supported, whether or not KVM_SET_MEMORY_ATTRIBUTES2
> exists is largely irrelevant.
That's true.
> We can even provide the same -ENOTTY errno by
> checking that _any_ attributes are supported, i.e. so that doing
> KVM_SET_MEMORY_ATTRIBUTES2 on KVM without any support whatsoever fails in the
> same way that KVM with code support but no attributes fails.
>
IIUC KVM_SET_MEMORY_ATTRIBUTES doesn't fail with -ENOTTY now when there
are no valid attributes.
Even if there's no valid attributes (as in
kvm_supported_mem_attributes() returns 0), it's possible to call
KVM_SET_MEMORY_ATTRIBUTES with .attributes set to 0, which will be a
no-op, but will return 0.
I think this is kind of correct behavior since .attributes = 0 is
actually a valid expression for "I want this range to be shared", and
for a VM that doesn't support private memory, it's a valid expression.
The other way that there are "no attributes" would be if there are no
/VM/ attributes, in which case KVM_SET_MEMORY_ATTRIBUTES, sent to as a
vm ioctl, will return -ENOTTY.
> In other words, I don't see why it can't do both. Even if we can't massage the
> right errno, I would much rather KVM_SET_MEMORY_ATTRIBUTES2 enumerate the set of
Did you mean KVM_CAP_MEMORY_ATTRIBUTES2 in the line above?
> supported attributes than simply '1'. E.g. we have no plans to support
> KVM_SET_MEMORY_ATTRIBUTES on guest_memfd, and so returning simply '1' creates an
> unwanted and unnecessary dependency.
>
Okay I'll switch this back to what it was.
>> @@ -1617,4 +1618,15 @@ struct kvm_pre_fault_memory {
>> __u64 padding[5];
>> };
>>
>> +/* Available with KVM_CAP_MEMORY_ATTRIBUTES2 */
>> +#define KVM_SET_MEMORY_ATTRIBUTES2 _IOWR(KVMIO, 0xd6, struct kvm_memory_attributes2)
>
> Please use the same literal number, 0xd2, as
>
> #define KVM_SET_MEMORY_ATTRIBUTES _IOW(KVMIO, 0xd2, struct kvm_memory_attributes)
>
> The "final" ioctl number that userspace sees incorporates the directionality and
> the size of the struct, i.e. KVM_SET_MEMORY_ATTRIBUTES and KVM_SET_MEMORY_ATTRIBUTES2
> are guaranteed to be distinct even if they both use 0xd2 as the "minor" number.
>
Will do.
>> +
>> +struct kvm_memory_attributes2 {
>> + __u64 address;
>> + __u64 size;
>> + __u64 attributes;
>> + __u64 flags;
>> + __u64 reserved[4];
>
> Maybe be paranoid and reserve 12 u64s?
Will do.
On Thu, Oct 23, 2025, Ackerley Tng wrote: > Sean Christopherson <seanjc@google.com> writes: > > > On Wed, Oct 22, 2025, Ackerley Tng wrote: > >> Ackerley Tng <ackerleytng@google.com> writes: > >> > >> Found another issue with KVM_CAP_MEMORY_ATTRIBUTES2. > >> > >> KVM_CAP_MEMORY_ATTRIBUTES2 was defined to do the same thing as > >> KVM_CAP_MEMORY_ATTRIBUTES, but that's wrong since > >> KVM_CAP_MEMORY_ATTRIBUTES2 should indicate the presence of > >> KVM_SET_MEMORY_ATTRIBUTES2 and struct kvm_memory_attributes2. > > > > No? If no attributes are supported, whether or not KVM_SET_MEMORY_ATTRIBUTES2 > > exists is largely irrelevant. > > That's true. > > > We can even provide the same -ENOTTY errno by > > checking that _any_ attributes are supported, i.e. so that doing > > KVM_SET_MEMORY_ATTRIBUTES2 on KVM without any support whatsoever fails in the > > same way that KVM with code support but no attributes fails. > > IIUC KVM_SET_MEMORY_ATTRIBUTES doesn't fail with -ENOTTY now when there > are no valid attributes. > > Even if there's no valid attributes (as in > kvm_supported_mem_attributes() returns 0), it's possible to call > KVM_SET_MEMORY_ATTRIBUTES with .attributes set to 0, which will be a > no-op, but will return 0. > > I think this is kind of correct behavior since .attributes = 0 is > actually a valid expression for "I want this range to be shared", and > for a VM that doesn't support private memory, it's a valid expression. > > > The other way that there are "no attributes" would be if there are no > /VM/ attributes, in which case KVM_SET_MEMORY_ATTRIBUTES, sent to as a > vm ioctl, will return -ENOTTY. Ya, this is what I was trying to say with "_any_ attributes are supported". I.e. by "any" I meant "any attributes in KVM for VMs vs. gmems", not "any attributes for this specific VM/gmem instance". > > In other words, I don't see why it can't do both. Even if we can't massage the > > right errno, I would much rather KVM_SET_MEMORY_ATTRIBUTES2 enumerate the set of > > Did you mean KVM_CAP_MEMORY_ATTRIBUTES2 in the line above? Doh, yes.
Sean Christopherson <seanjc@google.com> writes:
> On Thu, Oct 23, 2025, Ackerley Tng wrote:
>> Sean Christopherson <seanjc@google.com> writes:
>>
>> > On Wed, Oct 22, 2025, Ackerley Tng wrote:
>> >> Ackerley Tng <ackerleytng@google.com> writes:
>> >>
>> >> Found another issue with KVM_CAP_MEMORY_ATTRIBUTES2.
>> >>
>> >> KVM_CAP_MEMORY_ATTRIBUTES2 was defined to do the same thing as
>> >> KVM_CAP_MEMORY_ATTRIBUTES, but that's wrong since
>> >> KVM_CAP_MEMORY_ATTRIBUTES2 should indicate the presence of
>> >> KVM_SET_MEMORY_ATTRIBUTES2 and struct kvm_memory_attributes2.
>> >
>> > No? If no attributes are supported, whether or not KVM_SET_MEMORY_ATTRIBUTES2
>> > exists is largely irrelevant.
>>
>> That's true.
>>
>> > We can even provide the same -ENOTTY errno by
>> > checking that _any_ attributes are supported, i.e. so that doing
>> > KVM_SET_MEMORY_ATTRIBUTES2 on KVM without any support whatsoever fails in the
>> > same way that KVM with code support but no attributes fails.
>>
>> IIUC KVM_SET_MEMORY_ATTRIBUTES doesn't fail with -ENOTTY now when there
>> are no valid attributes.
>>
>> Even if there's no valid attributes (as in
>> kvm_supported_mem_attributes() returns 0), it's possible to call
>> KVM_SET_MEMORY_ATTRIBUTES with .attributes set to 0, which will be a
>> no-op, but will return 0.
>>
>> I think this is kind of correct behavior since .attributes = 0 is
>> actually a valid expression for "I want this range to be shared", and
>> for a VM that doesn't support private memory, it's a valid expression.
>>
>>
>> The other way that there are "no attributes" would be if there are no
>> /VM/ attributes, in which case KVM_SET_MEMORY_ATTRIBUTES, sent to as a
>> vm ioctl, will return -ENOTTY.
>
> Ya, this is what I was trying to say with "_any_ attributes are supported". I.e.
> by "any" I meant "any attributes in KVM for VMs vs. gmems", not "any attributes
> for this specific VM/gmem instance".
>
>>
>> [...snip...]
>>
I've been thinking more about this:
#ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
case KVM_CAP_MEMORY_ATTRIBUTES2:
case KVM_CAP_MEMORY_ATTRIBUTES:
if (!vm_memory_attributes)
return 0;
return kvm_supported_mem_attributes(kvm);
#endif
And the purpose of adding KVM_CAP_MEMORY_ATTRIBUTES2 is that
KVM_CAP_MEMORY_ATTRIBUTES2 tells userspace that
KVM_SET_MEMORY_ATTRIBUTES2 is available iff there are valid
attributes.
(So there's still a purpose)
Without valid attributes, userspace can't tell if it should use
KVM_SET_MEMORY_ATTRIBUTES or the 2 version.
I also added KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES, which tells
userspace the valid attributes when calling KVM_SET_MEMORY_ATTRIBUTES2
on a guest_memfd:
#ifdef CONFIG_KVM_GUEST_MEMFD
case KVM_CAP_GUEST_MEMFD:
return 1;
case KVM_CAP_GUEST_MEMFD_FLAGS:
return kvm_gmem_get_supported_flags(kvm);
case KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES:
if (vm_memory_attributes)
return 0;
return kvm_supported_mem_attributes(kvm);
#endif
So to set memory attributes, userspace should
if (kvm_check_cap(KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES) > 0)
use KVM_SET_MEMORY_ATTRIBUTES2 with guest_memfd
else if (kvm_check_cap(KVM_CAP_MEMORY_ATTRIBUTES2) > 0)
use KVM_SET_MEMORY_ATTRIBUTES2 with VM fd
else if (kvm_check_cap(KVM_CAP_MEMORY_ATTRIBUTES) > 0)
use KVM_SET_MEMORY_ATTRIBUTES with VM fd
else
can't set memory attributes
Something like that?
In selftests there's this, when KVM_SET_USER_MEMORY_REGION2 was
introduced:
#define TEST_REQUIRE_SET_USER_MEMORY_REGION2() \
__TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2), \
"KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)")
But looks like there's no direct equivalent for the introduction of
KVM_SET_MEMORY_ATTRIBUTES2?
The closest would be to add a TEST_REQUIRE_VALID_ATTRIBUTES() which
checks KVM_CAP_MEMORY_ATTRIBUTES2 or
KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES before making the vm or
guest_memfd ioctl respsectively.
On Fri, Oct 24, 2025, Ackerley Tng wrote:
> Sean Christopherson <seanjc@google.com> writes:
> >>
> >> [...snip...]
> >>
>
> I've been thinking more about this:
>
> #ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
> case KVM_CAP_MEMORY_ATTRIBUTES2:
> case KVM_CAP_MEMORY_ATTRIBUTES:
> if (!vm_memory_attributes)
> return 0;
>
> return kvm_supported_mem_attributes(kvm);
> #endif
>
> And the purpose of adding KVM_CAP_MEMORY_ATTRIBUTES2 is that
> KVM_CAP_MEMORY_ATTRIBUTES2 tells userspace that
> KVM_SET_MEMORY_ATTRIBUTES2 is available iff there are valid
> attributes.
>
> (So there's still a purpose)
>
> Without valid attributes, userspace can't tell if it should use
> KVM_SET_MEMORY_ATTRIBUTES or the 2 version.
To do what? If there are no attributes, userspace can't do anything useful anyways.
> I also added KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES, which tells
> userspace the valid attributes when calling KVM_SET_MEMORY_ATTRIBUTES2
> on a guest_memfd:
Ya, and that KVM_SET_MEMORY_ATTRIBUTES2 is supported on guest_memfd.
> #ifdef CONFIG_KVM_GUEST_MEMFD
> case KVM_CAP_GUEST_MEMFD:
> return 1;
> case KVM_CAP_GUEST_MEMFD_FLAGS:
> return kvm_gmem_get_supported_flags(kvm);
> case KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES:
> if (vm_memory_attributes)
> return 0;
>
> return kvm_supported_mem_attributes(kvm);
> #endif
>
> So to set memory attributes, userspace should
Userspace *can*. User could also decide it only wants to support guest_memfd
attributes, e.g. because the platform admins controls the entire stack and built
their entire operation around in-place conversion.
> if (kvm_check_cap(KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES) > 0)
> use KVM_SET_MEMORY_ATTRIBUTES2 with guest_memfd
> else if (kvm_check_cap(KVM_CAP_MEMORY_ATTRIBUTES2) > 0)
> use KVM_SET_MEMORY_ATTRIBUTES2 with VM fd
> else if (kvm_check_cap(KVM_CAP_MEMORY_ATTRIBUTES) > 0)
> use KVM_SET_MEMORY_ATTRIBUTES with VM fd
> else
> can't set memory attributes
>
> Something like that?
More or else, ya.
> In selftests there's this, when KVM_SET_USER_MEMORY_REGION2 was
> introduced:
>
> #define TEST_REQUIRE_SET_USER_MEMORY_REGION2() \
> __TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2), \
> "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)")
>
> But looks like there's no direct equivalent for the introduction of
> KVM_SET_MEMORY_ATTRIBUTES2?
KVM_CAP_USER_MEMORY2 is the equivalent.
There's was no need to enumerate anything beyond yes/no, because
SET_USER_MEMORY_REGION2 didn't introduce new flags, it expanded the size of the
structure passed in from userspace so that KVM_CAP_GUEST_MEMFD could be introduced
without breaking backwards compatibility.
> The closest would be to add a TEST_REQUIRE_VALID_ATTRIBUTES() which
> checks KVM_CAP_MEMORY_ATTRIBUTES2 or
> KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES before making the vm or
> guest_memfd ioctl respsectively.
Yes. This is what I did in my (never posted, but functional) version:
@@ -486,6 +488,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
}
guest_rng = new_guest_random_state(guest_random_seed);
sync_global_to_guest(vm, guest_rng);
+ sync_global_to_guest(vm, kvm_has_gmem_attributes);
kvm_arch_vm_post_create(vm, nr_runnable_vcpus);
@@ -2319,6 +2333,8 @@ void __attribute((constructor)) kvm_selftest_init(void)
guest_random_seed = last_guest_seed = random();
pr_info("Random seed: 0x%x\n", guest_random_seed);
+ kvm_has_gmem_attributes = kvm_has_cap(KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES);
+
kvm_selftest_arch_init();
}
That way the core library code can pivot on gmem vs. VM attributes without having
to rely on tests to define anything. E.g.
static inline void vm_mem_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa,
uint64_t size, uint64_t attrs)
{
if (kvm_has_gmem_attributes) {
off_t fd_offset;
uint64_t len;
int fd;
fd = kvm_gpa_to_guest_memfd(vm, gpa, &fd_offset, &len);
TEST_ASSERT(len >= size, "Setting attributes beyond the length of a guest_memfd");
gmem_set_memory_attributes(fd, fd_offset, size, attrs);
} else {
vm_set_memory_attributes(vm, gpa, size, attrs);
}
}
Sean Christopherson <seanjc@google.com> writes:
> On Fri, Oct 24, 2025, Ackerley Tng wrote:
>> Sean Christopherson <seanjc@google.com> writes:
>> >>
>> >> [...snip...]
>> >>
>>
>> I've been thinking more about this:
>>
>> #ifdef CONFIG_KVM_VM_MEMORY_ATTRIBUTES
>> case KVM_CAP_MEMORY_ATTRIBUTES2:
>> case KVM_CAP_MEMORY_ATTRIBUTES:
>> if (!vm_memory_attributes)
>> return 0;
>>
>> return kvm_supported_mem_attributes(kvm);
>> #endif
>>
>> And the purpose of adding KVM_CAP_MEMORY_ATTRIBUTES2 is that
>> KVM_CAP_MEMORY_ATTRIBUTES2 tells userspace that
>> KVM_SET_MEMORY_ATTRIBUTES2 is available iff there are valid
>> attributes.
>>
>> (So there's still a purpose)
>>
>> Without valid attributes, userspace can't tell if it should use
>> KVM_SET_MEMORY_ATTRIBUTES or the 2 version.
>
> To do what? If there are no attributes, userspace can't do anything useful anyways.
>
>> I also added KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES, which tells
>> userspace the valid attributes when calling KVM_SET_MEMORY_ATTRIBUTES2
>> on a guest_memfd:
>
> Ya, and that KVM_SET_MEMORY_ATTRIBUTES2 is supported on guest_memfd.
>
>> #ifdef CONFIG_KVM_GUEST_MEMFD
>> case KVM_CAP_GUEST_MEMFD:
>> return 1;
>> case KVM_CAP_GUEST_MEMFD_FLAGS:
>> return kvm_gmem_get_supported_flags(kvm);
>> case KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES:
>> if (vm_memory_attributes)
>> return 0;
>>
>> return kvm_supported_mem_attributes(kvm);
>> #endif
>>
>> So to set memory attributes, userspace should
>
> Userspace *can*. User could also decide it only wants to support guest_memfd
> attributes, e.g. because the platform admins controls the entire stack and built
> their entire operation around in-place conversion.
>
>> if (kvm_check_cap(KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES) > 0)
>> use KVM_SET_MEMORY_ATTRIBUTES2 with guest_memfd
>> else if (kvm_check_cap(KVM_CAP_MEMORY_ATTRIBUTES2) > 0)
>> use KVM_SET_MEMORY_ATTRIBUTES2 with VM fd
>> else if (kvm_check_cap(KVM_CAP_MEMORY_ATTRIBUTES) > 0)
>> use KVM_SET_MEMORY_ATTRIBUTES with VM fd
>> else
>> can't set memory attributes
>>
>> Something like that?
>
> More or else, ya.
>
>> In selftests there's this, when KVM_SET_USER_MEMORY_REGION2 was
>> introduced:
>>
>> #define TEST_REQUIRE_SET_USER_MEMORY_REGION2() \
>> __TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2), \
>> "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)")
>>
>> But looks like there's no direct equivalent for the introduction of
>> KVM_SET_MEMORY_ATTRIBUTES2?
>
> KVM_CAP_USER_MEMORY2 is the equivalent.
>
> There's was no need to enumerate anything beyond yes/no, because
> SET_USER_MEMORY_REGION2 didn't introduce new flags, it expanded the size of the
> structure passed in from userspace so that KVM_CAP_GUEST_MEMFD could be introduced
> without breaking backwards compatibility.
>
>> The closest would be to add a TEST_REQUIRE_VALID_ATTRIBUTES() which
>> checks KVM_CAP_MEMORY_ATTRIBUTES2 or
>> KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES before making the vm or
>> guest_memfd ioctl respsectively.
>
> Yes. This is what I did in my (never posted, but functional) version:
>
> @@ -486,6 +488,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
> }
> guest_rng = new_guest_random_state(guest_random_seed);
> sync_global_to_guest(vm, guest_rng);
> + sync_global_to_guest(vm, kvm_has_gmem_attributes);
I ported this [1] except for syncing this value to the guest, because I
think the guest shouldn't need to know this information, the host should
decide what to do. I think, if the guests really need to know this, the
test itself can do the syncing.
[1] https://lore.kernel.org/all/5656d432df1217c08da0cc2694fd79948bfd686f.1760731772.git.ackerleytng@google.com/
>
> kvm_arch_vm_post_create(vm, nr_runnable_vcpus);
>
> @@ -2319,6 +2333,8 @@ void __attribute((constructor)) kvm_selftest_init(void)
> guest_random_seed = last_guest_seed = random();
> pr_info("Random seed: 0x%x\n", guest_random_seed);
>
> + kvm_has_gmem_attributes = kvm_has_cap(KVM_CAP_GUEST_MEMFD_MEMORY_ATTRIBUTES);
> +
> kvm_selftest_arch_init();
> }
>
> That way the core library code can pivot on gmem vs. VM attributes without having
> to rely on tests to define anything. E.g.
>
> static inline void vm_mem_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa,
> uint64_t size, uint64_t attrs)
> {
> if (kvm_has_gmem_attributes) {
> off_t fd_offset;
> uint64_t len;
> int fd;
>
> fd = kvm_gpa_to_guest_memfd(vm, gpa, &fd_offset, &len);
> TEST_ASSERT(len >= size, "Setting attributes beyond the length of a guest_memfd");
> gmem_set_memory_attributes(fd, fd_offset, size, attrs);
> } else {
> vm_set_memory_attributes(vm, gpa, size, attrs);
> }
> }
On Fri, Oct 24, 2025, Ackerley Tng wrote:
> Sean Christopherson <seanjc@google.com> writes:
> > @@ -486,6 +488,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
> > }
> > guest_rng = new_guest_random_state(guest_random_seed);
> > sync_global_to_guest(vm, guest_rng);
> > + sync_global_to_guest(vm, kvm_has_gmem_attributes);
>
> I ported this [1] except for syncing this value to the guest, because I
> think the guest shouldn't need to know this information,
KVM selftests are about practically and testing, what information should or
shouldn't be available to a test from e.g. a safety perspective is completely
irrelevant. In fact, one of the biggest advantages of selftests over KUT is
that the guest side can know _exactly_ what's going on in the host.
See the usage in 1850e3da4b03 ("KVM: selftests: Update private_mem_conversions_test
to mmap() guest_memfd") from:
https://github.com/sean-jc/linux.git x86/gmem_inplace
> the host should decide what to do. I think, if the guests really need to know
> this, the test itself can do the syncing.
Why force tests to do extra work, and potentially introduce subtle bugs due to
state being stale?
Sean Christopherson <seanjc@google.com> writes:
> On Fri, Oct 24, 2025, Ackerley Tng wrote:
>> Sean Christopherson <seanjc@google.com> writes:
>> > @@ -486,6 +488,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
>> > }
>> > guest_rng = new_guest_random_state(guest_random_seed);
>> > sync_global_to_guest(vm, guest_rng);
>> > + sync_global_to_guest(vm, kvm_has_gmem_attributes);
>>
>> I ported this [1] except for syncing this value to the guest, because I
>> think the guest shouldn't need to know this information,
>
> KVM selftests are about practically and testing, what information should or
> shouldn't be available to a test from e.g. a safety perspective is completely
> irrelevant. In fact, one of the biggest advantages of selftests over KUT is
> that the guest side can know _exactly_ what's going on in the host.
>
> See the usage in 1850e3da4b03 ("KVM: selftests: Update private_mem_conversions_test
> to mmap() guest_memfd") from:
>
> https://github.com/sean-jc/linux.git x86/gmem_inplace
>
>> the host should decide what to do. I think, if the guests really need to know
>> this, the test itself can do the syncing.
>
> Why force tests to do extra work, and potentially introduce subtle bugs due to
> state being stale?
Adding it back. Thanks!
This variable should be sync-able for TDX selftests as well since the
value should be synced before the TD image is loaded.
© 2016 - 2026 Red Hat, Inc.