[PATCH v3 09/70] physmem: Introduce ram_block_convert_range() for page conversion

Xiaoyao Li posted 70 patches 1 year ago
Only 68 patches received!
There is a newer version of this series
[PATCH v3 09/70] physmem: Introduce ram_block_convert_range() for page conversion
Posted by Xiaoyao Li 1 year ago
It's used for discarding opposite memory after memory conversion, for
confidential guest.

When page is converted from shared to private, the original shared
memory can be discarded via ram_block_discard_range();

When page is converted from private to shared, the original private
memory is back'ed by guest_memfd. Introduce
ram_block_discard_guest_memfd_range() for discarding memory in
guest_memfd.

Originally-from: Isaku Yamahata <isaku.yamahata@intel.com>
Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
---
 include/exec/cpu-common.h |  2 ++
 system/physmem.c          | 50 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 41115d891940..de728a18eef2 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -175,6 +175,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
 
 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
 int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
+int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
+                            bool shared_to_private);
 
 #endif
 
diff --git a/system/physmem.c b/system/physmem.c
index ddfecddefcd6..cd6008fa09ad 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -3641,6 +3641,29 @@ err:
     return ret;
 }
 
+static int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
+                                               size_t length)
+{
+    int ret = -1;
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+    ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+                    start, length);
+
+    if (ret) {
+        ret = -errno;
+        error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
+                     __func__, rb->idstr, start, length, ret);
+    }
+#else
+    ret = -ENOSYS;
+    error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)",
+                 __func__, rb->idstr, start, length, ret);
+#endif
+
+    return ret;
+}
+
 bool ramblock_is_pmem(RAMBlock *rb)
 {
     return rb->flags & RAM_PMEM;
@@ -3828,3 +3851,30 @@ bool ram_block_discard_is_required(void)
     return qatomic_read(&ram_block_discard_required_cnt) ||
            qatomic_read(&ram_block_coordinated_discard_required_cnt);
 }
+
+int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
+                            bool shared_to_private)
+{
+    if (!rb || rb->guest_memfd < 0) {
+        return -1;
+    }
+
+    if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
+        !QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) {
+        return -1;
+    }
+
+    if (!length) {
+        return -1;
+    }
+
+    if (start + length > rb->max_length) {
+        return -1;
+    }
+
+    if (shared_to_private) {
+        return ram_block_discard_range(rb, start, length);
+    } else {
+        return ram_block_discard_guest_memfd_range(rb, start, length);
+    }
+}
-- 
2.34.1
Re: [PATCH v3 09/70] physmem: Introduce ram_block_convert_range() for page conversion
Posted by Isaku Yamahata 1 year ago
On Wed, Nov 15, 2023 at 02:14:18AM -0500,
Xiaoyao Li <xiaoyao.li@intel.com> wrote:

> It's used for discarding opposite memory after memory conversion, for
> confidential guest.
> 
> When page is converted from shared to private, the original shared
> memory can be discarded via ram_block_discard_range();
> 
> When page is converted from private to shared, the original private
> memory is back'ed by guest_memfd. Introduce
> ram_block_discard_guest_memfd_range() for discarding memory in
> guest_memfd.
> 
> Originally-from: Isaku Yamahata <isaku.yamahata@intel.com>
> Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com>
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
>  include/exec/cpu-common.h |  2 ++
>  system/physmem.c          | 50 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 52 insertions(+)
> 
> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
> index 41115d891940..de728a18eef2 100644
> --- a/include/exec/cpu-common.h
> +++ b/include/exec/cpu-common.h
> @@ -175,6 +175,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
>  
>  int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
>  int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
> +                            bool shared_to_private);
>  
>  #endif
>  
> diff --git a/system/physmem.c b/system/physmem.c
> index ddfecddefcd6..cd6008fa09ad 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -3641,6 +3641,29 @@ err:
>      return ret;
>  }
>  
> +static int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
> +                                               size_t length)
> +{
> +    int ret = -1;
> +
> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> +    ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
> +                    start, length);
> +
> +    if (ret) {
> +        ret = -errno;
> +        error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
> +                     __func__, rb->idstr, start, length, ret);
> +    }
> +#else
> +    ret = -ENOSYS;
> +    error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)",
> +                 __func__, rb->idstr, start, length, ret);
> +#endif
> +
> +    return ret;
> +}
> +
>  bool ramblock_is_pmem(RAMBlock *rb)
>  {
>      return rb->flags & RAM_PMEM;
> @@ -3828,3 +3851,30 @@ bool ram_block_discard_is_required(void)
>      return qatomic_read(&ram_block_discard_required_cnt) ||
>             qatomic_read(&ram_block_coordinated_discard_required_cnt);
>  }
> +
> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
> +                            bool shared_to_private)
> +{
> +    if (!rb || rb->guest_memfd < 0) {
> +        return -1;
> +    }
> +
> +    if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
> +        !QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) {
> +        return -1;
> +    }
> +
> +    if (!length) {
> +        return -1;
> +    }
> +
> +    if (start + length > rb->max_length) {
> +        return -1;
> +    }
> +
> +    if (shared_to_private) {
> +        return ram_block_discard_range(rb, start, length);
> +    } else {
> +        return ram_block_discard_guest_memfd_range(rb, start, length);
> +    }
> +}

Originally this function issued KVM_SET_MEMORY_ATTRIBUTES, the function name
mad sense. But now it doesn't, and it issues only punch hole. We should rename
it to represent what it actually does. discard_range?
-- 
Isaku Yamahata <isaku.yamahata@linux.intel.com>
Re: [PATCH v3 09/70] physmem: Introduce ram_block_convert_range() for page conversion
Posted by Xiaoyao Li 11 months, 3 weeks ago
On 11/18/2023 5:03 AM, Isaku Yamahata wrote:
> On Wed, Nov 15, 2023 at 02:14:18AM -0500,
> Xiaoyao Li <xiaoyao.li@intel.com> wrote:
> 
>> It's used for discarding opposite memory after memory conversion, for
>> confidential guest.
>>
>> When page is converted from shared to private, the original shared
>> memory can be discarded via ram_block_discard_range();
>>
>> When page is converted from private to shared, the original private
>> memory is back'ed by guest_memfd. Introduce
>> ram_block_discard_guest_memfd_range() for discarding memory in
>> guest_memfd.
>>
>> Originally-from: Isaku Yamahata <isaku.yamahata@intel.com>
>> Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com>
>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>> ---
>>   include/exec/cpu-common.h |  2 ++
>>   system/physmem.c          | 50 +++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 52 insertions(+)
>>
>> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
>> index 41115d891940..de728a18eef2 100644
>> --- a/include/exec/cpu-common.h
>> +++ b/include/exec/cpu-common.h
>> @@ -175,6 +175,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
>>   
>>   int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
>>   int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
>> +                            bool shared_to_private);
>>   
>>   #endif
>>   
>> diff --git a/system/physmem.c b/system/physmem.c
>> index ddfecddefcd6..cd6008fa09ad 100644
>> --- a/system/physmem.c
>> +++ b/system/physmem.c
>> @@ -3641,6 +3641,29 @@ err:
>>       return ret;
>>   }
>>   
>> +static int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
>> +                                               size_t length)
>> +{
>> +    int ret = -1;
>> +
>> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
>> +    ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
>> +                    start, length);
>> +
>> +    if (ret) {
>> +        ret = -errno;
>> +        error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
>> +                     __func__, rb->idstr, start, length, ret);
>> +    }
>> +#else
>> +    ret = -ENOSYS;
>> +    error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)",
>> +                 __func__, rb->idstr, start, length, ret);
>> +#endif
>> +
>> +    return ret;
>> +}
>> +
>>   bool ramblock_is_pmem(RAMBlock *rb)
>>   {
>>       return rb->flags & RAM_PMEM;
>> @@ -3828,3 +3851,30 @@ bool ram_block_discard_is_required(void)
>>       return qatomic_read(&ram_block_discard_required_cnt) ||
>>              qatomic_read(&ram_block_coordinated_discard_required_cnt);
>>   }
>> +
>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
>> +                            bool shared_to_private)
>> +{
>> +    if (!rb || rb->guest_memfd < 0) {
>> +        return -1;
>> +    }
>> +
>> +    if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
>> +        !QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) {
>> +        return -1;
>> +    }
>> +
>> +    if (!length) {
>> +        return -1;
>> +    }
>> +
>> +    if (start + length > rb->max_length) {
>> +        return -1;
>> +    }
>> +
>> +    if (shared_to_private) {
>> +        return ram_block_discard_range(rb, start, length);
>> +    } else {
>> +        return ram_block_discard_guest_memfd_range(rb, start, length);
>> +    }
>> +}
> 
> Originally this function issued KVM_SET_MEMORY_ATTRIBUTES, the function name
> mad sense. But now it doesn't, and it issues only punch hole. We should rename
> it to represent what it actually does. discard_range?

ram_block_discard_range() already exists for non-guest-memfd memory discard.

I cannot come up with a proper name. e.g., 
ram_block_discard_opposite_range() while *opposite* seems unclear.

Do you have any better idea?
Re: [PATCH v3 09/70] physmem: Introduce ram_block_convert_range() for page conversion
Posted by David Hildenbrand 11 months, 3 weeks ago
On 08.12.23 08:59, Xiaoyao Li wrote:
> On 11/18/2023 5:03 AM, Isaku Yamahata wrote:
>> On Wed, Nov 15, 2023 at 02:14:18AM -0500,
>> Xiaoyao Li <xiaoyao.li@intel.com> wrote:
>>
>>> It's used for discarding opposite memory after memory conversion, for
>>> confidential guest.
>>>
>>> When page is converted from shared to private, the original shared
>>> memory can be discarded via ram_block_discard_range();
>>>
>>> When page is converted from private to shared, the original private
>>> memory is back'ed by guest_memfd. Introduce
>>> ram_block_discard_guest_memfd_range() for discarding memory in
>>> guest_memfd.
>>>
>>> Originally-from: Isaku Yamahata <isaku.yamahata@intel.com>
>>> Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>> ---
>>>    include/exec/cpu-common.h |  2 ++
>>>    system/physmem.c          | 50 +++++++++++++++++++++++++++++++++++++++
>>>    2 files changed, 52 insertions(+)
>>>
>>> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
>>> index 41115d891940..de728a18eef2 100644
>>> --- a/include/exec/cpu-common.h
>>> +++ b/include/exec/cpu-common.h
>>> @@ -175,6 +175,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
>>>    
>>>    int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
>>>    int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
>>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
>>> +                            bool shared_to_private);
>>>    
>>>    #endif
>>>    
>>> diff --git a/system/physmem.c b/system/physmem.c
>>> index ddfecddefcd6..cd6008fa09ad 100644
>>> --- a/system/physmem.c
>>> +++ b/system/physmem.c
>>> @@ -3641,6 +3641,29 @@ err:
>>>        return ret;
>>>    }
>>>    
>>> +static int ram_block_discard_guest_memfd_range(RAMBlock *rb, uint64_t start,
>>> +                                               size_t length)
>>> +{
>>> +    int ret = -1;
>>> +
>>> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
>>> +    ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
>>> +                    start, length);
>>> +
>>> +    if (ret) {
>>> +        ret = -errno;
>>> +        error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)",
>>> +                     __func__, rb->idstr, start, length, ret);
>>> +    }
>>> +#else
>>> +    ret = -ENOSYS;
>>> +    error_report("%s: fallocate not available %s:%" PRIx64 " +%zx (%d)",
>>> +                 __func__, rb->idstr, start, length, ret);
>>> +#endif
>>> +
>>> +    return ret;
>>> +}
>>> +
>>>    bool ramblock_is_pmem(RAMBlock *rb)
>>>    {
>>>        return rb->flags & RAM_PMEM;
>>> @@ -3828,3 +3851,30 @@ bool ram_block_discard_is_required(void)
>>>        return qatomic_read(&ram_block_discard_required_cnt) ||
>>>               qatomic_read(&ram_block_coordinated_discard_required_cnt);
>>>    }
>>> +
>>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t length,
>>> +                            bool shared_to_private)
>>> +{
>>> +    if (!rb || rb->guest_memfd < 0) {
>>> +        return -1;
>>> +    }
>>> +
>>> +    if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
>>> +        !QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) {
>>> +        return -1;
>>> +    }
>>> +
>>> +    if (!length) {
>>> +        return -1;
>>> +    }
>>> +
>>> +    if (start + length > rb->max_length) {
>>> +        return -1;
>>> +    }
>>> +
>>> +    if (shared_to_private) {
>>> +        return ram_block_discard_range(rb, start, length);
>>> +    } else {
>>> +        return ram_block_discard_guest_memfd_range(rb, start, length);
>>> +    }
>>> +}
>>
>> Originally this function issued KVM_SET_MEMORY_ATTRIBUTES, the function name
>> mad sense. But now it doesn't, and it issues only punch hole. We should rename
>> it to represent what it actually does. discard_range?
> 
> ram_block_discard_range() already exists for non-guest-memfd memory discard.
> 
> I cannot come up with a proper name. e.g.,
> ram_block_discard_opposite_range() while *opposite* seems unclear.
> 
> Do you have any better idea?

Having some indication that this is about "guest_memfd" back and forth 
switching/conversion will make sense. But I'm also not able to come up 
with a better name.

Maybe have two functions:

ram_block_activate_guest_memfd_range
ram_block_deactivate_guest_memfd_range

-- 
Cheers,

David / dhildenb
Re: [PATCH v3 09/70] physmem: Introduce ram_block_convert_range() for page conversion
Posted by Xiaoyao Li 11 months, 1 week ago
On 12/8/2023 7:52 PM, David Hildenbrand wrote:
> On 08.12.23 08:59, Xiaoyao Li wrote:
>> On 11/18/2023 5:03 AM, Isaku Yamahata wrote:
>>> On Wed, Nov 15, 2023 at 02:14:18AM -0500,
>>> Xiaoyao Li <xiaoyao.li@intel.com> wrote:
>>>
>>>> It's used for discarding opposite memory after memory conversion, for
>>>> confidential guest.
>>>>
>>>> When page is converted from shared to private, the original shared
>>>> memory can be discarded via ram_block_discard_range();
>>>>
>>>> When page is converted from private to shared, the original private
>>>> memory is back'ed by guest_memfd. Introduce
>>>> ram_block_discard_guest_memfd_range() for discarding memory in
>>>> guest_memfd.
>>>>
>>>> Originally-from: Isaku Yamahata <isaku.yamahata@intel.com>
>>>> Codeveloped-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>>>> ---
>>>>    include/exec/cpu-common.h |  2 ++
>>>>    system/physmem.c          | 50 
>>>> +++++++++++++++++++++++++++++++++++++++
>>>>    2 files changed, 52 insertions(+)
>>>>
>>>> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
>>>> index 41115d891940..de728a18eef2 100644
>>>> --- a/include/exec/cpu-common.h
>>>> +++ b/include/exec/cpu-common.h
>>>> @@ -175,6 +175,8 @@ typedef int (RAMBlockIterFunc)(RAMBlock *rb, 
>>>> void *opaque);
>>>>    int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
>>>>    int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t 
>>>> length);
>>>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t 
>>>> length,
>>>> +                            bool shared_to_private);
>>>>    #endif
>>>> diff --git a/system/physmem.c b/system/physmem.c
>>>> index ddfecddefcd6..cd6008fa09ad 100644
>>>> --- a/system/physmem.c
>>>> +++ b/system/physmem.c
>>>> @@ -3641,6 +3641,29 @@ err:
>>>>        return ret;
>>>>    }
>>>> +static int ram_block_discard_guest_memfd_range(RAMBlock *rb, 
>>>> uint64_t start,
>>>> +                                               size_t length)
>>>> +{
>>>> +    int ret = -1;
>>>> +
>>>> +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
>>>> +    ret = fallocate(rb->guest_memfd, FALLOC_FL_PUNCH_HOLE | 
>>>> FALLOC_FL_KEEP_SIZE,
>>>> +                    start, length);
>>>> +
>>>> +    if (ret) {
>>>> +        ret = -errno;
>>>> +        error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx 
>>>> (%d)",
>>>> +                     __func__, rb->idstr, start, length, ret);
>>>> +    }
>>>> +#else
>>>> +    ret = -ENOSYS;
>>>> +    error_report("%s: fallocate not available %s:%" PRIx64 " +%zx 
>>>> (%d)",
>>>> +                 __func__, rb->idstr, start, length, ret);
>>>> +#endif
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>>    bool ramblock_is_pmem(RAMBlock *rb)
>>>>    {
>>>>        return rb->flags & RAM_PMEM;
>>>> @@ -3828,3 +3851,30 @@ bool ram_block_discard_is_required(void)
>>>>        return qatomic_read(&ram_block_discard_required_cnt) ||
>>>>               
>>>> qatomic_read(&ram_block_coordinated_discard_required_cnt);
>>>>    }
>>>> +
>>>> +int ram_block_convert_range(RAMBlock *rb, uint64_t start, size_t 
>>>> length,
>>>> +                            bool shared_to_private)
>>>> +{
>>>> +    if (!rb || rb->guest_memfd < 0) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    if (!QEMU_PTR_IS_ALIGNED(start, qemu_host_page_size) ||
>>>> +        !QEMU_PTR_IS_ALIGNED(length, qemu_host_page_size)) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    if (!length) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    if (start + length > rb->max_length) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    if (shared_to_private) {
>>>> +        return ram_block_discard_range(rb, start, length);
>>>> +    } else {
>>>> +        return ram_block_discard_guest_memfd_range(rb, start, length);
>>>> +    }
>>>> +}
>>>
>>> Originally this function issued KVM_SET_MEMORY_ATTRIBUTES, the 
>>> function name
>>> mad sense. But now it doesn't, and it issues only punch hole. We 
>>> should rename
>>> it to represent what it actually does. discard_range?
>>
>> ram_block_discard_range() already exists for non-guest-memfd memory 
>> discard.
>>
>> I cannot come up with a proper name. e.g.,
>> ram_block_discard_opposite_range() while *opposite* seems unclear.
>>
>> Do you have any better idea?
> 
> Having some indication that this is about "guest_memfd" back and forth 
> switching/conversion will make sense. But I'm also not able to come up 
> with a better name.
> 
> Maybe have two functions:
> 
> ram_block_activate_guest_memfd_range
> ram_block_deactivate_guest_memfd_range
> 

finally, I decide to drop this function and expose 
ram_block_discard_guest_memfd_range() instead. So caller can call the 
ram_block_discard_*() on its own.