This commit solves the issue with userfault_fd WP feature that
background snapshot is based on. For any never poluated or discarded
memory page, the UFFDIO_WRITEPROTECT ioctl() would skip updating
PTE for that page, thereby loosing WP setting for it.
So we need to pre-fault pages for each RAM block to be protected
before making a userfault_fd wr-protect ioctl().
Signed-off-by: Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
---
migration/migration.c | 6 ++++++
migration/ram.c | 48 +++++++++++++++++++++++++++++++++++++++++++
migration/ram.h | 1 +
3 files changed, 55 insertions(+)
diff --git a/migration/migration.c b/migration/migration.c
index be4729e7c8..71bce15a1b 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3827,6 +3827,12 @@ static void *bg_migration_thread(void *opaque)
update_iteration_initial_status(s);
+ /*
+ * Prepare for tracking memory writes with UFFD-WP - populate
+ * RAM pages before protecting.
+ */
+ ram_write_tracking_prepare();
+
qemu_savevm_state_header(s->to_dst_file);
qemu_savevm_state_setup(s->to_dst_file);
diff --git a/migration/ram.c b/migration/ram.c
index 40e78952ad..24c8627214 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1560,6 +1560,54 @@ out:
return ret;
}
+/*
+ * ram_block_populate_pages: populate memory in the RAM block by reading
+ * an integer from the beginning of each page.
+ *
+ * Since it's solely used for userfault_fd WP feature, here we just
+ * hardcode page size to qemu_real_host_page_size.
+ *
+ * @bs: RAM block to populate
+ */
+static void ram_block_populate_pages(RAMBlock *bs)
+{
+ char *ptr = (char *) bs->host;
+
+ for (ram_addr_t offset = 0; offset < bs->used_length;
+ offset += qemu_real_host_page_size) {
+ char tmp = *(ptr + offset);
+ /* Don't optimize the read out */
+ asm volatile("" : "+r" (tmp));
+ }
+}
+
+/*
+ * ram_write_tracking_prepare: prepare for UFFD-WP memory tracking
+ */
+void ram_write_tracking_prepare(void)
+{
+ RAMBlock *bs;
+
+ RCU_READ_LOCK_GUARD();
+
+ RAMBLOCK_FOREACH_NOT_IGNORED(bs) {
+ /* Nothing to do with read-only and MMIO-writable regions */
+ if (bs->mr->readonly || bs->mr->rom_device) {
+ continue;
+ }
+
+ /*
+ * Populate pages of the RAM block before enabling userfault_fd
+ * write protection.
+ *
+ * This stage is required since ioctl(UFFDIO_WRITEPROTECT) with
+ * UFFDIO_WRITEPROTECT_MODE_WP mode setting would silently skip
+ * pages with pte_none() entries in page table.
+ */
+ ram_block_populate_pages(bs);
+ }
+}
+
/*
* ram_write_tracking_start: start UFFD-WP memory tracking
*
diff --git a/migration/ram.h b/migration/ram.h
index 6378bb3ebc..4833e9fd5b 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -82,6 +82,7 @@ void colo_incoming_start_dirty_log(void);
/* Background snapshot */
bool ram_write_tracking_available(void);
bool ram_write_tracking_compatible(void);
+void ram_write_tracking_prepare(void);
int ram_write_tracking_start(void);
void ram_write_tracking_stop(void);
--
2.27.0
On 31.03.21 19:28, Andrey Gruzdev wrote:
> This commit solves the issue with userfault_fd WP feature that
> background snapshot is based on. For any never poluated or discarded
> memory page, the UFFDIO_WRITEPROTECT ioctl() would skip updating
> PTE for that page, thereby loosing WP setting for it.
>
> So we need to pre-fault pages for each RAM block to be protected
> before making a userfault_fd wr-protect ioctl().
>
> Signed-off-by: Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
> ---
> migration/migration.c | 6 ++++++
> migration/ram.c | 48 +++++++++++++++++++++++++++++++++++++++++++
> migration/ram.h | 1 +
> 3 files changed, 55 insertions(+)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index be4729e7c8..71bce15a1b 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -3827,6 +3827,12 @@ static void *bg_migration_thread(void *opaque)
>
> update_iteration_initial_status(s);
>
> + /*
> + * Prepare for tracking memory writes with UFFD-WP - populate
> + * RAM pages before protecting.
> + */
> + ram_write_tracking_prepare();
> +
> qemu_savevm_state_header(s->to_dst_file);
> qemu_savevm_state_setup(s->to_dst_file);
>
> diff --git a/migration/ram.c b/migration/ram.c
> index 40e78952ad..24c8627214 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1560,6 +1560,54 @@ out:
> return ret;
> }
>
> +/*
> + * ram_block_populate_pages: populate memory in the RAM block by reading
> + * an integer from the beginning of each page.
> + *
> + * Since it's solely used for userfault_fd WP feature, here we just
> + * hardcode page size to qemu_real_host_page_size.
> + *
> + * @bs: RAM block to populate
> + */
> +static void ram_block_populate_pages(RAMBlock *bs)
Usually we use "rb" or "block"; however migration/ram.c seems to do
things differently.
> +{
> + char *ptr = (char *) bs->host;
> +
> + for (ram_addr_t offset = 0; offset < bs->used_length;
> + offset += qemu_real_host_page_size) {
> + char tmp = *(ptr + offset);
^ missing empty line.
> + /* Don't optimize the read out */
> + asm volatile("" : "+r" (tmp));
> + }
Reviewed-by: David Hildenbrand <david@redhat.com>
and might want to add
Reported-by: David Hildenbrand <david@redhat.com>
(also to patch #2)
--
Thanks,
David / dhildenb
On 31.03.21 19:33, David Hildenbrand wrote:
> On 31.03.21 19:28, Andrey Gruzdev wrote:
>> This commit solves the issue with userfault_fd WP feature that
>> background snapshot is based on. For any never poluated or discarded
>> memory page, the UFFDIO_WRITEPROTECT ioctl() would skip updating
>> PTE for that page, thereby loosing WP setting for it.
>>
>> So we need to pre-fault pages for each RAM block to be protected
>> before making a userfault_fd wr-protect ioctl().
>>
>> Signed-off-by: Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
>> ---
>> migration/migration.c | 6 ++++++
>> migration/ram.c | 48 +++++++++++++++++++++++++++++++++++++++++++
>> migration/ram.h | 1 +
>> 3 files changed, 55 insertions(+)
>>
>> diff --git a/migration/migration.c b/migration/migration.c
>> index be4729e7c8..71bce15a1b 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -3827,6 +3827,12 @@ static void *bg_migration_thread(void *opaque)
>>
>> update_iteration_initial_status(s);
>>
>> + /*
>> + * Prepare for tracking memory writes with UFFD-WP - populate
>> + * RAM pages before protecting.
>> + */
>> + ram_write_tracking_prepare();
>> +
>> qemu_savevm_state_header(s->to_dst_file);
>> qemu_savevm_state_setup(s->to_dst_file);
>>
>> diff --git a/migration/ram.c b/migration/ram.c
>> index 40e78952ad..24c8627214 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -1560,6 +1560,54 @@ out:
>> return ret;
>> }
>>
>> +/*
>> + * ram_block_populate_pages: populate memory in the RAM block by reading
>> + * an integer from the beginning of each page.
>> + *
>> + * Since it's solely used for userfault_fd WP feature, here we just
>> + * hardcode page size to qemu_real_host_page_size.
>> + *
>> + * @bs: RAM block to populate
>> + */
>> +static void ram_block_populate_pages(RAMBlock *bs)
>
> Usually we use "rb" or "block"; however migration/ram.c seems to do
> things differently.
>
>> +{
>> + char *ptr = (char *) bs->host;
>> +
>> + for (ram_addr_t offset = 0; offset < bs->used_length;
>> + offset += qemu_real_host_page_size) {
>> + char tmp = *(ptr + offset);
>
> ^ missing empty line.
>
>> + /* Don't optimize the read out */
>> + asm volatile("" : "+r" (tmp));
>> + }
>
>
> Reviewed-by: David Hildenbrand <david@redhat.com>
>
>
> and might want to add
>
> Reported-by: David Hildenbrand <david@redhat.com>
>
> (also to patch #2)
>
Also, proper "Fixes:" tags would be handy as well.
--
Thanks,
David / dhildenb
On 31.03.2021 20:37, David Hildenbrand wrote:
> On 31.03.21 19:33, David Hildenbrand wrote:
>> On 31.03.21 19:28, Andrey Gruzdev wrote:
>>> This commit solves the issue with userfault_fd WP feature that
>>> background snapshot is based on. For any never poluated or discarded
>>> memory page, the UFFDIO_WRITEPROTECT ioctl() would skip updating
>>> PTE for that page, thereby loosing WP setting for it.
>>>
>>> So we need to pre-fault pages for each RAM block to be protected
>>> before making a userfault_fd wr-protect ioctl().
>>>
>>> Signed-off-by: Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
>>> ---
>>> migration/migration.c | 6 ++++++
>>> migration/ram.c | 48
>>> +++++++++++++++++++++++++++++++++++++++++++
>>> migration/ram.h | 1 +
>>> 3 files changed, 55 insertions(+)
>>>
>>> diff --git a/migration/migration.c b/migration/migration.c
>>> index be4729e7c8..71bce15a1b 100644
>>> --- a/migration/migration.c
>>> +++ b/migration/migration.c
>>> @@ -3827,6 +3827,12 @@ static void *bg_migration_thread(void *opaque)
>>> update_iteration_initial_status(s);
>>> + /*
>>> + * Prepare for tracking memory writes with UFFD-WP - populate
>>> + * RAM pages before protecting.
>>> + */
>>> + ram_write_tracking_prepare();
>>> +
>>> qemu_savevm_state_header(s->to_dst_file);
>>> qemu_savevm_state_setup(s->to_dst_file);
>>> diff --git a/migration/ram.c b/migration/ram.c
>>> index 40e78952ad..24c8627214 100644
>>> --- a/migration/ram.c
>>> +++ b/migration/ram.c
>>> @@ -1560,6 +1560,54 @@ out:
>>> return ret;
>>> }
>>> +/*
>>> + * ram_block_populate_pages: populate memory in the RAM block by
>>> reading
>>> + * an integer from the beginning of each page.
>>> + *
>>> + * Since it's solely used for userfault_fd WP feature, here we just
>>> + * hardcode page size to qemu_real_host_page_size.
>>> + *
>>> + * @bs: RAM block to populate
>>> + */
>>> +static void ram_block_populate_pages(RAMBlock *bs)
>>
>> Usually we use "rb" or "block"; however migration/ram.c seems to do
>> things differently.
>>
>>> +{
>>> + char *ptr = (char *) bs->host;
>>> +
>>> + for (ram_addr_t offset = 0; offset < bs->used_length;
>>> + offset += qemu_real_host_page_size) {
>>> + char tmp = *(ptr + offset);
>>
>> ^ missing empty line.
>>
>>> + /* Don't optimize the read out */
>>> + asm volatile("" : "+r" (tmp));
>>> + }
>>
>>
>> Reviewed-by: David Hildenbrand <david@redhat.com>
>>
>>
>> and might want to add
>>
>> Reported-by: David Hildenbrand <david@redhat.com>
>>
>> (also to patch #2)
>>
>
> Also, proper "Fixes:" tags would be handy as well.
>
Ok, thanks.
--
Andrey Gruzdev, Principal Engineer
Virtuozzo GmbH +7-903-247-6397
virtuzzo.com
On 31.03.2021 20:33, David Hildenbrand wrote:
> On 31.03.21 19:28, Andrey Gruzdev wrote:
>> This commit solves the issue with userfault_fd WP feature that
>> background snapshot is based on. For any never poluated or discarded
>> memory page, the UFFDIO_WRITEPROTECT ioctl() would skip updating
>> PTE for that page, thereby loosing WP setting for it.
>>
>> So we need to pre-fault pages for each RAM block to be protected
>> before making a userfault_fd wr-protect ioctl().
>>
>> Signed-off-by: Andrey Gruzdev <andrey.gruzdev@virtuozzo.com>
>> ---
>> migration/migration.c | 6 ++++++
>> migration/ram.c | 48 +++++++++++++++++++++++++++++++++++++++++++
>> migration/ram.h | 1 +
>> 3 files changed, 55 insertions(+)
>>
>> diff --git a/migration/migration.c b/migration/migration.c
>> index be4729e7c8..71bce15a1b 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -3827,6 +3827,12 @@ static void *bg_migration_thread(void *opaque)
>> update_iteration_initial_status(s);
>> + /*
>> + * Prepare for tracking memory writes with UFFD-WP - populate
>> + * RAM pages before protecting.
>> + */
>> + ram_write_tracking_prepare();
>> +
>> qemu_savevm_state_header(s->to_dst_file);
>> qemu_savevm_state_setup(s->to_dst_file);
>> diff --git a/migration/ram.c b/migration/ram.c
>> index 40e78952ad..24c8627214 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -1560,6 +1560,54 @@ out:
>> return ret;
>> }
>> +/*
>> + * ram_block_populate_pages: populate memory in the RAM block by
>> reading
>> + * an integer from the beginning of each page.
>> + *
>> + * Since it's solely used for userfault_fd WP feature, here we just
>> + * hardcode page size to qemu_real_host_page_size.
>> + *
>> + * @bs: RAM block to populate
>> + */
>> +static void ram_block_populate_pages(RAMBlock *bs)
>
> Usually we use "rb" or "block"; however migration/ram.c seems to do
> things differently.
>
Yes, I'll rename.
>> +{
>> + char *ptr = (char *) bs->host;
>> +
>> + for (ram_addr_t offset = 0; offset < bs->used_length;
>> + offset += qemu_real_host_page_size) {
>> + char tmp = *(ptr + offset);
>
> ^ missing empty line.
>
Aha.
>> + /* Don't optimize the read out */
>> + asm volatile("" : "+r" (tmp));
>> + }
>
>
> Reviewed-by: David Hildenbrand <david@redhat.com>
>
>
> and might want to add
>
> Reported-by: David Hildenbrand <david@redhat.com>
>
> (also to patch #2)
>
I'll add, thanks.
--
Andrey Gruzdev, Principal Engineer
Virtuozzo GmbH +7-903-247-6397
virtuzzo.com
© 2016 - 2026 Red Hat, Inc.