[PATCH v2 2/3] i386/a-b-bootblock: zero the first byte of each page on start

Daniil Tatianin posted 3 patches 1 year ago
Maintainers: Juan Quintela <quintela@redhat.com>, Peter Xu <peterx@redhat.com>, Leonardo Bras <leobras@redhat.com>, Thomas Huth <thuth@redhat.com>
[PATCH v2 2/3] i386/a-b-bootblock: zero the first byte of each page on start
Posted by Daniil Tatianin 1 year ago
The migration qtest all the way up to this point used to work by sheer
luck relying on the contents of all pages from 1MiB to 100MiB to contain
the same one value in the first byte initially.

This easily breaks if we reduce the amount of RAM for the test instances
from 150MiB to e.g 110MiB since that makes SeaBIOS dirty some of the
pages starting at about 0x5dd2000 (~93 MiB) as it reuses those for the
HighMemory allocator since commit dc88f9b72df ("malloc: use large
ZoneHigh when there is enough memory").

This would result in the following errors:
    12/60 qemu:qtest+qtest-x86_64 / qtest-x86_64/migration-test                 ERROR           2.74s   killed by signal 6 SIGABRT
    stderr:
    Memory content inconsistency at 5dd2000 first_byte = cc last_byte = cb current = 9e hit_edge = 1
    Memory content inconsistency at 5dd3000 first_byte = cc last_byte = cb current = 89 hit_edge = 1
    Memory content inconsistency at 5dd4000 first_byte = cc last_byte = cb current = 23 hit_edge = 1
    Memory content inconsistency at 5dd5000 first_byte = cc last_byte = cb current = 31 hit_edge = 1
    Memory content inconsistency at 5dd6000 first_byte = cc last_byte = cb current = 70 hit_edge = 1
    Memory content inconsistency at 5dd7000 first_byte = cc last_byte = cb current = ff hit_edge = 1
    Memory content inconsistency at 5dd8000 first_byte = cc last_byte = cb current = 54 hit_edge = 1
    Memory content inconsistency at 5dd9000 first_byte = cc last_byte = cb current = 64 hit_edge = 1
    Memory content inconsistency at 5dda000 first_byte = cc last_byte = cb current = 1d hit_edge = 1
    Memory content inconsistency at 5ddb000 first_byte = cc last_byte = cb current = 1a hit_edge = 1
    and in another 26 pages**
    ERROR:../tests/qtest/migration-test.c:300:check_guests_ram: assertion failed: (bad == 0)

Fix this by always zeroing the first byte of each page in the range so
that we get consistent results no matter the initial contents.

Fixes: ea0c6d62391 ("test: Postcopy")
Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
Reviewed-by: Peter Xu <peterx@redhat.com>
---
 tests/migration/i386/a-b-bootblock.S |  9 +++++++++
 tests/migration/i386/a-b-bootblock.h | 16 ++++++++--------
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/tests/migration/i386/a-b-bootblock.S b/tests/migration/i386/a-b-bootblock.S
index 036216e4a7..6bb9999d60 100644
--- a/tests/migration/i386/a-b-bootblock.S
+++ b/tests/migration/i386/a-b-bootblock.S
@@ -44,6 +44,15 @@ start:             # at 0x7c00 ?
 
         # bl keeps a counter so we limit the output speed
         mov $0, %bl
+
+pre_zero:
+        mov $TEST_MEM_START,%eax
+do_zero:
+        movb $0, (%eax)
+        add $4096,%eax
+        cmp $TEST_MEM_END,%eax
+        jl do_zero
+
 mainloop:
         mov $TEST_MEM_START,%eax
 innerloop:
diff --git a/tests/migration/i386/a-b-bootblock.h b/tests/migration/i386/a-b-bootblock.h
index b7b0fce2ee..5b523917ce 100644
--- a/tests/migration/i386/a-b-bootblock.h
+++ b/tests/migration/i386/a-b-bootblock.h
@@ -4,18 +4,18 @@
  * the header and the assembler differences in your patch submission.
  */
 unsigned char x86_bootsect[] = {
-  0xfa, 0x0f, 0x01, 0x16, 0x78, 0x7c, 0x66, 0xb8, 0x01, 0x00, 0x00, 0x00,
+  0xfa, 0x0f, 0x01, 0x16, 0x8c, 0x7c, 0x66, 0xb8, 0x01, 0x00, 0x00, 0x00,
   0x0f, 0x22, 0xc0, 0x66, 0xea, 0x20, 0x7c, 0x00, 0x00, 0x08, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe4, 0x92, 0x0c, 0x02,
   0xe6, 0x92, 0xb8, 0x10, 0x00, 0x00, 0x00, 0x8e, 0xd8, 0x66, 0xb8, 0x41,
   0x00, 0x66, 0xba, 0xf8, 0x03, 0xee, 0xb3, 0x00, 0xb8, 0x00, 0x00, 0x10,
-  0x00, 0xfe, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x40,
-  0x06, 0x7c, 0xf2, 0xfe, 0xc3, 0x80, 0xe3, 0x3f, 0x75, 0xe6, 0x66, 0xb8,
-  0x42, 0x00, 0x66, 0xba, 0xf8, 0x03, 0xee, 0xeb, 0xdb, 0x8d, 0x76, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00,
-  0x00, 0x9a, 0xcf, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0xcf, 0x00,
-  0x27, 0x00, 0x60, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0xc6, 0x00, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x3d, 0x00, 0x00,
+  0x40, 0x06, 0x7c, 0xf1, 0xb8, 0x00, 0x00, 0x10, 0x00, 0xfe, 0x00, 0x05,
+  0x00, 0x10, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x40, 0x06, 0x7c, 0xf2, 0xfe,
+  0xc3, 0x80, 0xe3, 0x3f, 0x75, 0xe6, 0x66, 0xb8, 0x42, 0x00, 0x66, 0xba,
+  0xf8, 0x03, 0xee, 0xeb, 0xdb, 0x8d, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x9a, 0xcf, 0x00,
+  0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0xcf, 0x00, 0x27, 0x00, 0x74, 0x7c,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-- 
2.34.1
Re: [PATCH v2 2/3] i386/a-b-bootblock: zero the first byte of each page on start
Posted by Juan Quintela 11 months, 3 weeks ago
Daniil Tatianin <d-tatianin@yandex-team.ru> wrote:
> The migration qtest all the way up to this point used to work by sheer
> luck relying on the contents of all pages from 1MiB to 100MiB to contain
> the same one value in the first byte initially.
>
> This easily breaks if we reduce the amount of RAM for the test instances
> from 150MiB to e.g 110MiB since that makes SeaBIOS dirty some of the
> pages starting at about 0x5dd2000 (~93 MiB) as it reuses those for the
> HighMemory allocator since commit dc88f9b72df ("malloc: use large
> ZoneHigh when there is enough memory").
>
> This would result in the following errors:
>     12/60 qemu:qtest+qtest-x86_64 / qtest-x86_64/migration-test                 ERROR           2.74s   killed by signal 6 SIGABRT
>     stderr:
>     Memory content inconsistency at 5dd2000 first_byte = cc last_byte = cb current = 9e hit_edge = 1
>     Memory content inconsistency at 5dd3000 first_byte = cc last_byte = cb current = 89 hit_edge = 1
>     Memory content inconsistency at 5dd4000 first_byte = cc last_byte = cb current = 23 hit_edge = 1
>     Memory content inconsistency at 5dd5000 first_byte = cc last_byte = cb current = 31 hit_edge = 1
>     Memory content inconsistency at 5dd6000 first_byte = cc last_byte = cb current = 70 hit_edge = 1
>     Memory content inconsistency at 5dd7000 first_byte = cc last_byte = cb current = ff hit_edge = 1
>     Memory content inconsistency at 5dd8000 first_byte = cc last_byte = cb current = 54 hit_edge = 1
>     Memory content inconsistency at 5dd9000 first_byte = cc last_byte = cb current = 64 hit_edge = 1
>     Memory content inconsistency at 5dda000 first_byte = cc last_byte = cb current = 1d hit_edge = 1
>     Memory content inconsistency at 5ddb000 first_byte = cc last_byte = cb current = 1a hit_edge = 1
>     and in another 26 pages**
>     ERROR:../tests/qtest/migration-test.c:300:check_guests_ram: assertion failed: (bad == 0)
>
> Fix this by always zeroing the first byte of each page in the range so
> that we get consistent results no matter the initial contents.
>
> Fixes: ea0c6d62391 ("test: Postcopy")
> Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
> Reviewed-by: Peter Xu <peterx@redhat.com>
> ---

Reviewed-by: Juan Quintela <quintela@redhat.com>

queued.
Re: [PATCH v2 2/3] i386/a-b-bootblock: zero the first byte of each page on start
Posted by Vladimir Sementsov-Ogievskiy 12 months ago
On 19.09.23 13:23, Daniil Tatianin wrote:
> The migration qtest all the way up to this point used to work by sheer
> luck relying on the contents of all pages from 1MiB to 100MiB to contain
> the same one value in the first byte initially.
> 
> This easily breaks if we reduce the amount of RAM for the test instances
> from 150MiB to e.g 110MiB since that makes SeaBIOS dirty some of the
> pages starting at about 0x5dd2000 (~93 MiB) as it reuses those for the
> HighMemory allocator since commit dc88f9b72df ("malloc: use large
> ZoneHigh when there is enough memory").
> 
> This would result in the following errors:
>      12/60 qemu:qtest+qtest-x86_64 / qtest-x86_64/migration-test                 ERROR           2.74s   killed by signal 6 SIGABRT
>      stderr:
>      Memory content inconsistency at 5dd2000 first_byte = cc last_byte = cb current = 9e hit_edge = 1
>      Memory content inconsistency at 5dd3000 first_byte = cc last_byte = cb current = 89 hit_edge = 1
>      Memory content inconsistency at 5dd4000 first_byte = cc last_byte = cb current = 23 hit_edge = 1
>      Memory content inconsistency at 5dd5000 first_byte = cc last_byte = cb current = 31 hit_edge = 1
>      Memory content inconsistency at 5dd6000 first_byte = cc last_byte = cb current = 70 hit_edge = 1
>      Memory content inconsistency at 5dd7000 first_byte = cc last_byte = cb current = ff hit_edge = 1
>      Memory content inconsistency at 5dd8000 first_byte = cc last_byte = cb current = 54 hit_edge = 1
>      Memory content inconsistency at 5dd9000 first_byte = cc last_byte = cb current = 64 hit_edge = 1
>      Memory content inconsistency at 5dda000 first_byte = cc last_byte = cb current = 1d hit_edge = 1
>      Memory content inconsistency at 5ddb000 first_byte = cc last_byte = cb current = 1a hit_edge = 1
>      and in another 26 pages**
>      ERROR:../tests/qtest/migration-test.c:300:check_guests_ram: assertion failed: (bad == 0)
> 
> Fix this by always zeroing the first byte of each page in the range so
> that we get consistent results no matter the initial contents.
> 
> Fixes: ea0c6d62391 ("test: Postcopy")
> Signed-off-by: Daniil Tatianin<d-tatianin@yandex-team.ru>
> Reviewed-by: Peter Xu<peterx@redhat.com>


Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>

-- 
Best regards,
Vladimir