hw/i386/x86.c | 21 +++++++++++++++++--- include/standard-headers/asm-x86/bootparam.h | 1 + 2 files changed, 19 insertions(+), 3 deletions(-)
Tiny machines optimized for fast boot time generally don't use EFI,
which means a random seed has to be supplied some other way. For this
purpose, Linux (≥5.20) supports passing a seed in the setup_data table
with SETUP_RNG_SEED, specially intended for hypervisors, kexec, and
specialized bootloaders. The linked commit shows the upstream kernel
implementation.
Link: https://git.kernel.org/tip/tip/c/68b8e9713c8
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Eduardo Habkost <eduardo@habkost.net>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Philippe Mathieu-Daudé <f4bug@amsat.org>
Cc: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
hw/i386/x86.c | 21 +++++++++++++++++---
include/standard-headers/asm-x86/bootparam.h | 1 +
2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 6003b4b2df..284c97f158 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -26,6 +26,7 @@
#include "qemu/cutils.h"
#include "qemu/units.h"
#include "qemu/datadir.h"
+#include "qemu/guest-random.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qapi-visit-common.h"
@@ -771,7 +772,7 @@ void x86_load_linux(X86MachineState *x86ms,
bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled;
uint16_t protocol;
int setup_size, kernel_size, cmdline_size;
- int dtb_size, setup_data_offset;
+ int dtb_size, setup_data_offset, last_setup_data_offset = 0;
uint32_t initrd_max;
uint8_t header[8192], *setup, *kernel;
hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
@@ -1063,16 +1064,30 @@ void x86_load_linux(X86MachineState *x86ms,
kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size;
kernel = g_realloc(kernel, kernel_size);
- stq_p(header + 0x250, prot_addr + setup_data_offset);
setup_data = (struct setup_data *)(kernel + setup_data_offset);
- setup_data->next = 0;
+ setup_data->next = last_setup_data_offset;
setup_data->type = cpu_to_le32(SETUP_DTB);
setup_data->len = cpu_to_le32(dtb_size);
load_image_size(dtb_filename, setup_data->data, dtb_size);
+
+ last_setup_data_offset = prot_addr + setup_data_offset;
}
+ setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16);
+ kernel_size = setup_data_offset + sizeof(struct setup_data) + 32;
+ kernel = g_realloc(kernel, kernel_size);
+ setup_data = (struct setup_data *)(kernel + setup_data_offset);
+ setup_data->next = last_setup_data_offset;
+ setup_data->type = cpu_to_le32(SETUP_RNG_SEED);
+ setup_data->len = cpu_to_le32(32);
+ qemu_guest_getrandom_nofail(setup_data->data, 32);
+
+ last_setup_data_offset = prot_addr + setup_data_offset;
+
+ stq_p(header + 0x250, last_setup_data_offset);
+
/*
* If we're starting an encrypted VM, it will be OVMF based, which uses the
* efi stub for booting and doesn't require any values to be placed in the
diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h
index 072e2ed546..b2aaad10e5 100644
--- a/include/standard-headers/asm-x86/bootparam.h
+++ b/include/standard-headers/asm-x86/bootparam.h
@@ -10,6 +10,7 @@
#define SETUP_EFI 4
#define SETUP_APPLE_PROPERTIES 5
#define SETUP_JAILHOUSE 6
+#define SETUP_RNG_SEED 9
#define SETUP_INDIRECT (1<<31)
--
2.35.1
On Thu, Jul 21, 2022 at 12:09:59PM +0200, Jason A. Donenfeld wrote: > Tiny machines optimized for fast boot time generally don't use EFI, > which means a random seed has to be supplied some other way. For this > purpose, Linux (≥5.20) supports passing a seed in the setup_data table > with SETUP_RNG_SEED, specially intended for hypervisors, kexec, and > specialized bootloaders. The linked commit shows the upstream kernel > implementation. > > Link: https://git.kernel.org/tip/tip/c/68b8e9713c8 > Cc: Michael S. Tsirkin <mst@redhat.com> > Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> > Cc: Paolo Bonzini <pbonzini@redhat.com> > Cc: Richard Henderson <richard.henderson@linaro.org> > Cc: Eduardo Habkost <eduardo@habkost.net> > Cc: Peter Maydell <peter.maydell@linaro.org> > Cc: Philippe Mathieu-Daudé <f4bug@amsat.org> > Cc: Laurent Vivier <laurent@vivier.eu> > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> > --- > hw/i386/x86.c | 21 +++++++++++++++++--- > include/standard-headers/asm-x86/bootparam.h | 1 + > 2 files changed, 19 insertions(+), 3 deletions(-) > > diff --git a/hw/i386/x86.c b/hw/i386/x86.c > index 6003b4b2df..284c97f158 100644 > --- a/hw/i386/x86.c > +++ b/hw/i386/x86.c > @@ -26,6 +26,7 @@ > #include "qemu/cutils.h" > #include "qemu/units.h" > #include "qemu/datadir.h" > +#include "qemu/guest-random.h" > #include "qapi/error.h" > #include "qapi/qmp/qerror.h" > #include "qapi/qapi-visit-common.h" > @@ -771,7 +772,7 @@ void x86_load_linux(X86MachineState *x86ms, > bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled; > uint16_t protocol; > int setup_size, kernel_size, cmdline_size; > - int dtb_size, setup_data_offset; > + int dtb_size, setup_data_offset, last_setup_data_offset = 0; > uint32_t initrd_max; > uint8_t header[8192], *setup, *kernel; > hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; > @@ -1063,16 +1064,30 @@ void x86_load_linux(X86MachineState *x86ms, > kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; > kernel = g_realloc(kernel, kernel_size); > > - stq_p(header + 0x250, prot_addr + setup_data_offset); > setup_data = (struct setup_data *)(kernel + setup_data_offset); > - setup_data->next = 0; > + setup_data->next = last_setup_data_offset; does this make any difference? if the idea is that we'll add more stuff down the road, then see below ... > setup_data->type = cpu_to_le32(SETUP_DTB); > setup_data->len = cpu_to_le32(dtb_size); > > load_image_size(dtb_filename, setup_data->data, dtb_size); > + > + last_setup_data_offset = prot_addr + setup_data_offset; if the idea is that we'll add more stuff down the road, then it should be += here. > } > > + setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); > + kernel_size = setup_data_offset + sizeof(struct setup_data) + 32; > + kernel = g_realloc(kernel, kernel_size); > + setup_data = (struct setup_data *)(kernel + setup_data_offset); > + setup_data->next = last_setup_data_offset; Likely broken on LE. > + setup_data->type = cpu_to_le32(SETUP_RNG_SEED); > + setup_data->len = cpu_to_le32(32); > + qemu_guest_getrandom_nofail(setup_data->data, 32); > + > + last_setup_data_offset = prot_addr + setup_data_offset; where does this 32 come from? maybe make it a macro. > + > + stq_p(header + 0x250, last_setup_data_offset); add a comment while we are at it? > + > /* > * If we're starting an encrypted VM, it will be OVMF based, which uses the > * efi stub for booting and doesn't require any values to be placed in the > diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h > index 072e2ed546..b2aaad10e5 100644 > --- a/include/standard-headers/asm-x86/bootparam.h > +++ b/include/standard-headers/asm-x86/bootparam.h > @@ -10,6 +10,7 @@ > #define SETUP_EFI 4 > #define SETUP_APPLE_PROPERTIES 5 > #define SETUP_JAILHOUSE 6 > +#define SETUP_RNG_SEED 9 > > #define SETUP_INDIRECT (1<<31) > > -- > 2.35.1
Hi Michael, Thanks for the feedback. On Thu, Jul 21, 2022 at 06:35:41AM -0400, Michael S. Tsirkin wrote: > > - setup_data->next = 0; > > + setup_data->next = last_setup_data_offset; > > does this make any difference? if the idea is that we'll add more stuff > down the road, then see below ... It doesn't; it's just for completeness, in case somebody decides to add something prior, and then less code has to change and there's less chance of an error. The compiler generates the same code either way. > > > setup_data->type = cpu_to_le32(SETUP_DTB); > > setup_data->len = cpu_to_le32(dtb_size); > > > > load_image_size(dtb_filename, setup_data->data, dtb_size); > > + > > + last_setup_data_offset = prot_addr + setup_data_offset; > > > if the idea is that we'll add more stuff down the road, then > it should be += here. It's just poorly named actually. It should be called "prev_setup_data_prot_addr" or something. I'll find a better name for v+1. > > > } > > > > + setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); > > + kernel_size = setup_data_offset + sizeof(struct setup_data) + 32; > > + kernel = g_realloc(kernel, kernel_size); > > + setup_data = (struct setup_data *)(kernel + setup_data_offset); > > + setup_data->next = last_setup_data_offset; > > Likely broken on LE. Nice catch, thanks. > > > + setup_data->type = cpu_to_le32(SETUP_RNG_SEED); > > + setup_data->len = cpu_to_le32(32); > > + qemu_guest_getrandom_nofail(setup_data->data, 32); > > + > > + last_setup_data_offset = prot_addr + setup_data_offset; > > > where does this 32 come from? maybe make it a macro. Will do. > > > + > > + stq_p(header + 0x250, last_setup_data_offset); > > add a comment while we are at it? Ack. Jason
Tiny machines optimized for fast boot time generally don't use EFI,
which means a random seed has to be supplied some other way. For this
purpose, Linux (≥5.20) supports passing a seed in the setup_data table
with SETUP_RNG_SEED, specially intended for hypervisors, kexec, and
specialized bootloaders. The linked commit shows the upstream kernel
implementation.
Link: https://git.kernel.org/tip/tip/c/68b8e9713c8
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Eduardo Habkost <eduardo@habkost.net>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Philippe Mathieu-Daudé <f4bug@amsat.org>
Cc: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
hw/i386/x86.c | 21 +++++++++++++++++---
include/standard-headers/asm-x86/bootparam.h | 1 +
2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 6003b4b2df..dd44c72ba3 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -26,6 +26,7 @@
#include "qemu/cutils.h"
#include "qemu/units.h"
#include "qemu/datadir.h"
+#include "qemu/guest-random.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qapi-visit-common.h"
@@ -774,7 +775,7 @@ void x86_load_linux(X86MachineState *x86ms,
int dtb_size, setup_data_offset;
uint32_t initrd_max;
uint8_t header[8192], *setup, *kernel;
- hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
+ hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0;
FILE *f;
char *vmode;
MachineState *machine = MACHINE(x86ms);
@@ -784,6 +785,7 @@ void x86_load_linux(X86MachineState *x86ms,
const char *dtb_filename = machine->dtb;
const char *kernel_cmdline = machine->kernel_cmdline;
SevKernelLoaderContext sev_load_ctx = {};
+ enum { RNG_SEED_LENGTH = 32 };
/* Align to 16 bytes as a paranoia measure */
cmdline_size = (strlen(kernel_cmdline) + 16) & ~15;
@@ -1063,16 +1065,29 @@ void x86_load_linux(X86MachineState *x86ms,
kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size;
kernel = g_realloc(kernel, kernel_size);
- stq_p(header + 0x250, prot_addr + setup_data_offset);
setup_data = (struct setup_data *)(kernel + setup_data_offset);
- setup_data->next = 0;
+ setup_data->next = cpu_to_le64(first_setup_data);
+ first_setup_data = prot_addr + setup_data_offset;
setup_data->type = cpu_to_le32(SETUP_DTB);
setup_data->len = cpu_to_le32(dtb_size);
load_image_size(dtb_filename, setup_data->data, dtb_size);
}
+ setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16);
+ kernel_size = setup_data_offset + sizeof(struct setup_data) + RNG_SEED_LENGTH;
+ kernel = g_realloc(kernel, kernel_size);
+ setup_data = (struct setup_data *)(kernel + setup_data_offset);
+ setup_data->next = cpu_to_le64(first_setup_data);
+ first_setup_data = prot_addr + setup_data_offset;
+ setup_data->type = cpu_to_le32(SETUP_RNG_SEED);
+ setup_data->len = cpu_to_le32(32);
+ qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
+
+ /* Offset 0x250 is a pointer to the first setup_data link. */
+ stq_p(header + 0x250, first_setup_data);
+
/*
* If we're starting an encrypted VM, it will be OVMF based, which uses the
* efi stub for booting and doesn't require any values to be placed in the
diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h
index 072e2ed546..b2aaad10e5 100644
--- a/include/standard-headers/asm-x86/bootparam.h
+++ b/include/standard-headers/asm-x86/bootparam.h
@@ -10,6 +10,7 @@
#define SETUP_EFI 4
#define SETUP_APPLE_PROPERTIES 5
#define SETUP_JAILHOUSE 6
+#define SETUP_RNG_SEED 9
#define SETUP_INDIRECT (1<<31)
--
2.35.1
Tiny machines optimized for fast boot time generally don't use EFI,
which means a random seed has to be supplied some other way. For this
purpose, Linux (≥5.20) supports passing a seed in the setup_data table
with SETUP_RNG_SEED, specially intended for hypervisors, kexec, and
specialized bootloaders. The linked commit shows the upstream kernel
implementation.
Link: https://git.kernel.org/tip/tip/c/68b8e9713c8
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Eduardo Habkost <eduardo@habkost.net>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Philippe Mathieu-Daudé <f4bug@amsat.org>
Cc: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
hw/i386/x86.c | 21 +++++++++++++++++---
include/standard-headers/asm-x86/bootparam.h | 1 +
2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 6003b4b2df..56896cb4b2 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -26,6 +26,7 @@
#include "qemu/cutils.h"
#include "qemu/units.h"
#include "qemu/datadir.h"
+#include "qemu/guest-random.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qapi-visit-common.h"
@@ -774,7 +775,7 @@ void x86_load_linux(X86MachineState *x86ms,
int dtb_size, setup_data_offset;
uint32_t initrd_max;
uint8_t header[8192], *setup, *kernel;
- hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
+ hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0;
FILE *f;
char *vmode;
MachineState *machine = MACHINE(x86ms);
@@ -784,6 +785,7 @@ void x86_load_linux(X86MachineState *x86ms,
const char *dtb_filename = machine->dtb;
const char *kernel_cmdline = machine->kernel_cmdline;
SevKernelLoaderContext sev_load_ctx = {};
+ enum { RNG_SEED_LENGTH = 32 };
/* Align to 16 bytes as a paranoia measure */
cmdline_size = (strlen(kernel_cmdline) + 16) & ~15;
@@ -1063,16 +1065,29 @@ void x86_load_linux(X86MachineState *x86ms,
kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size;
kernel = g_realloc(kernel, kernel_size);
- stq_p(header + 0x250, prot_addr + setup_data_offset);
setup_data = (struct setup_data *)(kernel + setup_data_offset);
- setup_data->next = 0;
+ setup_data->next = cpu_to_le64(first_setup_data);
+ first_setup_data = prot_addr + setup_data_offset;
setup_data->type = cpu_to_le32(SETUP_DTB);
setup_data->len = cpu_to_le32(dtb_size);
load_image_size(dtb_filename, setup_data->data, dtb_size);
}
+ setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16);
+ kernel_size = setup_data_offset + sizeof(struct setup_data) + RNG_SEED_LENGTH;
+ kernel = g_realloc(kernel, kernel_size);
+ setup_data = (struct setup_data *)(kernel + setup_data_offset);
+ setup_data->next = cpu_to_le64(first_setup_data);
+ first_setup_data = prot_addr + setup_data_offset;
+ setup_data->type = cpu_to_le32(SETUP_RNG_SEED);
+ setup_data->len = cpu_to_le32(RNG_SEED_LENGTH);
+ qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
+
+ /* Offset 0x250 is a pointer to the first setup_data link. */
+ stq_p(header + 0x250, first_setup_data);
+
/*
* If we're starting an encrypted VM, it will be OVMF based, which uses the
* efi stub for booting and doesn't require any values to be placed in the
diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h
index 072e2ed546..b2aaad10e5 100644
--- a/include/standard-headers/asm-x86/bootparam.h
+++ b/include/standard-headers/asm-x86/bootparam.h
@@ -10,6 +10,7 @@
#define SETUP_EFI 4
#define SETUP_APPLE_PROPERTIES 5
#define SETUP_JAILHOUSE 6
+#define SETUP_RNG_SEED 9
#define SETUP_INDIRECT (1<<31)
--
2.35.1
On Thu, Jul 21, 2022 at 12:49:50PM +0200, Jason A. Donenfeld wrote: > Tiny machines optimized for fast boot time generally don't use EFI, > which means a random seed has to be supplied some other way. For this > purpose, Linux (≥5.20) supports passing a seed in the setup_data table > with SETUP_RNG_SEED, specially intended for hypervisors, kexec, and > specialized bootloaders. The linked commit shows the upstream kernel > implementation. > > Link: https://git.kernel.org/tip/tip/c/68b8e9713c8 > Cc: Michael S. Tsirkin <mst@redhat.com> > Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> > Cc: Paolo Bonzini <pbonzini@redhat.com> > Cc: Richard Henderson <richard.henderson@linaro.org> > Cc: Eduardo Habkost <eduardo@habkost.net> > Cc: Peter Maydell <peter.maydell@linaro.org> > Cc: Philippe Mathieu-Daudé <f4bug@amsat.org> > Cc: Laurent Vivier <laurent@vivier.eu> > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> Well why not. Reviewed-by: Michael S. Tsirkin <mst@redhat.com> who's merging this? Paolo me or you? > --- > hw/i386/x86.c | 21 +++++++++++++++++--- > include/standard-headers/asm-x86/bootparam.h | 1 + > 2 files changed, 19 insertions(+), 3 deletions(-) > > diff --git a/hw/i386/x86.c b/hw/i386/x86.c > index 6003b4b2df..56896cb4b2 100644 > --- a/hw/i386/x86.c > +++ b/hw/i386/x86.c > @@ -26,6 +26,7 @@ > #include "qemu/cutils.h" > #include "qemu/units.h" > #include "qemu/datadir.h" > +#include "qemu/guest-random.h" > #include "qapi/error.h" > #include "qapi/qmp/qerror.h" > #include "qapi/qapi-visit-common.h" > @@ -774,7 +775,7 @@ void x86_load_linux(X86MachineState *x86ms, > int dtb_size, setup_data_offset; > uint32_t initrd_max; > uint8_t header[8192], *setup, *kernel; > - hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0; > + hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0; > FILE *f; > char *vmode; > MachineState *machine = MACHINE(x86ms); > @@ -784,6 +785,7 @@ void x86_load_linux(X86MachineState *x86ms, > const char *dtb_filename = machine->dtb; > const char *kernel_cmdline = machine->kernel_cmdline; > SevKernelLoaderContext sev_load_ctx = {}; > + enum { RNG_SEED_LENGTH = 32 }; > > /* Align to 16 bytes as a paranoia measure */ > cmdline_size = (strlen(kernel_cmdline) + 16) & ~15; > @@ -1063,16 +1065,29 @@ void x86_load_linux(X86MachineState *x86ms, > kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size; > kernel = g_realloc(kernel, kernel_size); > > - stq_p(header + 0x250, prot_addr + setup_data_offset); > > setup_data = (struct setup_data *)(kernel + setup_data_offset); > - setup_data->next = 0; > + setup_data->next = cpu_to_le64(first_setup_data); > + first_setup_data = prot_addr + setup_data_offset; > setup_data->type = cpu_to_le32(SETUP_DTB); > setup_data->len = cpu_to_le32(dtb_size); > > load_image_size(dtb_filename, setup_data->data, dtb_size); > } > > + setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16); > + kernel_size = setup_data_offset + sizeof(struct setup_data) + RNG_SEED_LENGTH; > + kernel = g_realloc(kernel, kernel_size); > + setup_data = (struct setup_data *)(kernel + setup_data_offset); > + setup_data->next = cpu_to_le64(first_setup_data); > + first_setup_data = prot_addr + setup_data_offset; > + setup_data->type = cpu_to_le32(SETUP_RNG_SEED); > + setup_data->len = cpu_to_le32(RNG_SEED_LENGTH); > + qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH); > + > + /* Offset 0x250 is a pointer to the first setup_data link. */ > + stq_p(header + 0x250, first_setup_data); > + > /* > * If we're starting an encrypted VM, it will be OVMF based, which uses the > * efi stub for booting and doesn't require any values to be placed in the > diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h > index 072e2ed546..b2aaad10e5 100644 > --- a/include/standard-headers/asm-x86/bootparam.h > +++ b/include/standard-headers/asm-x86/bootparam.h > @@ -10,6 +10,7 @@ > #define SETUP_EFI 4 > #define SETUP_APPLE_PROPERTIES 5 > #define SETUP_JAILHOUSE 6 > +#define SETUP_RNG_SEED 9 > > #define SETUP_INDIRECT (1<<31) > > -- > 2.35.1
On 7/21/22 13:00, Michael S. Tsirkin wrote: > Well why not. > > Reviewed-by: Michael S. Tsirkin<mst@redhat.com> > > who's merging this? Paolo me or you? I don't think this should be merged as is. The linuxboot ROM takes the data from fw_cfg, and (with the exception of ACPI tables) that data is not migrated. Because reading it into the guest is not atomic, both sides must match. This version of the patches at least doesn't move the preexisting DTB entry of the setup_data, but it still has a mismatching size and that can be a problem when migrating backwards. So for example there is a race window where the guest has read the list head that points to the random number seed entry, but the entry isn't there on the destination. Note that I _do_ agree that this is theoretical and basically impossible to hit. On the other hand, it is there and it's just not the way that we've handled guest ABI compatibility: migration issues are already not fun to debug without having to keep track of which differences are intentional and "harmless" differences and which are bugs. So for this particular case the structure of fw_cfg data MUST match on the source and destination given a machine type and options. Thinking more about it, it's trivial to hit it if you use TCG record-replay, because if the record and replay side will get different sizes fw_cfg and then everything goes south. This shows why this mindset is the correct one even for issues that are theoretical in the case migration: if we hadn't, record-replay would have been much harder to implement. Of course I'm not going to introduce a whole machinery to migrate the seed: if you want determinism (as in the record-replay case) there is the -seed option, if you don't then it's not a huge deal to have a seed that could theoretically be generated half on the source half on the destination. And in fact, after some refactoring, the changes aren't hard to do. My patch is 35(+) 0(-). Paolo
Hi Paolo, On Thu, Jul 21, 2022 at 01:47:02PM +0200, Paolo Bonzini wrote: > On 7/21/22 13:00, Michael S. Tsirkin wrote: > > Well why not. > > > > Reviewed-by: Michael S. Tsirkin<mst@redhat.com> > > > > who's merging this? Paolo me or you? > > I don't think this should be merged as is. > > The linuxboot ROM takes the data from fw_cfg, and (with the exception of > ACPI tables) that data is not migrated. Because reading it into the > guest is not atomic, both sides must match. This version of the patches > at least doesn't move the preexisting DTB entry of the setup_data, but > it still has a mismatching size and that can be a problem when migrating > backwards. As discussed online, this seems absolutely preposterous and will never happen anywhere real ever at all. Trying to account for it is adding needless complexity for no real world benefit; it's the type of thinking that results in a mess. Further, conditionalizing the RNG seed on something else means fewer users receive the increased security of having an early boottime seed. This seems like a silly direction go go in. But to assess things in the open here: - On upgrades, there's no problem because the old bytes don't move. - On downgrades, there's mostly no problem because next will point to 0. - On downgrade there could be some ridiculous theoretical problem if the reader has already read a non-zero next. But this will never actually happen in practice. So we really should just stick with the simple and straight forward path that this v6 accomplishes, and not muck things up with stupidity. Jason
On Thu, Jul 21, 2022 at 02:16:53PM +0200, Jason A. Donenfeld wrote: > Hi Paolo, > > On Thu, Jul 21, 2022 at 01:47:02PM +0200, Paolo Bonzini wrote: > > On 7/21/22 13:00, Michael S. Tsirkin wrote: > > > Well why not. > > > > > > Reviewed-by: Michael S. Tsirkin<mst@redhat.com> > > > > > > who's merging this? Paolo me or you? > > > > I don't think this should be merged as is. > > > > The linuxboot ROM takes the data from fw_cfg, and (with the exception of > > ACPI tables) that data is not migrated. Because reading it into the > > guest is not atomic, both sides must match. This version of the patches > > at least doesn't move the preexisting DTB entry of the setup_data, but > > it still has a mismatching size and that can be a problem when migrating > > backwards. > > As discussed online, this seems absolutely preposterous and will never > happen anywhere real ever at all. Trying to account for it is adding > needless complexity for no real world benefit; it's the type of thinking > that results in a mess. Further, conditionalizing the RNG seed on > something else means fewer users receive the increased security of > having an early boottime seed. This seems like a silly direction go go > in. As mentioned previously, few users are going to benefit from this anyway, because most public cloud VMs don't use direct kernel boot, the guest firmware loads the kernel from the guest /boot partition. Regardless though, what Paolo described with a machine type specific property won't have a significant impact on availablity. This is NOT requiring users to opt-in to the seed in general. Tieing settings to the machine type means that newly provisioned guests will get it enabled out of the box, as they'll typically use the latest machine type. Pre-existing guests which have merely upgraded their QEMU instance won't get the feature, because they'll be fixed on the old machine type to guarantee no guest ABI change. This isn't a problem, as such pre-existing guests likely won't have the new Linux code to consume the seed anyway. With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On 7/21/22 14:27, Daniel P. Berrangé wrote: > > Pre-existing guests which have merely upgraded their QEMU instance > won't get the feature, because they'll be fixed on the old machine > type to guarantee no guest ABI change. This isn't a problem, as > such pre-existing guests likely won't have the new Linux code to > consume the seed anyway. In fact this isn't a given either and depends on how these pre-existing guests are managed. People that do not use Libvirt, or that just start their guests with "virsh create" and some on-disk XML file, _will_ get the new feature. The way QEMU does things is that fw_cfg is part of guest ABI, and this is _not_ up for discussion. You're not the first one to be confused[1] and you probably will not be the last, so this means that the whole concept of guest ABI should be documented better. (By the way, even though I agree that the failure is completely theoretical apart from the record-replay case, it can actually be reproduced easily by sticking a long-enough sleep in pc-bios/optionrom/linuxboot_dma.c). Paolo
Hi Paolo, Daniel, Okay, patch incoming. Jason
Tiny machines optimized for fast boot time generally don't use EFI,
which means a random seed has to be supplied some other way. For this
purpose, Linux (≥5.20) supports passing a seed in the setup_data table
with SETUP_RNG_SEED, specially intended for hypervisors, kexec, and
specialized bootloaders. The linked commit shows the upstream kernel
implementation.
At Paolo's request, we don't pass these to versioned machine types ≤7.0.
Link: https://git.kernel.org/tip/tip/c/68b8e9713c8
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Eduardo Habkost <eduardo@habkost.net>
Cc: Peter Maydell <peter.maydell@linaro.org>
Cc: Philippe Mathieu-Daudé <f4bug@amsat.org>
Cc: Laurent Vivier <laurent@vivier.eu>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
hw/i386/microvm.c | 2 +-
hw/i386/pc.c | 4 +--
hw/i386/pc_piix.c | 2 ++
hw/i386/pc_q35.c | 2 ++
hw/i386/x86.c | 26 +++++++++++++++++---
include/hw/i386/pc.h | 3 +++
include/hw/i386/x86.h | 3 ++-
include/standard-headers/asm-x86/bootparam.h | 1 +
8 files changed, 35 insertions(+), 8 deletions(-)
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index 754f1d0593..5b0ee3ab26 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -332,7 +332,7 @@ static void microvm_memory_init(MicrovmMachineState *mms)
rom_set_fw(fw_cfg);
if (machine->kernel_filename != NULL) {
- x86_load_linux(x86ms, fw_cfg, 0, true);
+ x86_load_linux(x86ms, fw_cfg, 0, true, false);
}
if (mms->option_roms) {
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 774cb2bf07..d2b5823ffb 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -796,7 +796,7 @@ void xen_load_linux(PCMachineState *pcms)
rom_set_fw(fw_cfg);
x86_load_linux(x86ms, fw_cfg, pcmc->acpi_data_size,
- pcmc->pvh_enabled);
+ pcmc->pvh_enabled, pcmc->legacy_no_rng_seed);
for (i = 0; i < nb_option_roms; i++) {
assert(!strcmp(option_rom[i].name, "linuxboot.bin") ||
!strcmp(option_rom[i].name, "linuxboot_dma.bin") ||
@@ -992,7 +992,7 @@ void pc_memory_init(PCMachineState *pcms,
if (linux_boot) {
x86_load_linux(x86ms, fw_cfg, pcmc->acpi_data_size,
- pcmc->pvh_enabled);
+ pcmc->pvh_enabled, pcmc->legacy_no_rng_seed);
}
for (i = 0; i < nb_option_roms; i++) {
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index a234989ac3..fbf9465318 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -438,9 +438,11 @@ DEFINE_I440FX_MACHINE(v7_1, "pc-i440fx-7.1", NULL,
static void pc_i440fx_7_0_machine_options(MachineClass *m)
{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_i440fx_7_1_machine_options(m);
m->alias = NULL;
m->is_default = false;
+ pcmc->legacy_no_rng_seed = true;
compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
}
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index f96cbd04e2..12cc76aaf8 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -375,8 +375,10 @@ DEFINE_Q35_MACHINE(v7_1, "pc-q35-7.1", NULL,
static void pc_q35_7_0_machine_options(MachineClass *m)
{
+ PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
pc_q35_7_1_machine_options(m);
m->alias = NULL;
+ pcmc->legacy_no_rng_seed = true;
compat_props_add(m->compat_props, hw_compat_7_0, hw_compat_7_0_len);
compat_props_add(m->compat_props, pc_compat_7_0, pc_compat_7_0_len);
}
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 6003b4b2df..ecea25d249 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -26,6 +26,7 @@
#include "qemu/cutils.h"
#include "qemu/units.h"
#include "qemu/datadir.h"
+#include "qemu/guest-random.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qapi-visit-common.h"
@@ -766,7 +767,8 @@ static bool load_elfboot(const char *kernel_filename,
void x86_load_linux(X86MachineState *x86ms,
FWCfgState *fw_cfg,
int acpi_data_size,
- bool pvh_enabled)
+ bool pvh_enabled,
+ bool legacy_no_rng_seed)
{
bool linuxboot_dma_enabled = X86_MACHINE_GET_CLASS(x86ms)->fwcfg_dma_enabled;
uint16_t protocol;
@@ -774,7 +776,7 @@ void x86_load_linux(X86MachineState *x86ms,
int dtb_size, setup_data_offset;
uint32_t initrd_max;
uint8_t header[8192], *setup, *kernel;
- hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0;
+ hwaddr real_addr, prot_addr, cmdline_addr, initrd_addr = 0, first_setup_data = 0;
FILE *f;
char *vmode;
MachineState *machine = MACHINE(x86ms);
@@ -784,6 +786,7 @@ void x86_load_linux(X86MachineState *x86ms,
const char *dtb_filename = machine->dtb;
const char *kernel_cmdline = machine->kernel_cmdline;
SevKernelLoaderContext sev_load_ctx = {};
+ enum { RNG_SEED_LENGTH = 32 };
/* Align to 16 bytes as a paranoia measure */
cmdline_size = (strlen(kernel_cmdline) + 16) & ~15;
@@ -1063,16 +1066,31 @@ void x86_load_linux(X86MachineState *x86ms,
kernel_size = setup_data_offset + sizeof(struct setup_data) + dtb_size;
kernel = g_realloc(kernel, kernel_size);
- stq_p(header + 0x250, prot_addr + setup_data_offset);
setup_data = (struct setup_data *)(kernel + setup_data_offset);
- setup_data->next = 0;
+ setup_data->next = cpu_to_le64(first_setup_data);
+ first_setup_data = prot_addr + setup_data_offset;
setup_data->type = cpu_to_le32(SETUP_DTB);
setup_data->len = cpu_to_le32(dtb_size);
load_image_size(dtb_filename, setup_data->data, dtb_size);
}
+ if (!legacy_no_rng_seed) {
+ setup_data_offset = QEMU_ALIGN_UP(kernel_size, 16);
+ kernel_size = setup_data_offset + sizeof(struct setup_data) + RNG_SEED_LENGTH;
+ kernel = g_realloc(kernel, kernel_size);
+ setup_data = (struct setup_data *)(kernel + setup_data_offset);
+ setup_data->next = cpu_to_le64(first_setup_data);
+ first_setup_data = prot_addr + setup_data_offset;
+ setup_data->type = cpu_to_le32(SETUP_RNG_SEED);
+ setup_data->len = cpu_to_le32(RNG_SEED_LENGTH);
+ qemu_guest_getrandom_nofail(setup_data->data, RNG_SEED_LENGTH);
+ }
+
+ /* Offset 0x250 is a pointer to the first setup_data link. */
+ stq_p(header + 0x250, first_setup_data);
+
/*
* If we're starting an encrypted VM, it will be OVMF based, which uses the
* efi stub for booting and doesn't require any values to be placed in the
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index b7735dccfc..2a8ffbcfa8 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -127,6 +127,9 @@ struct PCMachineClass {
/* create kvmclock device even when KVM PV features are not exposed */
bool kvmclock_create_always;
+
+ /* skip passing an rng seed for legacy machines */
+ bool legacy_no_rng_seed;
};
#define TYPE_PC_MACHINE "generic-pc-machine"
diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h
index 9089bdd99c..6bdf1f6ab2 100644
--- a/include/hw/i386/x86.h
+++ b/include/hw/i386/x86.h
@@ -123,7 +123,8 @@ void x86_bios_rom_init(MachineState *ms, const char *default_firmware,
void x86_load_linux(X86MachineState *x86ms,
FWCfgState *fw_cfg,
int acpi_data_size,
- bool pvh_enabled);
+ bool pvh_enabled,
+ bool legacy_no_rng_seed);
bool x86_machine_is_smm_enabled(const X86MachineState *x86ms);
bool x86_machine_is_acpi_enabled(const X86MachineState *x86ms);
diff --git a/include/standard-headers/asm-x86/bootparam.h b/include/standard-headers/asm-x86/bootparam.h
index 072e2ed546..b2aaad10e5 100644
--- a/include/standard-headers/asm-x86/bootparam.h
+++ b/include/standard-headers/asm-x86/bootparam.h
@@ -10,6 +10,7 @@
#define SETUP_EFI 4
#define SETUP_APPLE_PROPERTIES 5
#define SETUP_JAILHOUSE 6
+#define SETUP_RNG_SEED 9
#define SETUP_INDIRECT (1<<31)
--
2.35.1
On 7/21/22 14:56, Jason A. Donenfeld wrote: > Tiny machines optimized for fast boot time generally don't use EFI, > which means a random seed has to be supplied some other way. For this > purpose, Linux (≥5.20) supports passing a seed in the setup_data table > with SETUP_RNG_SEED, specially intended for hypervisors, kexec, and > specialized bootloaders. The linked commit shows the upstream kernel > implementation. > > At Paolo's request, we don't pass these to versioned machine types ≤7.0. > > Link: https://git.kernel.org/tip/tip/c/68b8e9713c8 > Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> > Cc: Paolo Bonzini <pbonzini@redhat.com> > Cc: Richard Henderson <richard.henderson@linaro.org> > Cc: Eduardo Habkost <eduardo@habkost.net> > Cc: Peter Maydell <peter.maydell@linaro.org> > Cc: Philippe Mathieu-Daudé <f4bug@amsat.org> > Cc: Laurent Vivier <laurent@vivier.eu> > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> LGTM. Paolo
Hi Paolo, On Thu, Jul 21, 2022 at 3:00 PM Paolo Bonzini <pbonzini@redhat.com> wrote: > > On 7/21/22 14:56, Jason A. Donenfeld wrote: > > Tiny machines optimized for fast boot time generally don't use EFI, > > which means a random seed has to be supplied some other way. For this > > purpose, Linux (≥5.20) supports passing a seed in the setup_data table > > with SETUP_RNG_SEED, specially intended for hypervisors, kexec, and > > specialized bootloaders. The linked commit shows the upstream kernel > > implementation. > > > > At Paolo's request, we don't pass these to versioned machine types ≤7.0. > > > > Link: https://git.kernel.org/tip/tip/c/68b8e9713c8 > > Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> > > Cc: Paolo Bonzini <pbonzini@redhat.com> > > Cc: Richard Henderson <richard.henderson@linaro.org> > > Cc: Eduardo Habkost <eduardo@habkost.net> > > Cc: Peter Maydell <peter.maydell@linaro.org> > > Cc: Philippe Mathieu-Daudé <f4bug@amsat.org> > > Cc: Laurent Vivier <laurent@vivier.eu> > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> > > LGTM. Thanks for the Ack. Do you want to pick this up or will Michael? Also, I assume this now is the working patch, rather than your 4 parter that potentially bifurcates this discussion? Jason
On 7/21/22 15:04, Jason A. Donenfeld wrote: >> LGTM. > Thanks for the Ack. Do you want to pick this up or will Michael? I was waiting to check if Michael would reply, but I've since sent out the pull request so I included this one as well. > Also, I assume this now is the working patch, rather than your 4 > parter that potentially bifurcates this discussion? I will rebase my own patches and send after 7.1. You're all set. Paolo
On 7/21/22 14:56, Jason A. Donenfeld wrote: > Tiny machines optimized for fast boot time generally don't use EFI, > which means a random seed has to be supplied some other way. For this > purpose, Linux (≥5.20) supports passing a seed in the setup_data table > with SETUP_RNG_SEED, specially intended for hypervisors, kexec, and > specialized bootloaders. The linked commit shows the upstream kernel > implementation. > > At Paolo's request, we don't pass these to versioned machine types ≤7.0. > > Link: https://git.kernel.org/tip/tip/c/68b8e9713c8 > Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> > Cc: Paolo Bonzini <pbonzini@redhat.com> > Cc: Richard Henderson <richard.henderson@linaro.org> > Cc: Eduardo Habkost <eduardo@habkost.net> > Cc: Peter Maydell <peter.maydell@linaro.org> > Cc: Philippe Mathieu-Daudé <f4bug@amsat.org> > Cc: Laurent Vivier <laurent@vivier.eu> > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> LGTM. Paolo
On Thu, Jul 21, 2022 at 1:00 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > On Thu, Jul 21, 2022 at 12:49:50PM +0200, Jason A. Donenfeld wrote: > > Tiny machines optimized for fast boot time generally don't use EFI, > > which means a random seed has to be supplied some other way. For this > > purpose, Linux (≥5.20) supports passing a seed in the setup_data table > > with SETUP_RNG_SEED, specially intended for hypervisors, kexec, and > > specialized bootloaders. The linked commit shows the upstream kernel > > implementation. > > > > Link: https://git.kernel.org/tip/tip/c/68b8e9713c8 > > Cc: Michael S. Tsirkin <mst@redhat.com> > > Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com> > > Cc: Paolo Bonzini <pbonzini@redhat.com> > > Cc: Richard Henderson <richard.henderson@linaro.org> > > Cc: Eduardo Habkost <eduardo@habkost.net> > > Cc: Peter Maydell <peter.maydell@linaro.org> > > Cc: Philippe Mathieu-Daudé <f4bug@amsat.org> > > Cc: Laurent Vivier <laurent@vivier.eu> > > Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> > > Well why not. > > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > > who's merging this? Paolo me or you? How about you go ahead and merge it. Thanks, Jason
© 2016 - 2024 Red Hat, Inc.