An EIF (Enclave Image Format)[1] image is used to boot an AWS nitro
enclave[2] virtual machine. The EIF file contains the necessary
kernel, cmdline, ramdisk(s) sections to boot.
This commit adds support for loading EIF image using the microvm
machine code. For microvm to boot from an EIF file, the kernel and
ramdisk(s) are extracted into a temporary kernel and a temporary
initrd file which are then hooked into the regular x86 boot mechanism
along with the extracted cmdline.
Although not useful for the microvm machine itself, this is needed
as the following commit adds support for a new machine type
'nitro-enclave' which uses the microvm machine type as parent. The
code for checking and loading EIF will be put inside a 'nitro-enclave'
machine type check in the following commit so that microvm cannot load
EIF because it shouldn't.
[1] https://github.com/aws/aws-nitro-enclaves-image-format
[2] https://aws.amazon.com/ec2/nitro/nitro-enclaves/
Signed-off-by: Dorjoy Chowdhury <dorjoychy111@gmail.com>
---
hw/i386/eif.c | 486 ++++++++++++++++++++++++++++++++++++++++++++
hw/i386/eif.h | 20 ++
hw/i386/meson.build | 2 +-
hw/i386/microvm.c | 134 +++++++++++-
4 files changed, 640 insertions(+), 2 deletions(-)
create mode 100644 hw/i386/eif.c
create mode 100644 hw/i386/eif.h
diff --git a/hw/i386/eif.c b/hw/i386/eif.c
new file mode 100644
index 0000000000..761c489d33
--- /dev/null
+++ b/hw/i386/eif.c
@@ -0,0 +1,486 @@
+/*
+ * EIF (Enclave Image Format) related helpers
+ *
+ * Copyright (c) 2024 Dorjoy Chowdhury <dorjoychy111@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/bswap.h"
+#include "qapi/error.h"
+#include <zlib.h> /* for crc32 */
+
+#include "hw/i386/eif.h"
+
+#define MAX_SECTIONS 32
+
+/* members are ordered according to field order in .eif file */
+typedef struct EifHeader {
+ uint8_t magic[4]; /* must be .eif in ascii i.e., [46, 101, 105, 102] */
+ uint16_t version;
+ uint16_t flags;
+ uint64_t default_memory;
+ uint64_t default_cpus;
+ uint16_t reserved;
+ uint16_t section_cnt;
+ uint64_t section_offsets[MAX_SECTIONS];
+ uint64_t section_sizes[MAX_SECTIONS];
+ uint32_t unused;
+ uint32_t eif_crc32;
+} QEMU_PACKED EifHeader;
+
+/* members are ordered according to field order in .eif file */
+typedef struct EifSectionHeader {
+ /*
+ * 0 = invalid, 1 = kernel, 2 = cmdline, 3 = ramdisk, 4 = signature,
+ * 5 = metadata
+ */
+ uint16_t section_type;
+ uint16_t flags;
+ uint64_t section_size;
+} QEMU_PACKED EifSectionHeader;
+
+enum EifSectionTypes {
+ EIF_SECTION_INVALID = 0,
+ EIF_SECTION_KERNEL = 1,
+ EIF_SECTION_CMDLINE = 2,
+ EIF_SECTION_RAMDISK = 3,
+ EIF_SECTION_SIGNATURE = 4,
+ EIF_SECTION_METADATA = 5,
+ EIF_SECTION_MAX = 6,
+};
+
+static const char *section_type_to_string(uint16_t type)
+{
+ const char *str;
+ switch (type) {
+ case EIF_SECTION_INVALID:
+ str = "invalid";
+ break;
+ case EIF_SECTION_KERNEL:
+ str = "kernel";
+ break;
+ case EIF_SECTION_CMDLINE:
+ str = "cmdline";
+ break;
+ case EIF_SECTION_RAMDISK:
+ str = "ramdisk";
+ break;
+ case EIF_SECTION_SIGNATURE:
+ str = "signature";
+ break;
+ case EIF_SECTION_METADATA:
+ str = "metadata";
+ break;
+ default:
+ str = "unknown";
+ break;
+ }
+
+ return str;
+}
+
+static bool read_eif_header(FILE *f, EifHeader *header, uint32_t *crc,
+ Error **errp)
+{
+ size_t got;
+ size_t header_size = sizeof(*header);
+
+ got = fread(header, 1, header_size, f);
+ if (got != header_size) {
+ error_setg(errp, "Failed to read EIF header");
+ return false;
+ }
+
+ if (memcmp(header->magic, ".eif", 4) != 0) {
+ error_setg(errp, "Invalid EIF image. Magic mismatch.");
+ return false;
+ }
+
+ /* Exclude header->eif_crc32 field from CRC calculation */
+ *crc = crc32(*crc, (uint8_t *)header, header_size - 4);
+
+ header->version = be16_to_cpu(header->version);
+ header->flags = be16_to_cpu(header->flags);
+ header->default_memory = be64_to_cpu(header->default_memory);
+ header->default_cpus = be64_to_cpu(header->default_cpus);
+ header->reserved = be16_to_cpu(header->reserved);
+ header->section_cnt = be16_to_cpu(header->section_cnt);
+
+ for (int i = 0; i < MAX_SECTIONS; ++i) {
+ header->section_offsets[i] = be64_to_cpu(header->section_offsets[i]);
+ }
+
+ for (int i = 0; i < MAX_SECTIONS; ++i) {
+ header->section_sizes[i] = be64_to_cpu(header->section_sizes[i]);
+ }
+
+ header->unused = be32_to_cpu(header->unused);
+ header->eif_crc32 = be32_to_cpu(header->eif_crc32);
+ return true;
+}
+
+static bool read_eif_section_header(FILE *f, EifSectionHeader *section_header,
+ uint32_t *crc, Error **errp)
+{
+ size_t got;
+ size_t section_header_size = sizeof(*section_header);
+
+ got = fread(section_header, 1, section_header_size, f);
+ if (got != section_header_size) {
+ error_setg(errp, "Failed to read EIF section header");
+ return false;
+ }
+
+ *crc = crc32(*crc, (uint8_t *)section_header, section_header_size);
+
+ section_header->section_type = be16_to_cpu(section_header->section_type);
+ section_header->flags = be16_to_cpu(section_header->flags);
+ section_header->section_size = be64_to_cpu(section_header->section_size);
+ return true;
+}
+
+/*
+ * Upon success, the caller is responsible for unlinking and freeing *tmp_path.
+ */
+static bool get_tmp_file(const char *template, char **tmp_path, Error **errp)
+{
+ int tmp_fd;
+
+ *tmp_path = NULL;
+ tmp_fd = g_file_open_tmp(template, tmp_path, NULL);
+ if (tmp_fd < 0 || *tmp_path == NULL) {
+ error_setg(errp, "Failed to create temporary file for template %s",
+ template);
+ return false;
+ }
+
+ close(tmp_fd);
+ return true;
+}
+
+static void safe_fclose(FILE *f)
+{
+ if (f) {
+ fclose(f);
+ }
+}
+
+static void safe_unlink(char *f)
+{
+ if (f) {
+ unlink(f);
+ }
+}
+
+/*
+ * Upon success, the caller is reponsible for unlinking and freeing *kernel_path
+ */
+static bool read_eif_kernel(FILE *f, uint64_t size, char **kernel_path,
+ uint32_t *crc, Error **errp)
+{
+ size_t got;
+ FILE *tmp_file = NULL;
+ uint8_t *kernel = NULL;
+
+ *kernel_path = NULL;
+ if (!get_tmp_file("eif-kernel-XXXXXX", kernel_path, errp)) {
+ goto cleanup;
+ }
+
+ tmp_file = fopen(*kernel_path, "wb");
+ if (tmp_file == NULL) {
+ error_setg_errno(errp, errno, "Failed to open temporary file %s",
+ *kernel_path);
+ goto cleanup;
+ }
+
+ kernel = g_malloc(size);
+ got = fread(kernel, 1, size, f);
+ if ((uint64_t) got != size) {
+ error_setg(errp, "Failed to read EIF kernel section data");
+ goto cleanup;
+ }
+
+ got = fwrite(kernel, 1, size, tmp_file);
+ if ((uint64_t) got != size) {
+ error_setg(errp, "Failed to write EIF kernel section data to temporary"
+ " file");
+ goto cleanup;
+ }
+
+ *crc = crc32(*crc, kernel, size);
+ g_free(kernel);
+ fclose(tmp_file);
+
+ return true;
+
+ cleanup:
+ safe_fclose(tmp_file);
+
+ safe_unlink(*kernel_path);
+ g_free(*kernel_path);
+ *kernel_path = NULL;
+
+ g_free(kernel);
+ return false;
+}
+
+static bool read_eif_cmdline(FILE *f, uint64_t size, char *cmdline,
+ uint32_t *crc, Error **errp)
+{
+ size_t got = fread(cmdline, 1, size, f);
+ if ((uint64_t) got != size) {
+ error_setg(errp, "Failed to read EIF cmdline section data");
+ return false;
+ }
+
+ *crc = crc32(*crc, (uint8_t *)cmdline, size);
+ return true;
+}
+
+static bool read_eif_ramdisk(FILE *eif, FILE *initrd, uint64_t size,
+ uint32_t *crc, Error **errp)
+{
+ size_t got;
+ uint8_t *ramdisk = g_malloc(size);
+
+ got = fread(ramdisk, 1, size, eif);
+ if ((uint64_t) got != size) {
+ error_setg(errp, "Failed to read EIF ramdisk section data");
+ goto cleanup;
+ }
+
+ got = fwrite(ramdisk, 1, size, initrd);
+ if ((uint64_t) got != size) {
+ error_setg(errp, "Failed to write EIF ramdisk data to temporary file");
+ goto cleanup;
+ }
+
+ *crc = crc32(*crc, ramdisk, size);
+ g_free(ramdisk);
+ return true;
+
+ cleanup:
+ g_free(ramdisk);
+ return false;
+}
+
+/*
+ * Upon success, the caller is reponsible for unlinking and freeing
+ * *kernel_path, *initrd_path and freeing *cmdline.
+ */
+bool read_eif_file(const char *eif_path, char **kernel_path, char **initrd_path,
+ char **cmdline, Error **errp)
+{
+ FILE *f = NULL;
+ FILE *initrd_f = NULL;
+ uint32_t crc = 0;
+ EifHeader eif_header;
+ bool seen_sections[EIF_SECTION_MAX] = {false};
+
+ *kernel_path = *initrd_path = *cmdline = NULL;
+
+ f = fopen(eif_path, "rb");
+ if (f == NULL) {
+ error_setg_errno(errp, errno, "Failed to open %s", eif_path);
+ goto cleanup;
+ }
+
+ if (!read_eif_header(f, &eif_header, &crc, errp)) {
+ goto cleanup;
+ }
+
+ if (eif_header.version < 4) {
+ error_setg(errp, "Expected EIF version 4 or greater");
+ goto cleanup;
+ }
+
+ if (eif_header.flags != 0) {
+ error_setg(errp, "Expected EIF flags to be 0");
+ goto cleanup;
+ }
+
+ if (eif_header.section_cnt > MAX_SECTIONS) {
+ error_setg(errp, "EIF header section count must not be greater than "
+ "%d but found %d", MAX_SECTIONS, eif_header.section_cnt);
+ goto cleanup;
+ }
+
+ for (int i = 0; i < eif_header.section_cnt; ++i) {
+ EifSectionHeader section_header;
+ uint16_t section_type;
+
+ if (fseek(f, eif_header.section_offsets[i], SEEK_SET) != 0) {
+ error_setg_errno(errp, errno, "Failed to offset to %lu in EIF file",
+ eif_header.section_offsets[i]);
+ goto cleanup;
+ }
+
+ if (!read_eif_section_header(f, §ion_header, &crc, errp)) {
+ goto cleanup;
+ }
+
+ if (section_header.flags != 0) {
+ error_setg(errp, "Expected EIF section header flags to be 0");
+ goto cleanup;
+ }
+
+ if (eif_header.section_sizes[i] != section_header.section_size) {
+ error_setg(errp, "EIF section size mismatch between header and "
+ "section header: header %lu, section header %lu",
+ eif_header.section_sizes[i],
+ section_header.section_size);
+ goto cleanup;
+ }
+
+ section_type = section_header.section_type;
+
+ switch (section_type) {
+ case EIF_SECTION_KERNEL:
+ if (seen_sections[EIF_SECTION_KERNEL]) {
+ error_setg(errp, "Invalid EIF image. More than 1 kernel "
+ "section");
+ goto cleanup;
+ }
+ if (!read_eif_kernel(f, section_header.section_size, kernel_path,
+ &crc, errp)) {
+ goto cleanup;
+ }
+
+ break;
+ case EIF_SECTION_CMDLINE:
+ {
+ uint64_t size;
+ if (seen_sections[EIF_SECTION_CMDLINE]) {
+ error_setg(errp, "Invalid EIF image. More than 1 cmdline "
+ "section");
+ goto cleanup;
+ }
+ size = section_header.section_size;
+ *cmdline = g_malloc(size + 1);
+ if (!read_eif_cmdline(f, size, *cmdline, &crc, errp)) {
+ goto cleanup;
+ }
+ (*cmdline)[size] = '\0';
+
+ break;
+ }
+ case EIF_SECTION_RAMDISK:
+ if (!seen_sections[EIF_SECTION_RAMDISK]) {
+ /*
+ * If this is the first time we are seeing a ramdisk section,
+ * we need to create the initrd temporary file.
+ */
+ if (!get_tmp_file("eif-initrd-XXXXXX", initrd_path, errp)) {
+ goto cleanup;
+ }
+ initrd_f = fopen(*initrd_path, "wb");
+ if (initrd_f == NULL) {
+ error_setg_errno(errp, errno, "Failed to open file %s",
+ *initrd_path);
+ goto cleanup;
+ }
+ }
+
+ if (!read_eif_ramdisk(f, initrd_f, section_header.section_size,
+ &crc, errp)) {
+ goto cleanup;
+ }
+
+ break;
+ default:
+ /* other sections including invalid or unknown sections */
+ {
+ uint8_t *buf;
+ size_t got;
+ uint64_t size = section_header.section_size;
+ buf = g_malloc(size);
+ got = fread(buf, 1, size, f);
+ if ((uint64_t) got != size) {
+ g_free(buf);
+ error_setg(errp, "Failed to read EIF %s section data",
+ section_type_to_string(section_type));
+ goto cleanup;
+ }
+ crc = crc32(crc, buf, size);
+ g_free(buf);
+ break;
+ }
+ }
+
+ if (section_type < EIF_SECTION_MAX) {
+ seen_sections[section_type] = true;
+ }
+ }
+
+ if (!seen_sections[EIF_SECTION_KERNEL]) {
+ error_setg(errp, "Invalid EIF image. No kernel section.");
+ goto cleanup;
+ }
+ if (!seen_sections[EIF_SECTION_CMDLINE]) {
+ error_setg(errp, "Invalid EIF image. No cmdline section.");
+ goto cleanup;
+ }
+ if (!seen_sections[EIF_SECTION_RAMDISK]) {
+ error_setg(errp, "Invalid EIF image. No ramdisk section.");
+ goto cleanup;
+ }
+
+ if (eif_header.eif_crc32 != crc) {
+ error_setg(errp, "CRC mismatch. Expected %u but header has %u.",
+ crc, eif_header.eif_crc32);
+ goto cleanup;
+ }
+
+ fclose(f);
+ fclose(initrd_f);
+ return true;
+
+ cleanup:
+ safe_fclose(f);
+ safe_fclose(initrd_f);
+
+ safe_unlink(*kernel_path);
+ g_free(*kernel_path);
+ *kernel_path = NULL;
+
+ safe_unlink(*initrd_path);
+ g_free(*initrd_path);
+ *initrd_path = NULL;
+
+ g_free(*cmdline);
+ *cmdline = NULL;
+
+ return false;
+}
+
+bool check_if_eif_file(const char *path, bool *is_eif, Error **errp)
+{
+ size_t got;
+ uint8_t buf[4];
+ FILE *f = NULL;
+
+ f = fopen(path, "rb");
+ if (f == NULL) {
+ error_setg_errno(errp, errno, "Failed to open file %s", path);
+ goto cleanup;
+ }
+
+ got = fread(buf, 1, 4, f);
+ if (got != 4) {
+ error_setg(errp, "Failed to read magic value from %s", path);
+ goto cleanup;
+ }
+
+ fclose(f);
+ *is_eif = !memcmp(buf, ".eif", 4);
+ return true;
+
+ cleanup:
+ safe_fclose(f);
+ return false;
+}
diff --git a/hw/i386/eif.h b/hw/i386/eif.h
new file mode 100644
index 0000000000..b71a27062d
--- /dev/null
+++ b/hw/i386/eif.h
@@ -0,0 +1,20 @@
+/*
+ * EIF (Enclave Image Format) related helpers
+ *
+ * Copyright (c) 2024 Dorjoy Chowdhury <dorjoychy111@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef HW_I386_EIF_H
+#define HW_I386_EIF_H
+
+bool read_eif_file(const char *eif_path, char **kernel_path, char **initrd_path,
+ char **kernel_cmdline, Error **errp);
+
+bool check_if_eif_file(const char *path, bool *is_eif, Error **errp);
+
+#endif
+
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index 03aad10df7..e398fc1d74 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -14,7 +14,7 @@ i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'),
i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'),
if_false: files('amd_iommu-stub.c'))
i386_ss.add(when: 'CONFIG_I440FX', if_true: files('pc_piix.c'))
-i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('x86-common.c', 'microvm.c', 'acpi-microvm.c', 'microvm-dt.c'))
+i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('x86-common.c', 'microvm.c', 'acpi-microvm.c', 'microvm-dt.c', 'eif.c'))
i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c'))
i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c'))
i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c'))
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index fec63cacfa..d52d85bd01 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -34,6 +34,7 @@
#include "hw/irq.h"
#include "hw/i386/kvm/clock.h"
#include "hw/i386/microvm.h"
+#include "hw/i386/eif.h"
#include "hw/i386/x86.h"
#include "target/i386/cpu.h"
#include "hw/intc/i8259.h"
@@ -281,6 +282,127 @@ static void microvm_devices_init(MicrovmMachineState *mms)
x86_bios_rom_init(x86ms, default_firmware, get_system_memory(), true);
}
+/* Expects file to have offset 0 before this function is called */
+static long get_file_size(FILE *f, Error **errp)
+{
+ long size;
+
+ if (fseek(f, 0, SEEK_END) != 0) {
+ error_setg_errno(errp, errno, "Failed to seek to the end of file");
+ return -1;
+ }
+
+ size = ftell(f);
+ if (size == -1) {
+ error_setg_errno(errp, errno, "Failed to get offset");
+ return -1;
+ }
+
+ if (fseek(f, 0, SEEK_SET) != 0) {
+ error_setg_errno(errp, errno, "Failed to seek back to the start");
+ return -1;
+ }
+
+ return size;
+}
+
+static void load_eif(MicrovmMachineState *mms, FWCfgState *fw_cfg)
+{
+ Error *err;
+ char *eif_kernel, *eif_initrd, *eif_cmdline;
+ MachineState *machine = MACHINE(mms);
+ X86MachineState *x86ms = X86_MACHINE(mms);
+
+ if (!read_eif_file(machine->kernel_filename, &eif_kernel, &eif_initrd,
+ &eif_cmdline, &err)) {
+ error_report_err(err);
+ exit(1);
+ }
+
+ g_free(machine->kernel_filename);
+ machine->kernel_filename = eif_kernel;
+
+ /*
+ * If an initrd argument was provided, let's concatenate it to the
+ * extracted EIF initrd temporary file.
+ */
+ if (machine->initrd_filename) {
+ long size;
+ size_t got;
+ uint8_t *buf;
+ FILE *initrd_f, *eif_initrd_f;
+
+ initrd_f = fopen(machine->initrd_filename, "rb");
+ if (initrd_f == NULL) {
+ error_setg_errno(&err, errno, "Failed to open initrd file %s",
+ machine->initrd_filename);
+ goto cleanup;
+ }
+
+ size = get_file_size(initrd_f, &err);
+ if (size == -1) {
+ goto cleanup;
+ }
+
+ buf = g_malloc(size);
+ got = fread(buf, 1, size, initrd_f);
+ if ((uint64_t) got != (uint64_t) size) {
+ error_setg(&err, "Failed to read initrd file %s",
+ machine->initrd_filename);
+ goto cleanup;
+ }
+
+ eif_initrd_f = fopen(eif_initrd, "ab");
+ if (eif_initrd_f == NULL) {
+ error_setg_errno(&err, errno, "Failed to open EIF initrd file %s",
+ eif_initrd);
+ goto cleanup;
+ }
+ got = fwrite(buf, 1, size, eif_initrd_f);
+ if ((uint64_t) got != (uint64_t) size) {
+ error_setg(&err, "Failed to append initrd %s to %s",
+ machine->initrd_filename, eif_initrd);
+ goto cleanup;
+ }
+
+ fclose(initrd_f);
+ fclose(eif_initrd_f);
+
+ g_free(buf);
+ g_free(machine->initrd_filename);
+
+ machine->initrd_filename = eif_initrd;
+ } else {
+ machine->initrd_filename = eif_initrd;
+ }
+
+ /*
+ * If kernel cmdline argument was provided, let's concatenate it to the
+ * extracted EIF kernel cmdline.
+ */
+ if (machine->kernel_cmdline != NULL) {
+ char *cmd = g_strdup_printf("%s %s", eif_cmdline,
+ machine->kernel_cmdline);
+ g_free(eif_cmdline);
+ g_free(machine->kernel_cmdline);
+ machine->kernel_cmdline = cmd;
+ } else {
+ machine->kernel_cmdline = eif_cmdline;
+ }
+
+ x86_load_linux(x86ms, fw_cfg, 0, true);
+
+ unlink(machine->kernel_filename);
+ unlink(machine->initrd_filename);
+ return;
+
+ cleanup:
+ error_report_err(err);
+ unlink(eif_kernel);
+ unlink(eif_initrd);
+ exit(1);
+}
+
static void microvm_memory_init(MicrovmMachineState *mms)
{
MachineState *machine = MACHINE(mms);
@@ -330,7 +452,17 @@ static void microvm_memory_init(MicrovmMachineState *mms)
rom_set_fw(fw_cfg);
if (machine->kernel_filename != NULL) {
- x86_load_linux(x86ms, fw_cfg, 0, true);
+ Error *err;
+ bool is_eif = false;
+ if (!check_if_eif_file(machine->kernel_filename, &is_eif, &err)) {
+ error_report_err(err);
+ exit(1);
+ }
+ if (is_eif) {
+ load_eif(mms, fw_cfg);
+ } else {
+ x86_load_linux(x86ms, fw_cfg, 0, true);
+ }
}
if (mms->option_roms) {
--
2.39.2
Hi Dorjoy, On 18/5/24 10:07, Dorjoy Chowdhury wrote: > An EIF (Enclave Image Format)[1] image is used to boot an AWS nitro > enclave[2] virtual machine. The EIF file contains the necessary > kernel, cmdline, ramdisk(s) sections to boot. > > This commit adds support for loading EIF image using the microvm > machine code. For microvm to boot from an EIF file, the kernel and > ramdisk(s) are extracted into a temporary kernel and a temporary > initrd file which are then hooked into the regular x86 boot mechanism > along with the extracted cmdline. > > Although not useful for the microvm machine itself, this is needed > as the following commit adds support for a new machine type > 'nitro-enclave' which uses the microvm machine type as parent. The > code for checking and loading EIF will be put inside a 'nitro-enclave' > machine type check in the following commit so that microvm cannot load > EIF because it shouldn't. > > [1] https://github.com/aws/aws-nitro-enclaves-image-format The documentation is rather scarse... > [2] https://aws.amazon.com/ec2/nitro/nitro-enclaves/ > > Signed-off-by: Dorjoy Chowdhury <dorjoychy111@gmail.com> > --- > hw/i386/eif.c | 486 ++++++++++++++++++++++++++++++++++++++++++++ > hw/i386/eif.h | 20 ++ > hw/i386/meson.build | 2 +- ... still it seems a generic loader, not restricted to x86. Maybe better add it as hw/core/loader-eif.[ch]? > hw/i386/microvm.c | 134 +++++++++++- > 4 files changed, 640 insertions(+), 2 deletions(-) > create mode 100644 hw/i386/eif.c > create mode 100644 hw/i386/eif.h
Hi Philippe, Thank you for reviewing. On Mon, May 27, 2024 at 4:47 PM Philippe Mathieu-Daudé <philmd@linaro.org> wrote: > > Hi Dorjoy, > > On 18/5/24 10:07, Dorjoy Chowdhury wrote: > > An EIF (Enclave Image Format)[1] image is used to boot an AWS nitro > > enclave[2] virtual machine. The EIF file contains the necessary > > kernel, cmdline, ramdisk(s) sections to boot. > > > > This commit adds support for loading EIF image using the microvm > > machine code. For microvm to boot from an EIF file, the kernel and > > ramdisk(s) are extracted into a temporary kernel and a temporary > > initrd file which are then hooked into the regular x86 boot mechanism > > along with the extracted cmdline. > > > > Although not useful for the microvm machine itself, this is needed > > as the following commit adds support for a new machine type > > 'nitro-enclave' which uses the microvm machine type as parent. The > > code for checking and loading EIF will be put inside a 'nitro-enclave' > > machine type check in the following commit so that microvm cannot load > > EIF because it shouldn't. > > > > [1] https://github.com/aws/aws-nitro-enclaves-image-format > > The documentation is rather scarse... > Do you mean documentation about EIF file format? If so, yes, right now there is no specification other than the github repo for EIF. > > [2] https://aws.amazon.com/ec2/nitro/nitro-enclaves/ > > > > Signed-off-by: Dorjoy Chowdhury <dorjoychy111@gmail.com> > > --- > > hw/i386/eif.c | 486 ++++++++++++++++++++++++++++++++++++++++++++ > > hw/i386/eif.h | 20 ++ > > hw/i386/meson.build | 2 +- > > ... still it seems a generic loader, not restricted to x86. > > Maybe better add it as hw/core/loader-eif.[ch]? > Yes, the code in eif.c is architecture agnostic. So it could make sense to move the files to hw/core. But I don't think the names should have "loader" prefix as there is no loading logic in eif.c. There is only logic for parsing and extracting kernel, intird, cmdline etc. Because nitro-enclave machine type is based on microvm which only supports x86 now, I think it also makes sense to keep the files inside hw/i386 for now as we can only really load x86 kernel using it. Maybe if we, in the future, add support for other architectures, then we can move them to hw/core. What do you think? Regards, Dorjoy
On 27.05.24 16:52, Dorjoy Chowdhury wrote: > Hi Philippe, > Thank you for reviewing. > > On Mon, May 27, 2024 at 4:47 PM Philippe Mathieu-Daudé > <philmd@linaro.org> wrote: >> Hi Dorjoy, >> >> On 18/5/24 10:07, Dorjoy Chowdhury wrote: >>> An EIF (Enclave Image Format)[1] image is used to boot an AWS nitro >>> enclave[2] virtual machine. The EIF file contains the necessary >>> kernel, cmdline, ramdisk(s) sections to boot. >>> >>> This commit adds support for loading EIF image using the microvm >>> machine code. For microvm to boot from an EIF file, the kernel and >>> ramdisk(s) are extracted into a temporary kernel and a temporary >>> initrd file which are then hooked into the regular x86 boot mechanism >>> along with the extracted cmdline. >>> >>> Although not useful for the microvm machine itself, this is needed >>> as the following commit adds support for a new machine type >>> 'nitro-enclave' which uses the microvm machine type as parent. The >>> code for checking and loading EIF will be put inside a 'nitro-enclave' >>> machine type check in the following commit so that microvm cannot load >>> EIF because it shouldn't. >>> >>> [1] https://github.com/aws/aws-nitro-enclaves-image-format >> The documentation is rather scarse... >> > Do you mean documentation about EIF file format? If so, yes, right > now there is no specification other than the github repo for EIF. > >>> [2] https://aws.amazon.com/ec2/nitro/nitro-enclaves/ >>> >>> Signed-off-by: Dorjoy Chowdhury <dorjoychy111@gmail.com> >>> --- >>> hw/i386/eif.c | 486 ++++++++++++++++++++++++++++++++++++++++++++ >>> hw/i386/eif.h | 20 ++ >>> hw/i386/meson.build | 2 +- >> ... still it seems a generic loader, not restricted to x86. >> >> Maybe better add it as hw/core/loader-eif.[ch]? >> > Yes, the code in eif.c is architecture agnostic. So it could make > sense to move the files to hw/core. But I don't think the names should > have "loader" prefix as there is no loading logic in eif.c. There is > only logic for parsing and extracting kernel, intird, cmdline etc. > Because nitro-enclave machine type is based on microvm which only > supports x86 now, I think it also makes sense to keep the files inside > hw/i386 for now as we can only really load x86 kernel using it. Maybe > if we, in the future, add support for other architectures, then we can > move them to hw/core. What do you think? I think it makes sense to put EIF parsing into generic code from the start. Nitro Enclaves supports Aarch64 with the same EIF semantics. In fact, it would be pretty simple to do a sub-machine-class similar to the x86 NE one for arm based on -M virt as a follow-up and by making the EIF logic x86 only we're only making our lives harder for that future. Alex Amazon Web Services Development Center Germany GmbH Krausenstr. 38 10117 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597
Hey Alex, On Mon, May 27, 2024 at 9:51 PM Alexander Graf <graf@amazon.com> wrote: > > > On 27.05.24 16:52, Dorjoy Chowdhury wrote: > > Hi Philippe, > > Thank you for reviewing. > > > > On Mon, May 27, 2024 at 4:47 PM Philippe Mathieu-Daudé > > <philmd@linaro.org> wrote: > >> Hi Dorjoy, > >> > >> On 18/5/24 10:07, Dorjoy Chowdhury wrote: > >>> An EIF (Enclave Image Format)[1] image is used to boot an AWS nitro > >>> enclave[2] virtual machine. The EIF file contains the necessary > >>> kernel, cmdline, ramdisk(s) sections to boot. > >>> > >>> This commit adds support for loading EIF image using the microvm > >>> machine code. For microvm to boot from an EIF file, the kernel and > >>> ramdisk(s) are extracted into a temporary kernel and a temporary > >>> initrd file which are then hooked into the regular x86 boot mechanism > >>> along with the extracted cmdline. > >>> > >>> Although not useful for the microvm machine itself, this is needed > >>> as the following commit adds support for a new machine type > >>> 'nitro-enclave' which uses the microvm machine type as parent. The > >>> code for checking and loading EIF will be put inside a 'nitro-enclave' > >>> machine type check in the following commit so that microvm cannot load > >>> EIF because it shouldn't. > >>> > >>> [1] https://github.com/aws/aws-nitro-enclaves-image-format > >> The documentation is rather scarse... > >> > > Do you mean documentation about EIF file format? If so, yes, right > > now there is no specification other than the github repo for EIF. > > > >>> [2] https://aws.amazon.com/ec2/nitro/nitro-enclaves/ > >>> > >>> Signed-off-by: Dorjoy Chowdhury <dorjoychy111@gmail.com> > >>> --- > >>> hw/i386/eif.c | 486 ++++++++++++++++++++++++++++++++++++++++++++ > >>> hw/i386/eif.h | 20 ++ > >>> hw/i386/meson.build | 2 +- > >> ... still it seems a generic loader, not restricted to x86. > >> > >> Maybe better add it as hw/core/loader-eif.[ch]? > >> > > Yes, the code in eif.c is architecture agnostic. So it could make > > sense to move the files to hw/core. But I don't think the names should > > have "loader" prefix as there is no loading logic in eif.c. There is > > only logic for parsing and extracting kernel, intird, cmdline etc. > > Because nitro-enclave machine type is based on microvm which only > > supports x86 now, I think it also makes sense to keep the files inside > > hw/i386 for now as we can only really load x86 kernel using it. Maybe > > if we, in the future, add support for other architectures, then we can > > move them to hw/core. What do you think? > > > I think it makes sense to put EIF parsing into generic code from the > start. Nitro Enclaves supports Aarch64 with the same EIF semantics. In > fact, it would be pretty simple to do a sub-machine-class similar to the > x86 NE one for arm based on -M virt as a follow-up and by making the EIF > logic x86 only we're only making our lives harder for that future. > Understood. I will move the eif.c and eif.h files to the hw/core directory. Thanks. Regards, Dorjoy
On Sat, May 18, 2024 at 02:07:52PM +0600, Dorjoy Chowdhury wrote: > An EIF (Enclave Image Format)[1] image is used to boot an AWS nitro > enclave[2] virtual machine. The EIF file contains the necessary > kernel, cmdline, ramdisk(s) sections to boot. > > This commit adds support for loading EIF image using the microvm > machine code. For microvm to boot from an EIF file, the kernel and > ramdisk(s) are extracted into a temporary kernel and a temporary > initrd file which are then hooked into the regular x86 boot mechanism > along with the extracted cmdline. I can understand why you did it this way, but I feel its pretty gross to be loading everything into memory, writing it back to disk, and then immediately loading it all back into memory. The root problem is the x86_load_linux() method, which directly accesses the machine properties: const char *kernel_filename = machine->kernel_filename; const char *initrd_filename = machine->initrd_filename; const char *dtb_filename = machine->dtb; const char *kernel_cmdline = machine->kernel_cmdline; To properly handle this, I'd say we need to introduce an abstraction for loading the kernel/inittrd/cmdlkine data. ie on the X86MachineClass object, we should introduce four virtual methods uint8_t *(*load_kernel)(X86MachineState *machine); uint8_t *(*load_initrd)(X86MachineState *machine); uint8_t *(*load_dtb)(X86MachineState *machine); uint8_t *(*load_cmdline)(X86MachineState *machine); The default impl of these four methods should just following the existing logic, of reading and returning the data associated with the kernel_filename, initrd_filename, dtb and kernel_cmdline properties. The Nitro machine sub-class, however, can provide an alternative impl of thse virtual methods which returns data directly from the EIF file instead. > > Although not useful for the microvm machine itself, this is needed > as the following commit adds support for a new machine type > 'nitro-enclave' which uses the microvm machine type as parent. The > code for checking and loading EIF will be put inside a 'nitro-enclave' > machine type check in the following commit so that microvm cannot load > EIF because it shouldn't. > > [1] https://github.com/aws/aws-nitro-enclaves-image-format > [2] https://aws.amazon.com/ec2/nitro/nitro-enclaves/ > > Signed-off-by: Dorjoy Chowdhury <dorjoychy111@gmail.com> > --- > hw/i386/eif.c | 486 ++++++++++++++++++++++++++++++++++++++++++++ > hw/i386/eif.h | 20 ++ > hw/i386/meson.build | 2 +- > hw/i386/microvm.c | 134 +++++++++++- > 4 files changed, 640 insertions(+), 2 deletions(-) > create mode 100644 hw/i386/eif.c > create mode 100644 hw/i386/eif.h > > diff --git a/hw/i386/eif.c b/hw/i386/eif.c > new file mode 100644 > index 0000000000..761c489d33 > --- /dev/null > +++ b/hw/i386/eif.c > @@ -0,0 +1,486 @@ > +/* > + * EIF (Enclave Image Format) related helpers > + * > + * Copyright (c) 2024 Dorjoy Chowdhury <dorjoychy111@gmail.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or > + * (at your option) any later version. See the COPYING file in the > + * top-level directory. > + */ > + > +#include "qemu/osdep.h" > +#include "qemu/bswap.h" > +#include "qapi/error.h" > +#include <zlib.h> /* for crc32 */ > + > +#include "hw/i386/eif.h" > + > +#define MAX_SECTIONS 32 > + > +/* members are ordered according to field order in .eif file */ > +typedef struct EifHeader { > + uint8_t magic[4]; /* must be .eif in ascii i.e., [46, 101, 105, 102] */ > + uint16_t version; > + uint16_t flags; > + uint64_t default_memory; > + uint64_t default_cpus; > + uint16_t reserved; > + uint16_t section_cnt; > + uint64_t section_offsets[MAX_SECTIONS]; > + uint64_t section_sizes[MAX_SECTIONS]; > + uint32_t unused; > + uint32_t eif_crc32; > +} QEMU_PACKED EifHeader; > + > +/* members are ordered according to field order in .eif file */ > +typedef struct EifSectionHeader { > + /* > + * 0 = invalid, 1 = kernel, 2 = cmdline, 3 = ramdisk, 4 = signature, > + * 5 = metadata > + */ > + uint16_t section_type; > + uint16_t flags; > + uint64_t section_size; > +} QEMU_PACKED EifSectionHeader; > + > +enum EifSectionTypes { > + EIF_SECTION_INVALID = 0, > + EIF_SECTION_KERNEL = 1, > + EIF_SECTION_CMDLINE = 2, > + EIF_SECTION_RAMDISK = 3, > + EIF_SECTION_SIGNATURE = 4, > + EIF_SECTION_METADATA = 5, > + EIF_SECTION_MAX = 6, > +}; > + > +static const char *section_type_to_string(uint16_t type) > +{ > + const char *str; > + switch (type) { > + case EIF_SECTION_INVALID: > + str = "invalid"; > + break; > + case EIF_SECTION_KERNEL: > + str = "kernel"; > + break; > + case EIF_SECTION_CMDLINE: > + str = "cmdline"; > + break; > + case EIF_SECTION_RAMDISK: > + str = "ramdisk"; > + break; > + case EIF_SECTION_SIGNATURE: > + str = "signature"; > + break; > + case EIF_SECTION_METADATA: > + str = "metadata"; > + break; > + default: > + str = "unknown"; > + break; > + } > + > + return str; > +} > + > +static bool read_eif_header(FILE *f, EifHeader *header, uint32_t *crc, > + Error **errp) > +{ > + size_t got; > + size_t header_size = sizeof(*header); > + > + got = fread(header, 1, header_size, f); > + if (got != header_size) { > + error_setg(errp, "Failed to read EIF header"); > + return false; > + } > + > + if (memcmp(header->magic, ".eif", 4) != 0) { > + error_setg(errp, "Invalid EIF image. Magic mismatch."); > + return false; > + } > + > + /* Exclude header->eif_crc32 field from CRC calculation */ > + *crc = crc32(*crc, (uint8_t *)header, header_size - 4); > + > + header->version = be16_to_cpu(header->version); > + header->flags = be16_to_cpu(header->flags); > + header->default_memory = be64_to_cpu(header->default_memory); > + header->default_cpus = be64_to_cpu(header->default_cpus); > + header->reserved = be16_to_cpu(header->reserved); > + header->section_cnt = be16_to_cpu(header->section_cnt); > + > + for (int i = 0; i < MAX_SECTIONS; ++i) { > + header->section_offsets[i] = be64_to_cpu(header->section_offsets[i]); > + } > + > + for (int i = 0; i < MAX_SECTIONS; ++i) { > + header->section_sizes[i] = be64_to_cpu(header->section_sizes[i]); > + } > + > + header->unused = be32_to_cpu(header->unused); > + header->eif_crc32 = be32_to_cpu(header->eif_crc32); > + return true; > +} > + > +static bool read_eif_section_header(FILE *f, EifSectionHeader *section_header, > + uint32_t *crc, Error **errp) > +{ > + size_t got; > + size_t section_header_size = sizeof(*section_header); > + > + got = fread(section_header, 1, section_header_size, f); > + if (got != section_header_size) { > + error_setg(errp, "Failed to read EIF section header"); > + return false; > + } > + > + *crc = crc32(*crc, (uint8_t *)section_header, section_header_size); > + > + section_header->section_type = be16_to_cpu(section_header->section_type); > + section_header->flags = be16_to_cpu(section_header->flags); > + section_header->section_size = be64_to_cpu(section_header->section_size); > + return true; > +} > + > +/* > + * Upon success, the caller is responsible for unlinking and freeing *tmp_path. > + */ > +static bool get_tmp_file(const char *template, char **tmp_path, Error **errp) > +{ > + int tmp_fd; > + > + *tmp_path = NULL; > + tmp_fd = g_file_open_tmp(template, tmp_path, NULL); > + if (tmp_fd < 0 || *tmp_path == NULL) { > + error_setg(errp, "Failed to create temporary file for template %s", > + template); > + return false; > + } > + > + close(tmp_fd); > + return true; > +} > + > +static void safe_fclose(FILE *f) > +{ > + if (f) { > + fclose(f); > + } > +} > + > +static void safe_unlink(char *f) > +{ > + if (f) { > + unlink(f); > + } > +} > + > +/* > + * Upon success, the caller is reponsible for unlinking and freeing *kernel_path > + */ > +static bool read_eif_kernel(FILE *f, uint64_t size, char **kernel_path, > + uint32_t *crc, Error **errp) > +{ > + size_t got; > + FILE *tmp_file = NULL; > + uint8_t *kernel = NULL; > + > + *kernel_path = NULL; > + if (!get_tmp_file("eif-kernel-XXXXXX", kernel_path, errp)) { > + goto cleanup; > + } > + > + tmp_file = fopen(*kernel_path, "wb"); > + if (tmp_file == NULL) { > + error_setg_errno(errp, errno, "Failed to open temporary file %s", > + *kernel_path); > + goto cleanup; > + } > + > + kernel = g_malloc(size); > + got = fread(kernel, 1, size, f); > + if ((uint64_t) got != size) { > + error_setg(errp, "Failed to read EIF kernel section data"); > + goto cleanup; > + } > + > + got = fwrite(kernel, 1, size, tmp_file); > + if ((uint64_t) got != size) { > + error_setg(errp, "Failed to write EIF kernel section data to temporary" > + " file"); > + goto cleanup; > + } > + > + *crc = crc32(*crc, kernel, size); > + g_free(kernel); > + fclose(tmp_file); > + > + return true; > + > + cleanup: > + safe_fclose(tmp_file); > + > + safe_unlink(*kernel_path); > + g_free(*kernel_path); > + *kernel_path = NULL; > + > + g_free(kernel); > + return false; > +} > + > +static bool read_eif_cmdline(FILE *f, uint64_t size, char *cmdline, > + uint32_t *crc, Error **errp) > +{ > + size_t got = fread(cmdline, 1, size, f); > + if ((uint64_t) got != size) { > + error_setg(errp, "Failed to read EIF cmdline section data"); > + return false; > + } > + > + *crc = crc32(*crc, (uint8_t *)cmdline, size); > + return true; > +} > + > +static bool read_eif_ramdisk(FILE *eif, FILE *initrd, uint64_t size, > + uint32_t *crc, Error **errp) > +{ > + size_t got; > + uint8_t *ramdisk = g_malloc(size); > + > + got = fread(ramdisk, 1, size, eif); > + if ((uint64_t) got != size) { > + error_setg(errp, "Failed to read EIF ramdisk section data"); > + goto cleanup; > + } > + > + got = fwrite(ramdisk, 1, size, initrd); > + if ((uint64_t) got != size) { > + error_setg(errp, "Failed to write EIF ramdisk data to temporary file"); > + goto cleanup; > + } > + > + *crc = crc32(*crc, ramdisk, size); > + g_free(ramdisk); > + return true; > + > + cleanup: > + g_free(ramdisk); > + return false; > +} > + > +/* > + * Upon success, the caller is reponsible for unlinking and freeing > + * *kernel_path, *initrd_path and freeing *cmdline. > + */ > +bool read_eif_file(const char *eif_path, char **kernel_path, char **initrd_path, > + char **cmdline, Error **errp) > +{ > + FILE *f = NULL; > + FILE *initrd_f = NULL; > + uint32_t crc = 0; > + EifHeader eif_header; > + bool seen_sections[EIF_SECTION_MAX] = {false}; > + > + *kernel_path = *initrd_path = *cmdline = NULL; > + > + f = fopen(eif_path, "rb"); > + if (f == NULL) { > + error_setg_errno(errp, errno, "Failed to open %s", eif_path); > + goto cleanup; > + } > + > + if (!read_eif_header(f, &eif_header, &crc, errp)) { > + goto cleanup; > + } > + > + if (eif_header.version < 4) { > + error_setg(errp, "Expected EIF version 4 or greater"); > + goto cleanup; > + } > + > + if (eif_header.flags != 0) { > + error_setg(errp, "Expected EIF flags to be 0"); > + goto cleanup; > + } > + > + if (eif_header.section_cnt > MAX_SECTIONS) { > + error_setg(errp, "EIF header section count must not be greater than " > + "%d but found %d", MAX_SECTIONS, eif_header.section_cnt); > + goto cleanup; > + } > + > + for (int i = 0; i < eif_header.section_cnt; ++i) { > + EifSectionHeader section_header; > + uint16_t section_type; > + > + if (fseek(f, eif_header.section_offsets[i], SEEK_SET) != 0) { > + error_setg_errno(errp, errno, "Failed to offset to %lu in EIF file", > + eif_header.section_offsets[i]); > + goto cleanup; > + } > + > + if (!read_eif_section_header(f, §ion_header, &crc, errp)) { > + goto cleanup; > + } > + > + if (section_header.flags != 0) { > + error_setg(errp, "Expected EIF section header flags to be 0"); > + goto cleanup; > + } > + > + if (eif_header.section_sizes[i] != section_header.section_size) { > + error_setg(errp, "EIF section size mismatch between header and " > + "section header: header %lu, section header %lu", > + eif_header.section_sizes[i], > + section_header.section_size); > + goto cleanup; > + } > + > + section_type = section_header.section_type; > + > + switch (section_type) { > + case EIF_SECTION_KERNEL: > + if (seen_sections[EIF_SECTION_KERNEL]) { > + error_setg(errp, "Invalid EIF image. More than 1 kernel " > + "section"); > + goto cleanup; > + } > + if (!read_eif_kernel(f, section_header.section_size, kernel_path, > + &crc, errp)) { > + goto cleanup; > + } > + > + break; > + case EIF_SECTION_CMDLINE: > + { > + uint64_t size; > + if (seen_sections[EIF_SECTION_CMDLINE]) { > + error_setg(errp, "Invalid EIF image. More than 1 cmdline " > + "section"); > + goto cleanup; > + } > + size = section_header.section_size; > + *cmdline = g_malloc(size + 1); > + if (!read_eif_cmdline(f, size, *cmdline, &crc, errp)) { > + goto cleanup; > + } > + (*cmdline)[size] = '\0'; > + > + break; > + } > + case EIF_SECTION_RAMDISK: > + if (!seen_sections[EIF_SECTION_RAMDISK]) { > + /* > + * If this is the first time we are seeing a ramdisk section, > + * we need to create the initrd temporary file. > + */ > + if (!get_tmp_file("eif-initrd-XXXXXX", initrd_path, errp)) { > + goto cleanup; > + } > + initrd_f = fopen(*initrd_path, "wb"); > + if (initrd_f == NULL) { > + error_setg_errno(errp, errno, "Failed to open file %s", > + *initrd_path); > + goto cleanup; > + } > + } > + > + if (!read_eif_ramdisk(f, initrd_f, section_header.section_size, > + &crc, errp)) { > + goto cleanup; > + } > + > + break; > + default: > + /* other sections including invalid or unknown sections */ > + { > + uint8_t *buf; > + size_t got; > + uint64_t size = section_header.section_size; > + buf = g_malloc(size); > + got = fread(buf, 1, size, f); > + if ((uint64_t) got != size) { > + g_free(buf); > + error_setg(errp, "Failed to read EIF %s section data", > + section_type_to_string(section_type)); > + goto cleanup; > + } > + crc = crc32(crc, buf, size); > + g_free(buf); > + break; > + } > + } > + > + if (section_type < EIF_SECTION_MAX) { > + seen_sections[section_type] = true; > + } > + } > + > + if (!seen_sections[EIF_SECTION_KERNEL]) { > + error_setg(errp, "Invalid EIF image. No kernel section."); > + goto cleanup; > + } > + if (!seen_sections[EIF_SECTION_CMDLINE]) { > + error_setg(errp, "Invalid EIF image. No cmdline section."); > + goto cleanup; > + } > + if (!seen_sections[EIF_SECTION_RAMDISK]) { > + error_setg(errp, "Invalid EIF image. No ramdisk section."); > + goto cleanup; > + } > + > + if (eif_header.eif_crc32 != crc) { > + error_setg(errp, "CRC mismatch. Expected %u but header has %u.", > + crc, eif_header.eif_crc32); > + goto cleanup; > + } > + > + fclose(f); > + fclose(initrd_f); > + return true; > + > + cleanup: > + safe_fclose(f); > + safe_fclose(initrd_f); > + > + safe_unlink(*kernel_path); > + g_free(*kernel_path); > + *kernel_path = NULL; > + > + safe_unlink(*initrd_path); > + g_free(*initrd_path); > + *initrd_path = NULL; > + > + g_free(*cmdline); > + *cmdline = NULL; > + > + return false; > +} > + > +bool check_if_eif_file(const char *path, bool *is_eif, Error **errp) > +{ > + size_t got; > + uint8_t buf[4]; > + FILE *f = NULL; > + > + f = fopen(path, "rb"); > + if (f == NULL) { > + error_setg_errno(errp, errno, "Failed to open file %s", path); > + goto cleanup; > + } > + > + got = fread(buf, 1, 4, f); > + if (got != 4) { > + error_setg(errp, "Failed to read magic value from %s", path); > + goto cleanup; > + } > + > + fclose(f); > + *is_eif = !memcmp(buf, ".eif", 4); > + return true; > + > + cleanup: > + safe_fclose(f); > + return false; > +} > diff --git a/hw/i386/eif.h b/hw/i386/eif.h > new file mode 100644 > index 0000000000..b71a27062d > --- /dev/null > +++ b/hw/i386/eif.h > @@ -0,0 +1,20 @@ > +/* > + * EIF (Enclave Image Format) related helpers > + * > + * Copyright (c) 2024 Dorjoy Chowdhury <dorjoychy111@gmail.com> > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or > + * (at your option) any later version. See the COPYING file in the > + * top-level directory. > + */ > + > +#ifndef HW_I386_EIF_H > +#define HW_I386_EIF_H > + > +bool read_eif_file(const char *eif_path, char **kernel_path, char **initrd_path, > + char **kernel_cmdline, Error **errp); > + > +bool check_if_eif_file(const char *path, bool *is_eif, Error **errp); > + > +#endif > + > diff --git a/hw/i386/meson.build b/hw/i386/meson.build > index 03aad10df7..e398fc1d74 100644 > --- a/hw/i386/meson.build > +++ b/hw/i386/meson.build > @@ -14,7 +14,7 @@ i386_ss.add(when: 'CONFIG_X86_IOMMU', if_true: files('x86-iommu.c'), > i386_ss.add(when: 'CONFIG_AMD_IOMMU', if_true: files('amd_iommu.c'), > if_false: files('amd_iommu-stub.c')) > i386_ss.add(when: 'CONFIG_I440FX', if_true: files('pc_piix.c')) > -i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('x86-common.c', 'microvm.c', 'acpi-microvm.c', 'microvm-dt.c')) > +i386_ss.add(when: 'CONFIG_MICROVM', if_true: files('x86-common.c', 'microvm.c', 'acpi-microvm.c', 'microvm-dt.c', 'eif.c')) > i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c')) > i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c')) > i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c')) > diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c > index fec63cacfa..d52d85bd01 100644 > --- a/hw/i386/microvm.c > +++ b/hw/i386/microvm.c > @@ -34,6 +34,7 @@ > #include "hw/irq.h" > #include "hw/i386/kvm/clock.h" > #include "hw/i386/microvm.h" > +#include "hw/i386/eif.h" > #include "hw/i386/x86.h" > #include "target/i386/cpu.h" > #include "hw/intc/i8259.h" > @@ -281,6 +282,127 @@ static void microvm_devices_init(MicrovmMachineState *mms) > x86_bios_rom_init(x86ms, default_firmware, get_system_memory(), true); > } > > +/* Expects file to have offset 0 before this function is called */ > +static long get_file_size(FILE *f, Error **errp) > +{ > + long size; > + > + if (fseek(f, 0, SEEK_END) != 0) { > + error_setg_errno(errp, errno, "Failed to seek to the end of file"); > + return -1; > + } > + > + size = ftell(f); > + if (size == -1) { > + error_setg_errno(errp, errno, "Failed to get offset"); > + return -1; > + } > + > + if (fseek(f, 0, SEEK_SET) != 0) { > + error_setg_errno(errp, errno, "Failed to seek back to the start"); > + return -1; > + } > + > + return size; > +} > + > +static void load_eif(MicrovmMachineState *mms, FWCfgState *fw_cfg) > +{ > + Error *err; > + char *eif_kernel, *eif_initrd, *eif_cmdline; > + MachineState *machine = MACHINE(mms); > + X86MachineState *x86ms = X86_MACHINE(mms); > + > + if (!read_eif_file(machine->kernel_filename, &eif_kernel, &eif_initrd, > + &eif_cmdline, &err)) { > + error_report_err(err); > + exit(1); > + } > + > + g_free(machine->kernel_filename); > + machine->kernel_filename = eif_kernel; > + > + /* > + * If an initrd argument was provided, let's concatenate it to the > + * extracted EIF initrd temporary file. > + */ > + if (machine->initrd_filename) { > + long size; > + size_t got; > + uint8_t *buf; > + FILE *initrd_f, *eif_initrd_f; > + > + initrd_f = fopen(machine->initrd_filename, "rb"); > + if (initrd_f == NULL) { > + error_setg_errno(&err, errno, "Failed to open initrd file %s", > + machine->initrd_filename); > + goto cleanup; > + } > + > + size = get_file_size(initrd_f, &err); > + if (size == -1) { > + goto cleanup; > + } > + > + buf = g_malloc(size); > + got = fread(buf, 1, size, initrd_f); > + if ((uint64_t) got != (uint64_t) size) { > + error_setg(&err, "Failed to read initrd file %s", > + machine->initrd_filename); > + goto cleanup; > + } > + > + eif_initrd_f = fopen(eif_initrd, "ab"); > + if (eif_initrd_f == NULL) { > + error_setg_errno(&err, errno, "Failed to open EIF initrd file %s", > + eif_initrd); > + goto cleanup; > + } > + got = fwrite(buf, 1, size, eif_initrd_f); > + if ((uint64_t) got != (uint64_t) size) { > + error_setg(&err, "Failed to append initrd %s to %s", > + machine->initrd_filename, eif_initrd); > + goto cleanup; > + } > + > + fclose(initrd_f); > + fclose(eif_initrd_f); > + > + g_free(buf); > + g_free(machine->initrd_filename); > + > + machine->initrd_filename = eif_initrd; > + } else { > + machine->initrd_filename = eif_initrd; > + } > + > + /* > + * If kernel cmdline argument was provided, let's concatenate it to the > + * extracted EIF kernel cmdline. > + */ > + if (machine->kernel_cmdline != NULL) { > + char *cmd = g_strdup_printf("%s %s", eif_cmdline, > + machine->kernel_cmdline); > + g_free(eif_cmdline); > + g_free(machine->kernel_cmdline); > + machine->kernel_cmdline = cmd; > + } else { > + machine->kernel_cmdline = eif_cmdline; > + } > + > + x86_load_linux(x86ms, fw_cfg, 0, true); > + > + unlink(machine->kernel_filename); > + unlink(machine->initrd_filename); > + return; > + > + cleanup: > + error_report_err(err); > + unlink(eif_kernel); > + unlink(eif_initrd); > + exit(1); > +} > + > static void microvm_memory_init(MicrovmMachineState *mms) > { > MachineState *machine = MACHINE(mms); > @@ -330,7 +452,17 @@ static void microvm_memory_init(MicrovmMachineState *mms) > rom_set_fw(fw_cfg); > > if (machine->kernel_filename != NULL) { > - x86_load_linux(x86ms, fw_cfg, 0, true); > + Error *err; > + bool is_eif = false; > + if (!check_if_eif_file(machine->kernel_filename, &is_eif, &err)) { > + error_report_err(err); > + exit(1); > + } > + if (is_eif) { > + load_eif(mms, fw_cfg); > + } else { > + x86_load_linux(x86ms, fw_cfg, 0, true); > + } > } > > if (mms->option_roms) { > -- > 2.39.2 > > With regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
Hi Daniel, Thanks for reviewing. On Wed, May 22, 2024 at 9:32 PM Daniel P. Berrangé <berrange@redhat.com> wrote: > > On Sat, May 18, 2024 at 02:07:52PM +0600, Dorjoy Chowdhury wrote: > > An EIF (Enclave Image Format)[1] image is used to boot an AWS nitro > > enclave[2] virtual machine. The EIF file contains the necessary > > kernel, cmdline, ramdisk(s) sections to boot. > > > > This commit adds support for loading EIF image using the microvm > > machine code. For microvm to boot from an EIF file, the kernel and > > ramdisk(s) are extracted into a temporary kernel and a temporary > > initrd file which are then hooked into the regular x86 boot mechanism > > along with the extracted cmdline. > > I can understand why you did it this way, but I feel its pretty > gross to be loading everything into memory, writing it back to > disk, and then immediately loading it all back into memory. > > The root problem is the x86_load_linux() method, which directly > accesses the machine properties: > > const char *kernel_filename = machine->kernel_filename; > const char *initrd_filename = machine->initrd_filename; > const char *dtb_filename = machine->dtb; > const char *kernel_cmdline = machine->kernel_cmdline; > > To properly handle this, I'd say we need to introduce an abstraction > for loading the kernel/inittrd/cmdlkine data. > > ie on the X86MachineClass object, we should introduce four virtual > methods > > uint8_t *(*load_kernel)(X86MachineState *machine); > uint8_t *(*load_initrd)(X86MachineState *machine); > uint8_t *(*load_dtb)(X86MachineState *machine); > uint8_t *(*load_cmdline)(X86MachineState *machine); > > The default impl of these four methods should just following the > existing logic, of reading and returning the data associated with > the kernel_filename, initrd_filename, dtb and kernel_cmdline > properties. > > The Nitro machine sub-class, however, can provide an alternative > impl of thse virtual methods which returns data directly from > the EIF file instead. > Great suggestion! I agree the implementation path you suggested would look much nicer as a whole. Now that I looked a bit into the "x86_load_linux" implementation, it looks like pretty much everything is tied to expecting a filename. For example, after reading the header from the kernel_filename x86_load_linux calls into load_multiboot, load_elf (which in turn calls load_elf64, 32) and they all expect a filename. I think I would need to refactor all of them to instead work with (uint8_t *) buffers instead, right? Also in case of initrd_filename the existing code maps the file using g_mapped_file_new to the X86MachineState member initrd_mapped_file. So that will need to be looked into and refactored. Please correct me if I misunderstood something about the way to implement your suggested approach. If I am understanding this right, this probably requires a lot of work which will also probably not be straightforward to implement or test. Right now, the way the code is, it's easy to see that the existing code paths are still correct as they are not changed and the new nitro-enclave machine code just hooks into them. As the loading to memory, writing to disk and loading back to memory only is in the execution path of the new nitro-enclave machine type, I think the way the patch is right now, is a good first implementation. What do you think? Thanks. Regards, Dorjoy
On 22.05.24 19:23, Dorjoy Chowdhury wrote: > Hi Daniel, > Thanks for reviewing. > > On Wed, May 22, 2024 at 9:32 PM Daniel P. Berrangé <berrange@redhat.com> wrote: >> On Sat, May 18, 2024 at 02:07:52PM +0600, Dorjoy Chowdhury wrote: >>> An EIF (Enclave Image Format)[1] image is used to boot an AWS nitro >>> enclave[2] virtual machine. The EIF file contains the necessary >>> kernel, cmdline, ramdisk(s) sections to boot. >>> >>> This commit adds support for loading EIF image using the microvm >>> machine code. For microvm to boot from an EIF file, the kernel and >>> ramdisk(s) are extracted into a temporary kernel and a temporary >>> initrd file which are then hooked into the regular x86 boot mechanism >>> along with the extracted cmdline. >> I can understand why you did it this way, but I feel its pretty >> gross to be loading everything into memory, writing it back to >> disk, and then immediately loading it all back into memory. >> >> The root problem is the x86_load_linux() method, which directly >> accesses the machine properties: >> >> const char *kernel_filename = machine->kernel_filename; >> const char *initrd_filename = machine->initrd_filename; >> const char *dtb_filename = machine->dtb; >> const char *kernel_cmdline = machine->kernel_cmdline; >> >> To properly handle this, I'd say we need to introduce an abstraction >> for loading the kernel/inittrd/cmdlkine data. >> >> ie on the X86MachineClass object, we should introduce four virtual >> methods >> >> uint8_t *(*load_kernel)(X86MachineState *machine); >> uint8_t *(*load_initrd)(X86MachineState *machine); >> uint8_t *(*load_dtb)(X86MachineState *machine); >> uint8_t *(*load_cmdline)(X86MachineState *machine); >> >> The default impl of these four methods should just following the >> existing logic, of reading and returning the data associated with >> the kernel_filename, initrd_filename, dtb and kernel_cmdline >> properties. >> >> The Nitro machine sub-class, however, can provide an alternative >> impl of thse virtual methods which returns data directly from >> the EIF file instead. >> > Great suggestion! I agree the implementation path you suggested would > look much nicer as a whole. Now that I looked a bit into the > "x86_load_linux" implementation, it looks like pretty much everything > is tied to expecting a filename. For example, after reading the header > from the kernel_filename x86_load_linux calls into load_multiboot, > load_elf (which in turn calls load_elf64, 32) and they all expect a > filename. I think I would need to refactor all of them to instead work > with (uint8_t *) buffers instead, right? Also in case of > initrd_filename the existing code maps the file using > g_mapped_file_new to the X86MachineState member initrd_mapped_file. So > that will need to be looked into and refactored. Please correct me if > I misunderstood something about the way to implement your suggested > approach. > > If I am understanding this right, this probably requires a lot of work > which will also probably not be straightforward to implement or test. > Right now, the way the code is, it's easy to see that the existing > code paths are still correct as they are not changed and the new > nitro-enclave machine code just hooks into them. As the loading to > memory, writing to disk and loading back to memory only is in the > execution path of the new nitro-enclave machine type, I think the way > the patch is right now, is a good first implementation. What do you > think? I think the "real" fix here is to move all of the crufty loader logic from "file access" to "block layer access". Along with that, create a generic helper function (like this[1]) that opens all -kernel/-initrd/-dtb files through the block layer and calls a board specific callback to perform the load. With that in place, we would have a reentrant code path for the EIF case: EIF could plug into the generic x86 loader and when it detects EIF, create internal block devices that reference the existing file plus an offset/limit setting to ensure it only accesses the correct range in the target file. It can then simply reinvoke the x86 loader with the new block device objects. With that in place, we could also finally support -kernel http://.../vmlinuz command line invocations which currently only works on block devices. However, I do agree that the above is significant effort to get going and shouldn't hold back the Nitro Enclave machine model. Alex [1] https://github.com/agraf/qemu/commit/e49b7a18f2d8a386e5f207c567ad9ab2e3cb5429 Amazon Web Services Development Center Germany GmbH Krausenstr. 38 10117 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B Sitz: Berlin Ust-ID: DE 365 538 597
Am 31.05.2024 um 09:54 hat Alexander Graf geschrieben: > > On 22.05.24 19:23, Dorjoy Chowdhury wrote: > > Hi Daniel, > > Thanks for reviewing. > > > > On Wed, May 22, 2024 at 9:32 PM Daniel P. Berrangé <berrange@redhat.com> wrote: > > > On Sat, May 18, 2024 at 02:07:52PM +0600, Dorjoy Chowdhury wrote: > > > > An EIF (Enclave Image Format)[1] image is used to boot an AWS nitro > > > > enclave[2] virtual machine. The EIF file contains the necessary > > > > kernel, cmdline, ramdisk(s) sections to boot. > > > > > > > > This commit adds support for loading EIF image using the microvm > > > > machine code. For microvm to boot from an EIF file, the kernel and > > > > ramdisk(s) are extracted into a temporary kernel and a temporary > > > > initrd file which are then hooked into the regular x86 boot mechanism > > > > along with the extracted cmdline. > > > I can understand why you did it this way, but I feel its pretty > > > gross to be loading everything into memory, writing it back to > > > disk, and then immediately loading it all back into memory. > > > > > > The root problem is the x86_load_linux() method, which directly > > > accesses the machine properties: > > > > > > const char *kernel_filename = machine->kernel_filename; > > > const char *initrd_filename = machine->initrd_filename; > > > const char *dtb_filename = machine->dtb; > > > const char *kernel_cmdline = machine->kernel_cmdline; > > > > > > To properly handle this, I'd say we need to introduce an abstraction > > > for loading the kernel/inittrd/cmdlkine data. > > > > > > ie on the X86MachineClass object, we should introduce four virtual > > > methods > > > > > > uint8_t *(*load_kernel)(X86MachineState *machine); > > > uint8_t *(*load_initrd)(X86MachineState *machine); > > > uint8_t *(*load_dtb)(X86MachineState *machine); > > > uint8_t *(*load_cmdline)(X86MachineState *machine); > > > > > > The default impl of these four methods should just following the > > > existing logic, of reading and returning the data associated with > > > the kernel_filename, initrd_filename, dtb and kernel_cmdline > > > properties. > > > > > > The Nitro machine sub-class, however, can provide an alternative > > > impl of thse virtual methods which returns data directly from > > > the EIF file instead. > > > > > Great suggestion! I agree the implementation path you suggested would > > look much nicer as a whole. Now that I looked a bit into the > > "x86_load_linux" implementation, it looks like pretty much everything > > is tied to expecting a filename. For example, after reading the header > > from the kernel_filename x86_load_linux calls into load_multiboot, > > load_elf (which in turn calls load_elf64, 32) and they all expect a > > filename. I think I would need to refactor all of them to instead work > > with (uint8_t *) buffers instead, right? Also in case of > > initrd_filename the existing code maps the file using > > g_mapped_file_new to the X86MachineState member initrd_mapped_file. So > > that will need to be looked into and refactored. Please correct me if > > I misunderstood something about the way to implement your suggested > > approach. > > > > If I am understanding this right, this probably requires a lot of work > > which will also probably not be straightforward to implement or test. > > Right now, the way the code is, it's easy to see that the existing > > code paths are still correct as they are not changed and the new > > nitro-enclave machine code just hooks into them. As the loading to > > memory, writing to disk and loading back to memory only is in the > > execution path of the new nitro-enclave machine type, I think the way > > the patch is right now, is a good first implementation. What do you > > think? > > I think the "real" fix here is to move all of the crufty loader logic from > "file access" to "block layer access". Along with that, create a generic > helper function (like this[1]) that opens all -kernel/-initrd/-dtb files > through the block layer and calls a board specific callback to perform the > load. Not sure if I would call that a "real fix", it's more like a hack. Kernel images aren't block devices and their size may not even be aligned to 512, which is the minimum block size the block layer supports. So there might be some complications around that area. That said, even if it's an abuse of the block layer, it might be a useful abuse that saves you writing quite a bit of FILE * based logic providing similar functionality, so who I am to stop you... > With that in place, we would have a reentrant code path for the EIF case: > EIF could plug into the generic x86 loader and when it detects EIF, create > internal block devices that reference the existing file plus an offset/limit > setting to ensure it only accesses the correct range in the target file. It > can then simply reinvoke the x86 loader with the new block device objects. > > With that in place, we could also finally support -kernel http://.../vmlinuz > command line invocations which currently only works on block devices. > > However, I do agree that the above is significant effort to get going and > shouldn't hold back the Nitro Enclave machine model. Kevin
© 2016 - 2024 Red Hat, Inc.