From nobody Wed Apr 15 05:43:14 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 9A572CCA473 for ; Mon, 25 Jul 2022 08:39:53 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233718AbiGYIjv (ORCPT ); Mon, 25 Jul 2022 04:39:51 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:52852 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234087AbiGYIjt (ORCPT ); Mon, 25 Jul 2022 04:39:49 -0400 Received: from mail-pl1-x629.google.com (mail-pl1-x629.google.com [IPv6:2607:f8b0:4864:20::629]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 7D6F314090 for ; Mon, 25 Jul 2022 01:39:46 -0700 (PDT) Received: by mail-pl1-x629.google.com with SMTP id y24so9768273plh.7 for ; Mon, 25 Jul 2022 01:39:46 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=bytedance-com.20210112.gappssmtp.com; s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=l2mUZlhHXaJYVViscPjc1ubjCnfx3TCozkJkbzk0hd8=; b=0L3lA508COfe7MYrJ4i5QeT8+Aln6nIuQKE4RjJ1Lx3z6h7Hy9kUjWZ0Dtq1oiyrfw PTns7AvxZBE1CNzq4WUT071rCS1NHGXXCF7WZaZEf938VsriD1eqv04CegdB7dT6uLgC n45h8c6GSP1avg2epBxw/331XhByphoSroy9C5tUw1F2RKtd/suhkKBROlBS9PyCn5kx MBEHexqpPKHlAMcBgOVZMIZfQFopzw0/3/4nQRyWQSx9kLldSLNLCkSrOIeR7ZttkNOy 69niKl5UAS4TQhyHuFiRJXva4MR5Ab7tJcW2fxkEqXfrHoriDinHkKfs98Tc4K0u3mat MMsQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=l2mUZlhHXaJYVViscPjc1ubjCnfx3TCozkJkbzk0hd8=; b=WiTFOonwduoqsrpe4WMQQHLk4xrjZBGLtCngWwSEy7H8Za6LzV1Mg4r4oTqKmBwEKz LqIIgPyzOq3CGuIeo86Lt7Pk64AqNmEKW7R2XZxRx//oJ6coaY5HOQ/m1zjojkJA1+2N 
Lfea6GGDQRX+9MMfqO2ZJNkv4poz67tA/vSmaNYQL44SUaOBqeCQ80JaKkVcca/W2rix UeTDIDEudiw0pHrNG+7o5wIC35ydNM2Pr/eoC0krZ+VH6/NMCaCccRgz/mcfGw1vO9zm xWilU7WX3YXPrSfX0BxLa5rsOjeOuqqUv5rWxyfz0zt5+NsQ9mizA0Rjlp5gYBu7jqqd +WvQ== X-Gm-Message-State: AJIora8/0/mb5MKBQVffcects4GMwKHBEiBmGEz5LtQPhnBhgz9X0uSR bEvZrd16t5x//fW0HCGdhNil9Q== X-Google-Smtp-Source: AGRyM1siiSysQPlgusyaQKwzG5LJXqkZAwzcxclETgd4W9LqD/TVNYUmNWTVv5wHUkaSyZ+EVYdOQQ== X-Received: by 2002:a17:90b:4d92:b0:1f0:47e4:f36 with SMTP id oj18-20020a17090b4d9200b001f047e40f36mr13052394pjb.194.1658738385695; Mon, 25 Jul 2022 01:39:45 -0700 (PDT) Received: from C02FG34NMD6R.bytedance.net ([139.177.225.241]) by smtp.gmail.com with ESMTPSA id 128-20020a621886000000b0052abc2438f1sm8893479pfy.55.2022.07.25.01.39.38 (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); Mon, 25 Jul 2022 01:39:45 -0700 (PDT) From: Albert Huang Cc: "huangjie.albert" , Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , x86@kernel.org, "H. Peter Anvin" , Eric Biederman , Masahiro Yamada , Michal Marek , Nick Desaulniers , "Kirill A. 
Shutemov" , Kuppuswamy Sathyanarayanan , Michael Roth , Nathan Chancellor , Ard Biesheuvel , Joerg Roedel , Mark Rutland , Peter Zijlstra , Sean Christopherson , Kees Cook , linux-kernel@vger.kernel.org, kexec@lists.infradead.org, linux-kbuild@vger.kernel.org Subject: [PATCH 1/4] kexec: reuse crash kernel reserved memory for normal kexec Date: Mon, 25 Jul 2022 16:38:53 +0800 Message-Id: <20220725083904.56552-2-huangjie.albert@bytedance.com> X-Mailer: git-send-email 2.30.1 (Apple Git-130) In-Reply-To: <20220725083904.56552-1-huangjie.albert@bytedance.com> References: <20220725083904.56552-1-huangjie.albert@bytedance.com> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable To: unlisted-recipients:; (no To-header on input) Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org From: "huangjie.albert" normally, for kexec reboot, each segment of the second os (such as : kernel=E3=80=81initrd=E3=80=81etc.) will be copied to discontinu= ous physical memory during kexec load. and then a memory copy will be performed when kexec -e is executed to copy each segment of the second os to contiguous physical memory, which will affect the time of the kexec switch to the new os. Therefore, we reuse the crash kernel reserved memory for kexec. When kexec loads the second os, each segment of the second OS is directly copied to the contiguous physical memory, so there is no need to make a second copy when kexec -e is executed later. 
The kexec userspace tool also needs to add parameter options(-r) that support the use of reserved memory (see another patch for kexec) examples: bzimage: 53M initramfs: 28M can save about 40 ms, The larger the image size, the greater the time savings Signed-off-by: huangjie.albert --- include/linux/kexec.h | 9 +++++---- include/uapi/linux/kexec.h | 2 ++ kernel/kexec.c | 19 ++++++++++++++++++- kernel/kexec_core.c | 16 +++++++++------- kernel/kexec_file.c | 20 ++++++++++++++++++-- 5 files changed, 52 insertions(+), 14 deletions(-) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 475683cd67f1..9a8b9932b42a 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -305,9 +305,10 @@ struct kimage { unsigned long control_page; =20 /* Flags to indicate special processing */ - unsigned int type : 1; + unsigned int type : 2; #define KEXEC_TYPE_DEFAULT 0 #define KEXEC_TYPE_CRASH 1 +#define KEXEC_TYPE_RESERVED_MEM 2 unsigned int preserve_context : 1; /* If set, we are using file mode kexec syscall */ unsigned int file_mode:1; @@ -377,14 +378,14 @@ extern int kexec_load_disabled; =20 /* List of defined/legal kexec flags */ #ifndef CONFIG_KEXEC_JUMP -#define KEXEC_FLAGS KEXEC_ON_CRASH +#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_RESERVED_MEM) #else -#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT) +#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT | KEXEC_RE= SERVED_MEM) #endif =20 /* List of defined/legal kexec file flags */ #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ - KEXEC_FILE_NO_INITRAMFS) + KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_RESERVED_MEM) =20 /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h index 981016e05cfa..c29011eb7fc2 100644 --- a/include/uapi/linux/kexec.h +++ b/include/uapi/linux/kexec.h @@ -12,6 +12,7 @@ /* kexec flags for different usage scenarios */ #define KEXEC_ON_CRASH 0x00000001 
#define KEXEC_PRESERVE_CONTEXT 0x00000002 +#define KEXEC_RESERVED_MEM 0x00000004 #define KEXEC_ARCH_MASK 0xffff0000 =20 /* @@ -24,6 +25,7 @@ #define KEXEC_FILE_UNLOAD 0x00000001 #define KEXEC_FILE_ON_CRASH 0x00000002 #define KEXEC_FILE_NO_INITRAMFS 0x00000004 +#define KEXEC_FILE_RESERVED_MEM 0x00000008 =20 /* These values match the ELF architecture values. * Unless there is a good reason that should continue to be the case. diff --git a/kernel/kexec.c b/kernel/kexec.c index b5e40f069768..0d9ea52c81c1 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -27,8 +27,14 @@ static int kimage_alloc_init(struct kimage **rimage, uns= igned long entry, int ret; struct kimage *image; bool kexec_on_panic =3D flags & KEXEC_ON_CRASH; + bool kexec_on_reserved =3D flags & KEXEC_RESERVED_MEM; =20 - if (kexec_on_panic) { + if (kexec_on_panic && kexec_on_reserved) { + pr_err("both kexec_on_panic and kexec_on_reserved is true, they can not = coexist"); + return -EINVAL; + } + + if (kexec_on_panic || kexec_on_reserved) { /* Verify we have a valid entry point */ if ((entry < phys_to_boot_phys(crashk_res.start)) || (entry > phys_to_boot_phys(crashk_res.end))) @@ -50,6 +56,12 @@ static int kimage_alloc_init(struct kimage **rimage, uns= igned long entry, image->type =3D KEXEC_TYPE_CRASH; } =20 + if (kexec_on_reserved) { + /* Enable special reserved kernel control page alloc policy. 
*/ + image->control_page =3D crashk_res.start; + image->type =3D KEXEC_TYPE_RESERVED_MEM; + } + ret =3D sanity_check_segment_list(image); if (ret) goto out_free_image; @@ -110,6 +122,11 @@ static int do_kexec_load(unsigned long entry, unsigned= long nr_segments, dest_image =3D &kexec_image; } =20 + if (flags & KEXEC_RESERVED_MEM) { + if (kexec_crash_image) + arch_kexec_unprotect_crashkres(); + } + if (nr_segments =3D=3D 0) { /* Uninstall image */ kimage_free(xchg(dest_image, NULL)); diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 4d34c78334ce..6220c2e0d6f7 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -230,13 +230,13 @@ int sanity_check_segment_list(struct kimage *image) * Verify we have good destination addresses. Normally * the caller is responsible for making certain we don't * attempt to load the new image into invalid or reserved - * areas of RAM. But crash kernels are preloaded into a + * areas of RAM. But crash kernels (or we specify to load + * the new image into reserved areas) are preloaded into a * reserved area of ram. We must ensure the addresses * are in the reserved area otherwise preloading the * kernel could corrupt things. 
*/ - - if (image->type =3D=3D KEXEC_TYPE_CRASH) { + if (image->type =3D=3D KEXEC_TYPE_CRASH || image->type =3D=3D KEXEC_TYPE_= RESERVED_MEM) { for (i =3D 0; i < nr_segments; i++) { unsigned long mstart, mend; =20 @@ -414,7 +414,7 @@ static struct page *kimage_alloc_normal_control_pages(s= truct kimage *image, return pages; } =20 -static struct page *kimage_alloc_crash_control_pages(struct kimage *image, +static struct page *kimage_alloc_reserverd_control_pages(struct kimage *im= age, unsigned int order) { /* Control pages are special, they are the intermediaries @@ -491,7 +491,8 @@ struct page *kimage_alloc_control_pages(struct kimage *= image, pages =3D kimage_alloc_normal_control_pages(image, order); break; case KEXEC_TYPE_CRASH: - pages =3D kimage_alloc_crash_control_pages(image, order); + case KEXEC_TYPE_RESERVED_MEM: + pages =3D kimage_alloc_reserverd_control_pages(image, order); break; } =20 @@ -846,7 +847,7 @@ static int kimage_load_normal_segment(struct kimage *im= age, return result; } =20 -static int kimage_load_crash_segment(struct kimage *image, +static int kimage_load_reserved_segment(struct kimage *image, struct kexec_segment *segment) { /* For crash dumps kernels we simply copy the data from @@ -924,7 +925,8 @@ int kimage_load_segment(struct kimage *image, result =3D kimage_load_normal_segment(image, segment); break; case KEXEC_TYPE_CRASH: - result =3D kimage_load_crash_segment(image, segment); + case KEXEC_TYPE_RESERVED_MEM: + result =3D kimage_load_reserved_segment(image, segment); break; } =20 diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index f9261c07b048..5242ad7e5302 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -277,7 +277,7 @@ kimage_file_alloc_init(struct kimage **rimage, int kern= el_fd, int ret; struct kimage *image; bool kexec_on_panic =3D flags & KEXEC_FILE_ON_CRASH; - + bool kexec_on_reserved =3D flags & KEXEC_FILE_RESERVED_MEM; image =3D do_kimage_alloc_init(); if (!image) return -ENOMEM; @@ -290,6 +290,12 @@ 
kimage_file_alloc_init(struct kimage **rimage, int ker= nel_fd, image->type =3D KEXEC_TYPE_CRASH; } =20 + if (kexec_on_reserved) { + /* Enable special crash kernel control page alloc policy. */ + image->control_page =3D crashk_res.start; + image->type =3D KEXEC_TYPE_RESERVED_MEM; + } + ret =3D kimage_file_prepare_segments(image, kernel_fd, initrd_fd, cmdline_ptr, cmdline_len, flags); if (ret) @@ -346,6 +352,11 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, = initrd_fd, if (!mutex_trylock(&kexec_mutex)) return -EBUSY; =20 + if ((flags & KEXEC_FILE_ON_CRASH) && (flags & KEXEC_FILE_RESERVED_MEM)) { + pr_err("both kexec_on_panic and kexec_on_reserved is true, they can not = coexist"); + return -EINVAL; + } + dest_image =3D &kexec_image; if (flags & KEXEC_FILE_ON_CRASH) { dest_image =3D &kexec_crash_image; @@ -353,6 +364,11 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, = initrd_fd, arch_kexec_unprotect_crashkres(); } =20 + if (flags & KEXEC_FILE_RESERVED_MEM) { + if (kexec_crash_image) + arch_kexec_unprotect_crashkres(); + } + if (flags & KEXEC_FILE_UNLOAD) goto exchange; =20 @@ -588,7 +604,7 @@ static int kexec_walk_memblock(struct kexec_buf *kbuf, static int kexec_walk_resources(struct kexec_buf *kbuf, int (*func)(struct resource *, void *)) { - if (kbuf->image->type =3D=3D KEXEC_TYPE_CRASH) + if (kbuf->image->type =3D=3D KEXEC_TYPE_CRASH || kbuf->image->type =3D=3D= KEXEC_TYPE_RESERVED_MEM) return walk_iomem_res_desc(crashk_res.desc, IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY, crashk_res.start, crashk_res.end, --=20 2.31.1 From nobody Wed Apr 15 05:43:14 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 1DB77C43334 for ; Mon, 25 Jul 2022 08:40:18 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234118AbiGYIkQ (ORCPT ); Mon, 25 Jul 
2022 04:40:16 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:53298 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234095AbiGYIkF (ORCPT ); Mon, 25 Jul 2022 04:40:05 -0400 Received: from mail-pj1-x102e.google.com (mail-pj1-x102e.google.com [IPv6:2607:f8b0:4864:20::102e]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 3A97814095 for ; Mon, 25 Jul 2022 01:40:04 -0700 (PDT) Received: by mail-pj1-x102e.google.com with SMTP id ku18so9756942pjb.2 for ; Mon, 25 Jul 2022 01:40:04 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=bytedance-com.20210112.gappssmtp.com; s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=kHG5WLJXoefEp1rBIYi3cGiQrYB7FX8IJTWGZ213f6w=; b=oKvYNiy1a+B/y9HqhMS1eeDAuKnP/B7jW4Edzu3YQyc2TAbENzdFRYsH/9lNY8kWnv mDlx1UTjYca33UMNa2+p/+myi0hxsLQCasyKkMq1x8zl+YEaPYMhwxaMj58FfiJQkuem oY7MdUKQxELSYp92yDUBkKgkRKS8izlCQYk+7v76P0U8k8YBea+7YQXsWcTyurNEw6+h g7Hjv+yxgV99jZT+HZxNmp1iEIFgYtKfT6SxHsrN3SkrcdYdnshla+dW249GJ/IUi3U+ WuJc6bRxEjWXYuxp5MZLGXyU+w+1HAYCMZ0gYpDElGaSKspUa6sKF1m4l4DG677dRYU2 G8NA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=kHG5WLJXoefEp1rBIYi3cGiQrYB7FX8IJTWGZ213f6w=; b=EJVV0UzpgyifHQuJ0X8NR0Sxq54jtjSE9GnMv9oQjNPFhhDwW+Lz5R80EmHDsRSGPK pG6eG8x6D6/1bnIU9Pvh5KOA1ncpiba6HK1W1fcOH2F3kVNEtr/nQF0/a9wTuceme3AJ WLychrYobB9Z12Z7/Jqw7airc4sN22UO9nG+BLCUZnT3hyE08ZxsSOFhADCGet4CDIKR OjeNv3+hvvEqIoN0AG9jGv/+qQI8yTPwRpo0yHHWU5pXWE0P02TmZVo8b9LXSOK0s58N wNsBGTLV1wKBRqrXCY1Gh9kD0o9pAnI2YWsfiCGYwdtRkfiwPSJ8yfQJ5T0oMGQR6X5q 0Gjg== X-Gm-Message-State: AJIora+NtOkE/uw2YqYEX2tGBGL4pctWiQlcSEJhd5jaeEgZBpSS7y5X 3ge0Vt2Vc1B+AiD6GzDYVNVVWw== X-Google-Smtp-Source: AGRyM1sbSkqOWAM8qvsfiZmn6OqYnP2aJAsp+Ac/PTrZCoO3LlsIuCdT88SmUqcbCQVOLPgNlQ5J8g== X-Received: by 
2002:a17:90a:c4f:b0:1df:a178:897f with SMTP id u15-20020a17090a0c4f00b001dfa178897fmr13156990pje.19.1658738403592; Mon, 25 Jul 2022 01:40:03 -0700 (PDT) Received: from C02FG34NMD6R.bytedance.net ([139.177.225.241]) by smtp.gmail.com with ESMTPSA id 128-20020a621886000000b0052abc2438f1sm8893479pfy.55.2022.07.25.01.39.55 (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); Mon, 25 Jul 2022 01:40:03 -0700 (PDT) From: Albert Huang Cc: "huangjie.albert" , Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , x86@kernel.org, "H. Peter Anvin" , Eric Biederman , Masahiro Yamada , Michal Marek , Nick Desaulniers , "Kirill A. Shutemov" , Brijesh Singh , Michael Roth , Nathan Chancellor , Kuppuswamy Sathyanarayanan , Ard Biesheuvel , Peter Zijlstra , Sean Christopherson , Joerg Roedel , Mark Rutland , Kees Cook , linux-kernel@vger.kernel.org, kexec@lists.infradead.org, linux-kbuild@vger.kernel.org Subject: [PATCH 2/4] kexec: add CONFIG_KEXEC_PURGATORY_SKIP_SIG Date: Mon, 25 Jul 2022 16:38:54 +0800 Message-Id: <20220725083904.56552-3-huangjie.albert@bytedance.com> X-Mailer: git-send-email 2.30.1 (Apple Git-130) In-Reply-To: <20220725083904.56552-1-huangjie.albert@bytedance.com> References: <20220725083904.56552-1-huangjie.albert@bytedance.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable To: unlisted-recipients:; (no To-header on input) Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" From: "huangjie.albert" the verify_sha256_digest may cost 300+ ms in my test environment: bzImage: 53M initramfs:28M We can add a macro to control whether to enable this check. If we can confirm that the data in this will not change, we can turn off the check and get a faster startup. 
Signed-off-by: huangjie.albert --- arch/x86/Kconfig | 9 +++++++++ arch/x86/purgatory/purgatory.c | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 52a7f91527fe..adbd3a2bd60f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2052,6 +2052,15 @@ config KEXEC_BZIMAGE_VERIFY_SIG help Enable bzImage signature verification support. =20 +config KEXEC_PURGATORY_SKIP_SIG + bool "skip kexec purgatory signature verification" + depends on ARCH_HAS_KEXEC_PURGATORY + help + this options makes the kexec purgatory do not signature verification + which would get hundreds of milliseconds saved during kexec boot. If we= can + confirm that the data of each segment loaded by kexec will not change w= e may + enable this option + config CRASH_DUMP bool "kernel crash dumps" depends on X86_64 || (X86_32 && HIGHMEM) diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 7558139920f8..b3f15774d86d 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -20,6 +20,12 @@ u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section= (".kexec-purgatory"); =20 struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section= (".kexec-purgatory"); =20 +#ifdef CONFIG_KEXEC_PURGATORY_SKIP_SIG +static int verify_sha256_digest(void) +{ + return 0; +} +#else static int verify_sha256_digest(void) { struct kexec_sha_region *ptr, *end; @@ -39,6 +45,7 @@ static int verify_sha256_digest(void) =20 return 0; } +#endif =20 void purgatory(void) { --=20 2.31.1 From nobody Wed Apr 15 05:43:14 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 26BEDCCA473 for ; Mon, 25 Jul 2022 08:40:29 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233720AbiGYIk2 (ORCPT ); Mon, 25 Jul 2022 
04:40:28 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:53296 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S234094AbiGYIkX (ORCPT ); Mon, 25 Jul 2022 04:40:23 -0400 Received: from mail-pl1-x62a.google.com (mail-pl1-x62a.google.com [IPv6:2607:f8b0:4864:20::62a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id CAEAAC32 for ; Mon, 25 Jul 2022 01:40:22 -0700 (PDT) Received: by mail-pl1-x62a.google.com with SMTP id r8so1412938plh.8 for ; Mon, 25 Jul 2022 01:40:22 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=bytedance-com.20210112.gappssmtp.com; s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=kSe97Tx8ctZCEn1X6J3jTXp+4FQKjeXptEPACGdKsu4=; b=38IdOZqiUSSjfpSN5DaswpKLzz++/3lQOzoZC4Ut2U51DggOBUm5pTe/PQ/doSkYuO y0+Wke94niGekams7+xaCPk/FKZYHAbVOR/JI/jY1NFxusaEy64k1b5dvUMuC07SCRbd Wd5FXomw1uG5DwpKUr10bW4d/vWWl3qe7nnxJLhw7kwFUk/ykI0+SDW6ppITZPSCj8mn CHxh92xmN0EykOCigMNAHgEBRqJKhZlAeZ0MJ5tDaYaBgXI+OuI/Lc3H4+H+vQ/z40do s3Z8YXe9qLVk/neXJ5qFMNQ+WPNhOAvEfTWU8f3N5elGga2x1HdMtFs+Rt2e8yACwh1H +iUQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=kSe97Tx8ctZCEn1X6J3jTXp+4FQKjeXptEPACGdKsu4=; b=NvVsnD3sB1Gl5DnCgDrRxwzmEwvChwIBe37Xb2/YSexUGfZ0aoirxV2vnnRbhBrsmu oFIFYsF1esUjSqBuyMC1J2pxNFgriH6QrHPdCeJfYTmXYR3qr4AxDUGH2gqggsVgooQI OTGFuRFQH5o6ZPN1701/hHMyyCRsLecLL8o2Zy6oE+dMseU0Dvzy1hB5ywwlbvuYrkps O1yxi+OfjPEQxQW/+AdEXgxHHFb2t0+LaBDI8tsPVRlSnIGbBXOygs+MFtiaqGYZvE/c Us0D6jyZo4kH4zBWUCEXu/WVr7g8lxE7ZPOeDaQprqnDwDuiKjOG17M24TSIcLtyucCn bh2w== X-Gm-Message-State: AJIora9QLGOZR/JY4GwCMF+F8MFYzp7zb6wvnL1OuPrfuEfe/+020QFA e/rH8j0KFJZHWU6REmIx1yLOuA== X-Google-Smtp-Source: AGRyM1svbozEeNZXu0H7s4TemfoTbDBbW/i8GL2DnVrh9Dqh48UuMfLqkyU6b9gNMerxnzQoh3m1FA== X-Received: by 
2002:a17:90a:5508:b0:1f2:bedd:980e with SMTP id b8-20020a17090a550800b001f2bedd980emr2882794pji.178.1658738422352; Mon, 25 Jul 2022 01:40:22 -0700 (PDT) Received: from C02FG34NMD6R.bytedance.net ([139.177.225.241]) by smtp.gmail.com with ESMTPSA id 128-20020a621886000000b0052abc2438f1sm8893479pfy.55.2022.07.25.01.40.14 (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); Mon, 25 Jul 2022 01:40:22 -0700 (PDT) From: Albert Huang Cc: "huangjie.albert" , Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , x86@kernel.org, "H. Peter Anvin" , Eric Biederman , Masahiro Yamada , Michal Marek , Nick Desaulniers , "Kirill A. Shutemov" , Kuppuswamy Sathyanarayanan , Tony Luck , Michael Roth , Nathan Chancellor , Ard Biesheuvel , Mark Rutland , Joerg Roedel , Sean Christopherson , Peter Zijlstra , Kees Cook , linux-kernel@vger.kernel.org, kexec@lists.infradead.org, linux-kbuild@vger.kernel.org Subject: [PATCH 3/4] x86: Support the uncompressed kernel to speed up booting Date: Mon, 25 Jul 2022 16:38:55 +0800 Message-Id: <20220725083904.56552-4-huangjie.albert@bytedance.com> X-Mailer: git-send-email 2.30.1 (Apple Git-130) In-Reply-To: <20220725083904.56552-1-huangjie.albert@bytedance.com> References: <20220725083904.56552-1-huangjie.albert@bytedance.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable To: unlisted-recipients:; (no To-header on input) Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" From: "huangjie.albert" Although the compressed kernel can save the time of loading the kernel into the memory and save the disk space for storing the kernel, but in some time-sensitive scenarios, the time for decompressing the kernel is intolerable. Therefore, it is necessary to support uncompressed kernel images, so that the time of kernel decompression can be saved when the kernel is started. 
This part of the time on my machine is approximately: image type image size times compressed(gzip) 8.5M 159ms uncompressed 53M 8.5ms Signed-off-by: huangjie.albert --- arch/x86/Kconfig | 1 + arch/x86/boot/compressed/Makefile | 5 ++++- arch/x86/boot/compressed/misc.c | 13 +++++++++++++ scripts/Makefile.lib | 5 +++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index adbd3a2bd60f..231187624c68 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -221,6 +221,7 @@ config X86 select HAVE_KERNEL_LZO select HAVE_KERNEL_XZ select HAVE_KERNEL_ZSTD + select HAVE_KERNEL_UNCOMPRESSED select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE select HAVE_FUNCTION_ERROR_INJECTION diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/M= akefile index 19e1905dcbf6..0c8417a2f792 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -26,7 +26,7 @@ OBJECT_FILES_NON_STANDARD :=3D y KCOV_INSTRUMENT :=3D n =20 targets :=3D vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bi= n.lzma \ - vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 vmlinux.bin.zst + vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 vmlinux.bin.zst vmlinux.bi= n.none =20 # CLANG_FLAGS must come before any cc-disable-warning or cc-option calls in # case of cross compiling, as it has the '--target=3D' flag, which is need= ed to @@ -139,6 +139,8 @@ $(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE $(call if_changed,lz4_with_size) $(obj)/vmlinux.bin.zst: $(vmlinux.bin.all-y) FORCE $(call if_changed,zstd22_with_size) +$(obj)/vmlinux.bin.none: $(vmlinux.bin.all-y) FORCE + $(call if_changed,none) =20 suffix-$(CONFIG_KERNEL_GZIP) :=3D gz suffix-$(CONFIG_KERNEL_BZIP2) :=3D bz2 @@ -147,6 +149,7 @@ suffix-$(CONFIG_KERNEL_XZ) :=3D xz suffix-$(CONFIG_KERNEL_LZO) :=3D lzo suffix-$(CONFIG_KERNEL_LZ4) :=3D lz4 suffix-$(CONFIG_KERNEL_ZSTD) :=3D zst +suffix-$(CONFIG_KERNEL_UNCOMPRESSED) :=3D none =20 quiet_cmd_mkpiggy =3D MKPIGGY 
$@ cmd_mkpiggy =3D $(obj)/mkpiggy $< > $@ diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/mis= c.c index cf690d8712f4..c23c0f525d93 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -181,6 +181,19 @@ void __puthex(unsigned long value) } } =20 +#ifdef CONFIG_KERNEL_UNCOMPRESSED +#include +static int __decompress(unsigned char *buf, long len, + long (*fill)(void*, unsigned long), + long (*flush)(void*, unsigned long), + unsigned char *outbuf, long olen, + long *pos, void (*error)(char *x)) +{ + memcpy(outbuf, buf, olen); + return 0; +} +#endif + #ifdef CONFIG_X86_NEED_RELOCS static void handle_relocations(void *output, unsigned long output_len, unsigned long virt_addr) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 3fb6a99e78c4..c89d5466c617 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -438,6 +438,11 @@ quiet_cmd_lz4 =3D LZ4 $@ quiet_cmd_lz4_with_size =3D LZ4 $@ cmd_lz4_with_size =3D { cat $(real-prereqs) | $(LZ4) -l -c1 stdin st= dout; \ $(size_append); } > $@ +# none +quiet_cmd_none =3D NONE $@ + cmd_none =3D (cat $(filter-out FORCE,$^) && \ + $(call size_append, $(filter-out FORCE,$^))) > $@ || \ + (rm -f $@ ; false) =20 # U-Boot mkimage # ------------------------------------------------------------------------= --- --=20 2.31.1 From nobody Wed Apr 15 05:43:14 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 27012C433EF for ; Mon, 25 Jul 2022 08:40:46 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S234110AbiGYIko (ORCPT ); Mon, 25 Jul 2022 04:40:44 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54602 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S233167AbiGYIkl (ORCPT ); Mon, 25 Jul 2022 
04:40:41 -0400 Received: from mail-pj1-x1035.google.com (mail-pj1-x1035.google.com [IPv6:2607:f8b0:4864:20::1035]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 0DB5AF16 for ; Mon, 25 Jul 2022 01:40:40 -0700 (PDT) Received: by mail-pj1-x1035.google.com with SMTP id gq7so9767577pjb.1 for ; Mon, 25 Jul 2022 01:40:40 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=bytedance-com.20210112.gappssmtp.com; s=20210112; h=from:to:cc:subject:date:message-id:in-reply-to:references :mime-version:content-transfer-encoding; bh=JRx05kPh3Ii0mpG7hTQJSpxflPSjOu6HTrFUf3L55sY=; b=b/gJd3iUl8OERmp+WyI5dC15jE1/7lh4kN0ML2m8TDjjg5+A09Zs5c/jcux/umjCNB E9b2j3m11+23MfAV2gUuOIwI4Tafw/ldONvsGh4u2J8Vpdcvltmgs6t+k5KXpZzo0VZ9 Y1b6DFFSZs/j7GucnJod6cEQ+zQ5+1cbLlm3Gk94Vu/bqLd4ImYU+t3/WYn7oY+2dhKr B5ZHR1BDmNOKRNaElyjk4HYwq9sdhws0NKdtm4Z0V+ls3QKwWfZFpeMSsVggUD+tsR9v ydOTI29C1Ubsavi5EufYYWVC6mJ2f1KC1HCQCHSHAQ8jJXNmCUzdmst8S5xXiUUZic+Q lmXg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references:mime-version:content-transfer-encoding; bh=JRx05kPh3Ii0mpG7hTQJSpxflPSjOu6HTrFUf3L55sY=; b=jlJm4TKI0Vt4bVvZTu5uddykoH/tnBJfE1YyWCs7ryRu8o4G1RTZz4FGz7xIXv0SbM RDMWKu3q44IDYWr9fI7JBhQNa0ymuWQRphs5OrsqNUb4XfhAiBChvTDFTbY0XWH7f8tz AO/3d4+/G0c8zzIS53QGiBCKLdtdZPw8qMIJ+BChz6MSDKltEoUg9WDziRWfH3JE4G36 DGElONZ49sFuszDni4kMF/ipIZizMLopAXAtjkfOdbQb+yr8HEoCFG4z1k/IUTsQzvj3 OFIn/l0I83o59oSnUXAgs91uhJoeBp9vu6xaTKDRQmuQZ9f73Chor2hDS03Y0H21iDfE SX8w== X-Gm-Message-State: AJIora8d33KnmPY3WNgE8otwHi4ayQVH+2XL0rBWANNRKlWdQ4x3rn5/ w9LpPtIckCIKHo54T1xFrmVgrQ== X-Google-Smtp-Source: AGRyM1vlctvOgVdFh/Fgb7CwxXla4DK2/n69hV3Zju/JQYHyXfZs1xGC4mXFAdMg4Fh/tEID47FsXQ== X-Received: by 2002:a17:902:c401:b0:16d:8109:4f18 with SMTP id k1-20020a170902c40100b0016d81094f18mr2037084plk.32.1658738439453; Mon, 25 Jul 2022 01:40:39 -0700 (PDT) Received: from C02FG34NMD6R.bytedance.net ([139.177.225.241]) by 
smtp.gmail.com with ESMTPSA id 128-20020a621886000000b0052abc2438f1sm8893479pfy.55.2022.07.25.01.40.31 (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); Mon, 25 Jul 2022 01:40:39 -0700 (PDT) From: Albert Huang Cc: "huangjie.albert" , Thomas Gleixner , Ingo Molnar , Borislav Petkov , Dave Hansen , x86@kernel.org, "H. Peter Anvin" , Eric Biederman , Masahiro Yamada , Michal Marek , Nick Desaulniers , "Kirill A. Shutemov" , Kuppuswamy Sathyanarayanan , Michael Roth , Nathan Chancellor , Ard Biesheuvel , Mark Rutland , Sean Christopherson , Peter Zijlstra , Kees Cook , Tony Luck , linux-kernel@vger.kernel.org, kexec@lists.infradead.org, linux-kbuild@vger.kernel.org Subject: [PATCH 4/4] x86: boot: avoid memory copy if kernel is uncompressed Date: Mon, 25 Jul 2022 16:38:56 +0800 Message-Id: <20220725083904.56552-5-huangjie.albert@bytedance.com> X-Mailer: git-send-email 2.30.1 (Apple Git-130) In-Reply-To: <20220725083904.56552-1-huangjie.albert@bytedance.com> References: <20220725083904.56552-1-huangjie.albert@bytedance.com> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable To: unlisted-recipients:; (no To-header on input) Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org From: "huangjie.albert" 1=E3=80=81if kernel is uncompressed. we do not need to relocate kernel image for decompression 2=E3=80=81if kaslr is disabled, we do not need to do a memory copy before parse_elf. Two memory copies can be skipped with this patch. this can save about 20ms during booting. 
Signed-off-by: huangjie.albert --- arch/x86/boot/compressed/head_64.S | 8 ++++++-- arch/x86/boot/compressed/misc.c | 22 +++++++++++++++++----- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/= head_64.S index d33f060900d2..9e7770c7047b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -398,10 +398,13 @@ SYM_CODE_START(startup_64) 1: =20 /* Target address to relocate to for decompression */ +#ifdef CONFIG_KERNEL_UNCOMPRESSED + movq %rbp, %rbx +#else movl BP_init_size(%rsi), %ebx subl $ rva(_end), %ebx addq %rbp, %rbx - +#endif /* Set up the stack */ leaq rva(boot_stack_end)(%rbx), %rsp =20 @@ -522,6 +525,7 @@ trampoline_return: * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. */ +#ifndef CONFIG_KERNEL_UNCOMPRESSED pushq %rsi leaq (_bss-8)(%rip), %rsi leaq rva(_bss-8)(%rbx), %rdi @@ -531,7 +535,7 @@ trampoline_return: rep movsq cld popq %rsi - +#endif /* * The GDT may get overwritten either during the copy we just did or * during extract_kernel below. 
To avoid any issues, repoint the GDTR diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/mis= c.c index c23c0f525d93..d8445562d4e9 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -290,7 +290,7 @@ static inline void handle_relocations(void *output, uns= igned long output_len, { } #endif =20 -static void parse_elf(void *output) +static void parse_elf(void *output, void *input) { #ifdef CONFIG_X86_64 Elf64_Ehdr ehdr; @@ -302,7 +302,7 @@ static void parse_elf(void *output) void *dest; int i; =20 - memcpy(&ehdr, output, sizeof(ehdr)); + memcpy(&ehdr, input, sizeof(ehdr)); if (ehdr.e_ident[EI_MAG0] !=3D ELFMAG0 || ehdr.e_ident[EI_MAG1] !=3D ELFMAG1 || ehdr.e_ident[EI_MAG2] !=3D ELFMAG2 || @@ -317,7 +317,7 @@ static void parse_elf(void *output) if (!phdrs) error("Failed to allocate space for phdrs"); =20 - memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum); + memcpy(phdrs, input + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum); =20 for (i =3D 0; i < ehdr.e_phnum; i++) { phdr =3D &phdrs[i]; @@ -334,7 +334,7 @@ static void parse_elf(void *output) #else dest =3D (void *)(phdr->p_paddr); #endif - memmove(dest, output + phdr->p_offset, phdr->p_filesz); + memmove(dest, input + phdr->p_offset, phdr->p_filesz); break; default: /* Ignore other PT_* */ break; } @@ -467,9 +467,21 @@ asmlinkage __visible void *extract_kernel(void *rmode,= memptr heap, #endif =20 debug_putstr("\nDecompressing Linux... "); + +#ifdef CONFIG_KERNEL_UNCOMPRESSED + if (cmdline_find_option_bool("nokaslr")) { + parse_elf(output, input_data); + } else { + __decompress(input_data, input_len, NULL, NULL, output, output_len, + NULL, error); + parse_elf(output, output); + } +#else __decompress(input_data, input_len, NULL, NULL, output, output_len, NULL, error); - parse_elf(output); + parse_elf(output, output); +#endif + handle_relocations(output, output_len, virt_addr); debug_putstr("done.\nBooting the kernel.\n"); =20 --=20 2.31.1