From: Ard Biesheuvel <ardb@kernel.org>

!! NOTE: patches #7 - #10 depend on [0] !!

Reorganize C code that is used during early boot, either in the
decompressor/EFI stub or the kernel proper, but before the kernel
virtual mapping is up.

v4:
- drop patches that were queued up
- fix address space error in patch #1
- add patches for SEV-SNP boot code - these cannot be applied yet, but
  are included for completeness

v3:
- keep rip_rel_ptr() around in PIC code - sadly, it is still needed in
  some cases
- remove RIP_REL_REF() uses in separate patches
- keep __head annotations for now, they will all be removed later
- disable objtool validation for library objects (i.e., pieces that are
  not linked into vmlinux)

I will follow up with a series that gets rid of .head.text altogether,
as it will no longer be needed at all once the startup code is checked
for absolute relocations.

Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Dionna Amalie Glaze <dionnaglaze@google.com>
Cc: Kevin Loughlin <kevinloughlin@google.com>

[0] https://lore.kernel.org/all/20250410132850.3708703-2-ardb+git@google.com/T/#u

Ard Biesheuvel (11):
  x86/asm: Make rip_rel_ptr() usable from fPIC code
  x86/boot: Move the early GDT/IDT setup code into startup/
  x86/boot: Move early kernel mapping code into startup/
  x86/boot: Drop RIP_REL_REF() uses from early mapping code
  x86/boot: Move early SME init code into startup/
  x86/boot: Drop RIP_REL_REF() uses from SME startup code
  x86/sev: Prepare for splitting off early SEV code
  x86/sev: Split off startup code from core code
  x86/boot: Move SEV startup code into startup/
  x86/boot: Drop RIP_REL_REF() uses from early SEV code
  x86/asm: Retire RIP_REL_REF()

 arch/x86/boot/compressed/Makefile                          |    2 +-
 arch/x86/boot/compressed/sev.c                             |   17 +-
 arch/x86/boot/startup/Makefile                             |   16 +
 arch/x86/boot/startup/gdt_idt.c                            |   84 +
 arch/x86/boot/startup/map_kernel.c                         |  225 +++
 arch/x86/{coco/sev/shared.c => boot/startup/sev-shared.c}  |  375 +----
 arch/x86/boot/startup/sev-startup.c                        | 1395 ++++++++++++++++
 arch/x86/{mm/mem_encrypt_identity.c => boot/startup/sme.c} |   19 +-
 arch/x86/coco/sev/Makefile                                 |   19 -
 arch/x86/coco/sev/core.c                                   | 1726 ++++----------------
 arch/x86/include/asm/asm.h                                 |    5 -
 arch/x86/include/asm/coco.h                                |    2 +-
 arch/x86/include/asm/mem_encrypt.h                         |    2 +-
 arch/x86/include/asm/sev-internal.h                        |  112 ++
 arch/x86/include/asm/sev.h                                 |   37 +
 arch/x86/kernel/head64.c                                   |  285 +---
 arch/x86/mm/Makefile                                       |    6 -
 17 files changed, 2208 insertions(+), 2119 deletions(-)
 create mode 100644 arch/x86/boot/startup/gdt_idt.c
 create mode 100644 arch/x86/boot/startup/map_kernel.c
 rename arch/x86/{coco/sev/shared.c => boot/startup/sev-shared.c} (78%)
 create mode 100644 arch/x86/boot/startup/sev-startup.c
 rename arch/x86/{mm/mem_encrypt_identity.c => boot/startup/sme.c} (97%)
 create mode 100644 arch/x86/include/asm/sev-internal.h

--
2.49.0.504.g3bcea36a83-goog
From: Ard Biesheuvel <ardb@kernel.org>
Subject: [PATCH v4 01/11] x86/asm: Make rip_rel_ptr() usable from fPIC code

RIP_REL_REF() is used in non-PIC C code that is called very early,
before the kernel virtual mapping is up, which is the mapping that the
linker expects. It is currently used in two different ways:
- to refer to the value of a global variable, including as an lvalue in
  assignments;
- to take the address of a global variable via the mapping that the code
  currently executes at.

The former case is only needed in non-PIC code, as PIC code will never
use absolute symbol references when the address of the symbol is not
being used. But taking the address of a variable in PIC code may still
require extra care, as a stack allocated struct assignment may be
emitted as a memcpy() from a statically allocated copy in .rodata.

For instance, this

  void startup_64_setup_gdt_idt(void)
  {
	struct desc_ptr startup_gdt_descr = {
		.address = (__force unsigned long)gdt_page.gdt,
		.size = GDT_SIZE - 1,
	};

may result in an absolute symbol reference in PIC code, even though the
struct is allocated on the stack and populated at runtime.

To address this case, make rip_rel_ptr() accessible in PIC code, and
update any existing uses where the address of a global variable is
taken using RIP_REL_REF.

Once all code of this nature has been moved into arch/x86/boot/startup
and built with -fPIC, RIP_REL_REF() can be retired, and only
rip_rel_ptr() will remain.
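
For reference, after this change the address can be taken with an
explicit rip_rel_ptr() call instead; this is the shape of the head64.c
hunk further down, shown here only to illustrate the pattern:

  void __head startup_64_setup_gdt_idt(void)
  {
	struct gdt_page *gp = rip_rel_ptr((void *)(__force unsigned long)&gdt_page);

	struct desc_ptr startup_gdt_descr = {
		.address = (unsigned long)gp->gdt,
		.size = GDT_SIZE - 1,
	};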

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/coco/sev/core.c           |  2 +-
 arch/x86/coco/sev/shared.c         |  4 ++--
 arch/x86/include/asm/asm.h         |  2 +-
 arch/x86/kernel/head64.c           | 24 ++++++++++----------
 arch/x86/mm/mem_encrypt_identity.c |  6 ++---
 5 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -XXX,XX +XXX,XX @@ static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
	 * kernel was loaded (physbase), so the get the CA address using
	 * RIP-relative addressing.
	 */
-	pa = (u64)&RIP_REL_REF(boot_svsm_ca_page);
+	pa = (u64)rip_rel_ptr(&boot_svsm_ca_page);

	/*
	 * Switch over to the boot SVSM CA while the current CA is still
diff --git a/arch/x86/coco/sev/shared.c b/arch/x86/coco/sev/shared.c
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/coco/sev/shared.c
+++ b/arch/x86/coco/sev/shared.c
@@ -XXX,XX +XXX,XX @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid
 */
 static const struct snp_cpuid_table *snp_cpuid_get_table(void)
 {
-	return &RIP_REL_REF(cpuid_table_copy);
+	return rip_rel_ptr(&cpuid_table_copy);
 }

 /*
@@ -XXX,XX +XXX,XX @@ static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
	 * routine is running identity mapped when called, both by the decompressor
	 * code and the early kernel code.
	 */
-	if (!rmpadjust((unsigned long)&RIP_REL_REF(boot_ghcb_page), RMP_PG_SIZE_4K, 1))
+	if (!rmpadjust((unsigned long)rip_rel_ptr(&boot_ghcb_page), RMP_PG_SIZE_4K, 1))
		return false;

	/*
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -XXX,XX +XXX,XX @@
 #endif

 #ifndef __ASSEMBLER__
-#ifndef __pic__
 static __always_inline __pure void *rip_rel_ptr(void *p)
 {
	asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p));

	return p;
 }
+#ifndef __pic__
 #define RIP_REL_REF(var)	(*(typeof(&(var)))rip_rel_ptr(&(var)))
 #else
 #define RIP_REL_REF(var)	(var)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -XXX,XX +XXX,XX @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
	 * attribute.
	 */
	if (sme_get_me_mask()) {
-		paddr = (unsigned long)&RIP_REL_REF(__start_bss_decrypted);
-		paddr_end = (unsigned long)&RIP_REL_REF(__end_bss_decrypted);
+		paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
+		paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);

		for (; paddr < paddr_end; paddr += PMD_SIZE) {
			/*
@@ -XXX,XX +XXX,XX @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
 unsigned long __head __startup_64(unsigned long p2v_offset,
				   struct boot_params *bp)
 {
-	pmd_t (*early_pgts)[PTRS_PER_PMD] = RIP_REL_REF(early_dynamic_pgts);
-	unsigned long physaddr = (unsigned long)&RIP_REL_REF(_text);
+	pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
+	unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
	unsigned long va_text, va_end;
	unsigned long pgtable_flags;
	unsigned long load_delta;
@@ -XXX,XX +XXX,XX @@ unsigned long __head __startup_64(unsigned long p2v_offset,
		for (;;);

	va_text = physaddr - p2v_offset;
-	va_end = (unsigned long)&RIP_REL_REF(_end) - p2v_offset;
+	va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;

	/* Include the SME encryption mask in the fixup value */
	load_delta += sme_get_me_mask();

	/* Fixup the physical addresses in the page table */

-	pgd = &RIP_REL_REF(early_top_pgt)->pgd;
+	pgd = rip_rel_ptr(early_top_pgt);
	pgd[pgd_index(__START_KERNEL_map)] += load_delta;

	if (IS_ENABLED(CONFIG_X86_5LEVEL) && la57) {
-		p4d = (p4dval_t *)&RIP_REL_REF(level4_kernel_pgt);
+		p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
		p4d[MAX_PTRS_PER_P4D - 1] += load_delta;

		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
@@ -XXX,XX +XXX,XX @@ unsigned long __head __startup_64(unsigned long p2v_offset,
	 * error, causing the BIOS to halt the system.
	 */

-	pmd = &RIP_REL_REF(level2_kernel_pgt)->pmd;
+	pmd = rip_rel_ptr(level2_kernel_pgt);

	/* invalidate pages before the kernel image */
	for (i = 0; i < pmd_index(va_text); i++)
@@ -XXX,XX +XXX,XX @@ static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data;
 static void __head startup_64_load_idt(void *vc_handler)
 {
	struct desc_ptr desc = {
-		.address = (unsigned long)&RIP_REL_REF(bringup_idt_table),
+		.address = (unsigned long)rip_rel_ptr(bringup_idt_table),
		.size = sizeof(bringup_idt_table) - 1,
	};
	struct idt_data data;
@@ -XXX,XX +XXX,XX @@ void early_setup_idt(void)
 */
 void __head startup_64_setup_gdt_idt(void)
 {
-	struct desc_struct *gdt = (void *)(__force unsigned long)gdt_page.gdt;
+	struct gdt_page *gp = rip_rel_ptr((void *)(__force unsigned long)&gdt_page);
	void *handler = NULL;

	struct desc_ptr startup_gdt_descr = {
-		.address = (unsigned long)&RIP_REL_REF(*gdt),
+		.address = (unsigned long)gp->gdt,
		.size = GDT_SIZE - 1,
	};

@@ -XXX,XX +XXX,XX @@ void __head startup_64_setup_gdt_idt(void)
		     "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory");

	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
-		handler = &RIP_REL_REF(vc_no_ghcb);
+		handler = rip_rel_ptr(vc_no_ghcb);

	startup_64_load_idt(handler);
 }
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -XXX,XX +XXX,XX @@ void __head sme_encrypt_kernel(struct boot_params *bp)
	 * memory from being cached.
	 */

-	kernel_start = (unsigned long)RIP_REL_REF(_text);
-	kernel_end = ALIGN((unsigned long)RIP_REL_REF(_end), PMD_SIZE);
+	kernel_start = (unsigned long)rip_rel_ptr(_text);
+	kernel_end = ALIGN((unsigned long)rip_rel_ptr(_end), PMD_SIZE);
	kernel_len = kernel_end - kernel_start;

	initrd_start = 0;
@@ -XXX,XX +XXX,XX @@ void __head sme_encrypt_kernel(struct boot_params *bp)
	 * pagetable structures for the encryption of the kernel
	 * pagetable structures for workarea (in case not currently mapped)
	 */
-	execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea);
+	execute_start = workarea_start = (unsigned long)rip_rel_ptr(sme_workarea);
	execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE;
	execute_len = execute_end - execute_start;

--
2.49.0.504.g3bcea36a83-goog
From: Ard Biesheuvel <ardb@kernel.org>
Subject: [PATCH v4 02/11] x86/boot: Move the early GDT/IDT setup code into startup/

Move the early GDT/IDT setup code that runs long before the kernel
virtual mapping is up into arch/x86/boot/startup/, and build it in a way
that ensures that the code tolerates being called from the 1:1 mapping
of memory. The code itself is left unchanged by this patch.

Also tweak the sed symbol matching pattern in the decompressor to match
on lower case 't' or 'b', as these will be emitted by Clang for symbols
with hidden linkage.
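
The Makefile hunk itself is elided below. As an illustration only
(assuming the usual nm-based voffset.h rule; the exact rule name and
symbol list are not shown above and are assumptions here), the tweak
amounts to widening the symbol-type character class:

  -sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(...\)$$/#define VO_\2 _AC(0x\1,UL)/p'
  +sed-voffset := -e 's/^\([0-9a-fA-F]*\) [ABbCDGRSTtVW] \(...\)$$/#define VO_\2 _AC(0x\1,UL)/p'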

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/boot/compressed/Makefile |  2 +-
 arch/x86/boot/startup/Makefile    | 15 ++++
 arch/x86/boot/startup/gdt_idt.c   | 84 ++++++++++++++++++++
 arch/x86/kernel/head64.c          | 74 -----------------
 4 files changed, 100 insertions(+), 75 deletions(-)

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
...
+
+#include <linux/linkage.h>
+#include <linux/types.h>
+
+#include <asm/desc.h>
+#include <asm/init.h>
+#include <asm/setup.h>
+#include <asm/sev.h>
+#include <asm/trapnr.h>
+
+/*
...
+ * which also hasn't happened yet in early CPU bringup.
+ */
+static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data;
+
+/* This may run while still in the direct mapping */
+static void __head startup_64_load_idt(void *vc_handler)
+{
+	struct desc_ptr desc = {
+		.address = (unsigned long)rip_rel_ptr(bringup_idt_table),
+		.size = sizeof(bringup_idt_table) - 1,
+	};
+	struct idt_data data;
+	gate_desc idt_desc;
+
...
+}
+
+/*
+ * Setup boot CPU state needed before kernel switches to virtual addresses.
+ */
+void __head startup_64_setup_gdt_idt(void)
+{
+	struct gdt_page *gp = rip_rel_ptr((void *)(__force unsigned long)&gdt_page);
+	void *handler = NULL;
+
+	struct desc_ptr startup_gdt_descr = {
+		.address = (unsigned long)gp->gdt,
+		.size = GDT_SIZE - 1,
+	};
+
+	/* Load GDT */
+	native_load_gdt(&startup_gdt_descr);
...
+	asm volatile("movl %%eax, %%ds\n"
+		     "movl %%eax, %%ss\n"
+		     "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory");
+
+	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
+		handler = rip_rel_ptr(vc_no_ghcb);
+
+	startup_64_load_idt(handler);
+}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index XXXXXXX..XXXXXXX 100644
...
-
-/* This may run while still in the direct mapping */
-static void __head startup_64_load_idt(void *vc_handler)
-{
-	struct desc_ptr desc = {
-		.address = (unsigned long)rip_rel_ptr(bringup_idt_table),
-		.size = sizeof(bringup_idt_table) - 1,
-	};
-	struct idt_data data;
-	gate_desc idt_desc;
-
...
-/*
- * Setup boot CPU state needed before kernel switches to virtual addresses.
- */
-void __head startup_64_setup_gdt_idt(void)
-{
-	struct gdt_page *gp = rip_rel_ptr((void *)(__force unsigned long)&gdt_page);
-	void *handler = NULL;
-
-	struct desc_ptr startup_gdt_descr = {
-		.address = (unsigned long)gp->gdt,
-		.size = GDT_SIZE - 1,
-	};
-
-	/* Load GDT */
-	native_load_gdt(&startup_gdt_descr);
...
-	asm volatile("movl %%eax, %%ds\n"
-		     "movl %%eax, %%ss\n"
-		     "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory");
-
-	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
-		handler = rip_rel_ptr(vc_no_ghcb);
-
-	startup_64_load_idt(handler);
-}
--
2.49.0.504.g3bcea36a83-goog
From: Ard Biesheuvel <ardb@kernel.org>
Subject: [PATCH v4 03/11] x86/boot: Move early kernel mapping code into startup/

The startup code that constructs the kernel virtual mapping runs from
the 1:1 mapping of memory itself, and therefore, cannot use absolute
symbol references. Before making changes in subsequent patches, move
this code into a separate source file under arch/x86/boot/startup/ where
all such code will be kept from now on.
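
A minimal sketch of what "cannot use absolute symbol references" means
in practice (illustrative only, using a page table symbol that appears
in the code below):

  extern pgd_t early_top_pgt[];

  /* linker-view (kernel virtual) address - not mapped yet at this point */
  pgd_t *bad  = early_top_pgt;

  /* address via the currently active 1:1 mapping - safe to dereference */
  pgd_t *good = rip_rel_ptr(early_top_pgt);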

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/boot/startup/Makefile     |   2 +-
 arch/x86/boot/startup/map_kernel.c | 224 ++++++++++++++++
 arch/x86/kernel/head64.c           | 211 +-----------------
 3 files changed, 226 insertions(+), 211 deletions(-)

diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/boot/startup/Makefile
+++ b/arch/x86/boot/startup/Makefile
...
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pgtable.h>
+
+#include <asm/init.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/sev.h>
+
+extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
+extern unsigned int next_early_pgt;
+
+static inline bool check_la57_support(void)
+{
+	if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+		return false;
+
...
+	 * stage. Only check if it has been enabled there.
+	 */
+	if (!(native_read_cr4() & X86_CR4_LA57))
+		return false;
+
+	RIP_REL_REF(__pgtable_l5_enabled) = 1;
+	RIP_REL_REF(pgdir_shift) = 48;
+	RIP_REL_REF(ptrs_per_p4d) = 512;
+	RIP_REL_REF(page_offset_base) = __PAGE_OFFSET_BASE_L5;
+	RIP_REL_REF(vmalloc_base) = __VMALLOC_BASE_L5;
+	RIP_REL_REF(vmemmap_base) = __VMEMMAP_BASE_L5;
+
+	return true;
+}
+
+static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
+						    pmdval_t *pmd,
+						    unsigned long p2v_offset)
+{
+	unsigned long paddr, paddr_end;
+	int i;
+
+	/* Encrypt the kernel and related (if SME is active) */
...
+	 * The bss section will be memset to zero later in the initialization so
+	 * there is no need to zero it after changing the memory encryption
+	 * attribute.
+	 */
+	if (sme_get_me_mask()) {
+		paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
+		paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);
+
+		for (; paddr < paddr_end; paddr += PMD_SIZE) {
+			/*
+			 * On SNP, transition the page to shared in the RMP table so that
+			 * it is consistent with the page table attribute change.
...
+	 * modifier for the initial pgdir entry programmed into CR3.
+	 */
+	return sme_get_me_mask();
+}
+
+/* Code in __startup_64() can be relocated during execution, but the compiler
+ * doesn't have to generate PC-relative relocations when accessing globals from
+ * that function. Clang actually does not generate them, which leads to
+ * boot-time crashes. To work around this problem, every global pointer must
+ * be accessed using RIP_REL_REF(). Kernel virtual addresses can be determined
+ * by subtracting p2v_offset from the RIP-relative address.
+ */
+unsigned long __head __startup_64(unsigned long p2v_offset,
+				  struct boot_params *bp)
+{
+	pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
+	unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
+	unsigned long va_text, va_end;
+	unsigned long pgtable_flags;
+	unsigned long load_delta;
+	pgdval_t *pgd;
+	p4dval_t *p4d;
...
+	/*
+	 * Compute the delta between the address I am compiled to run at
+	 * and the address I am actually running at.
+	 */
+	load_delta = __START_KERNEL_map + p2v_offset;
+	RIP_REL_REF(phys_base) = load_delta;
+
+	/* Is the address not 2M aligned? */
+	if (load_delta & ~PMD_MASK)
+		for (;;);
+
+	va_text = physaddr - p2v_offset;
+	va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;
+
+	/* Include the SME encryption mask in the fixup value */
+	load_delta += sme_get_me_mask();
+
+	/* Fixup the physical addresses in the page table */
+
+	pgd = rip_rel_ptr(early_top_pgt);
+	pgd[pgd_index(__START_KERNEL_map)] += load_delta;
+
+	if (IS_ENABLED(CONFIG_X86_5LEVEL) && la57) {
+		p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
+		p4d[MAX_PTRS_PER_P4D - 1] += load_delta;
+
+		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
+	}
+
+	RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta;
+	RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 1].pud += load_delta;
+
+	for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
+		RIP_REL_REF(level2_fixmap_pgt)[i].pmd += load_delta;
+
+	/*
+	 * Set up the identity mapping for the switchover.  These
+	 * entries should *NOT* have the global bit set!  This also
+	 * creates a bunch of nonsense entries but that is fine --
+	 * it avoids problems around wraparound.
+	 */
+
+	pud = &early_pgts[0]->pmd;
+	pmd = &early_pgts[1]->pmd;
+	RIP_REL_REF(next_early_pgt) = 2;
+
+	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
+
+	if (la57) {
+		p4d = &early_pgts[RIP_REL_REF(next_early_pgt)++]->pmd;
+
+		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
+		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
+		pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
+
...
+	pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
+	pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
+
+	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
+	/* Filter out unsupported __PAGE_KERNEL_* bits: */
+	pmd_entry &= RIP_REL_REF(__supported_pte_mask);
+	pmd_entry += sme_get_me_mask();
+	pmd_entry += physaddr;
+
+	for (i = 0; i < DIV_ROUND_UP(va_end - va_text, PMD_SIZE); i++) {
+		int idx = i + (physaddr >> PMD_SHIFT);
...
+	 * and on some hardware (particularly the UV platform) even
+	 * speculative access to some reserved areas is caught as an
+	 * error, causing the BIOS to halt the system.
+	 */
+
+	pmd = rip_rel_ptr(level2_kernel_pgt);
+
+	/* invalidate pages before the kernel image */
+	for (i = 0; i < pmd_index(va_text); i++)
+		pmd[i] &= ~_PAGE_PRESENT;
+
...
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -XXX,XX +XXX,XX @@
 extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
-static unsigned int __initdata next_early_pgt;
+unsigned int __initdata next_early_pgt;
 pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);

 #ifdef CONFIG_X86_5LEVEL
@@ -XXX,XX +XXX,XX @@ unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
 EXPORT_SYMBOL(vmemmap_base);
 #endif

...
-	 * The bss section will be memset to zero later in the initialization so
-	 * there is no need to zero it after changing the memory encryption
-	 * attribute.
-	 */
-	if (sme_get_me_mask()) {
-		paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
-		paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);
-
-		for (; paddr < paddr_end; paddr += PMD_SIZE) {
-			/*
-			 * On SNP, transition the page to shared in the RMP table so that
-			 * it is consistent with the page table attribute change.
...
-	 * by subtracting p2v_offset from the RIP-relative address.
-	 */
-unsigned long __head __startup_64(unsigned long p2v_offset,
-				  struct boot_params *bp)
-{
-	pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
-	unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
-	unsigned long va_text, va_end;
-	unsigned long pgtable_flags;
-	unsigned long load_delta;
-	pgdval_t *pgd;
-	p4dval_t *p4d;
...
-	/* Is the address not 2M aligned? */
-	if (load_delta & ~PMD_MASK)
-		for (;;);
-
-	va_text = physaddr - p2v_offset;
-	va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;
-
-	/* Include the SME encryption mask in the fixup value */
-	load_delta += sme_get_me_mask();
-
-	/* Fixup the physical addresses in the page table */
-
-	pgd = rip_rel_ptr(early_top_pgt);
-	pgd[pgd_index(__START_KERNEL_map)] += load_delta;
-
-	if (IS_ENABLED(CONFIG_X86_5LEVEL) && la57) {
-		p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
-		p4d[MAX_PTRS_PER_P4D - 1] += load_delta;
-
-		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
-	}
-
...
-	 * and on some hardware (particularly the UV platform) even
-	 * speculative access to some reserved areas is caught as an
-	 * error, causing the BIOS to halt the system.
-	 */
-
-	pmd = rip_rel_ptr(level2_kernel_pgt);
-
-	/* invalidate pages before the kernel image */
-	for (i = 0; i < pmd_index(va_text); i++)
-		pmd[i] &= ~_PAGE_PRESENT;
-
...
-
 /* Wipe all early page tables except for the kernel symbol map */
 static void __init reset_early_page_tables(void)
 {
--
2.49.0.504.g3bcea36a83-goog
From: Ard Biesheuvel <ardb@kernel.org>
Subject: [PATCH v4 04/11] x86/boot: Drop RIP_REL_REF() uses from early mapping code

Now that __startup_64() is built using -fPIC, RIP_REL_REF() has become a
NOP and can be removed. Only some occurrences of rip_rel_ptr() will
remain, to explicitly take the address of certain global structures in
the 1:1 mapping of memory.
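
(For context, patch #1 above leaves RIP_REL_REF() defined in asm/asm.h
roughly as follows, so PIC objects get the plain-variable expansion and
the macro can simply be dropped from PIC code:)

  #ifndef __pic__
  #define RIP_REL_REF(var)	(*(typeof(&(var)))rip_rel_ptr(&(var)))
  #else
  #define RIP_REL_REF(var)	(var)
  #endif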

While at it, update the code comment to describe why this is needed.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/boot/startup/map_kernel.c | 41 ++++++++++----------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/arch/x86/boot/startup/map_kernel.c b/arch/x86/boot/startup/map_kernel.c
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/boot/startup/map_kernel.c
+++ b/arch/x86/boot/startup/map_kernel.c
@@ -XXX,XX +XXX,XX @@ static inline bool check_la57_support(void)
	if (!(native_read_cr4() & X86_CR4_LA57))
		return false;

-	RIP_REL_REF(__pgtable_l5_enabled) = 1;
-	RIP_REL_REF(pgdir_shift) = 48;
-	RIP_REL_REF(ptrs_per_p4d) = 512;
-	RIP_REL_REF(page_offset_base) = __PAGE_OFFSET_BASE_L5;
-	RIP_REL_REF(vmalloc_base) = __VMALLOC_BASE_L5;
-	RIP_REL_REF(vmemmap_base) = __VMEMMAP_BASE_L5;
+	__pgtable_l5_enabled = 1;
+	pgdir_shift = 48;
+	ptrs_per_p4d = 512;
+	page_offset_base = __PAGE_OFFSET_BASE_L5;
+	vmalloc_base = __VMALLOC_BASE_L5;
+	vmemmap_base = __VMEMMAP_BASE_L5;

	return true;
 }
@@ -XXX,XX +XXX,XX @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
	return sme_get_me_mask();
 }

-/* Code in __startup_64() can be relocated during execution, but the compiler
- * doesn't have to generate PC-relative relocations when accessing globals from
- * that function. Clang actually does not generate them, which leads to
- * boot-time crashes. To work around this problem, every global pointer must
- * be accessed using RIP_REL_REF(). Kernel virtual addresses can be determined
- * by subtracting p2v_offset from the RIP-relative address.
+/*
+ * This code is compiled using PIC codegen because it will execute from the
+ * early 1:1 mapping of memory, which deviates from the mapping expected by the
+ * linker. Due to this deviation, taking the address of a global variable will
+ * produce an ambiguous result when using the plain & operator. Instead,
+ * rip_rel_ptr() must be used, which will return the RIP-relative address in
+ * the 1:1 mapping of memory. Kernel virtual addresses can be determined by
+ * subtracting p2v_offset from the RIP-relative address.
  */
 unsigned long __head __startup_64(unsigned long p2v_offset,
				   struct boot_params *bp)
@@ -XXX,XX +XXX,XX @@ unsigned long __head __startup_64(unsigned long p2v_offset,
	 * Compute the delta between the address I am compiled to run at
	 * and the address I am actually running at.
	 */
-	load_delta = __START_KERNEL_map + p2v_offset;
-	RIP_REL_REF(phys_base) = load_delta;
+	phys_base = load_delta = __START_KERNEL_map + p2v_offset;

	/* Is the address not 2M aligned? */
	if (load_delta & ~PMD_MASK)
@@ -XXX,XX +XXX,XX @@ unsigned long __head __startup_64(unsigned long p2v_offset,
		pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
	}

-	RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta;
-	RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 1].pud += load_delta;
+	level3_kernel_pgt[PTRS_PER_PUD - 2].pud += load_delta;
+	level3_kernel_pgt[PTRS_PER_PUD - 1].pud += load_delta;

	for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
-		RIP_REL_REF(level2_fixmap_pgt)[i].pmd += load_delta;
+		level2_fixmap_pgt[i].pmd += load_delta;

	/*
	 * Set up the identity mapping for the switchover.  These
@@ -XXX,XX +XXX,XX @@ unsigned long __head __startup_64(unsigned long p2v_offset,

	pud = &early_pgts[0]->pmd;
	pmd = &early_pgts[1]->pmd;
-	RIP_REL_REF(next_early_pgt) = 2;
+	next_early_pgt = 2;

	pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();

	if (la57) {
-		p4d = &early_pgts[RIP_REL_REF(next_early_pgt)++]->pmd;
+		p4d = &early_pgts[next_early_pgt++]->pmd;

		i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
		pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
@@ -XXX,XX +XXX,XX @@ unsigned long __head __startup_64(unsigned long p2v_offset,

	pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
	/* Filter out unsupported __PAGE_KERNEL_* bits: */
-	pmd_entry &= RIP_REL_REF(__supported_pte_mask);
+	pmd_entry &= __supported_pte_mask;
	pmd_entry += sme_get_me_mask();
	pmd_entry += physaddr;

--
2.49.0.504.g3bcea36a83-goog
From: Ard Biesheuvel <ardb@kernel.org>
Subject: [PATCH v4 05/11] x86/boot: Move early SME init code into startup/

Move the SME initialization code, which runs from the 1:1 mapping of
memory as it operates on the kernel virtual mapping, into the new
sub-directory arch/x86/boot/startup/ where all startup code will reside
that needs to tolerate executing from the 1:1 mapping.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/boot/startup/Makefile                             | 1 +
 arch/x86/{mm/mem_encrypt_identity.c => boot/startup/sme.c} | 2 --
 arch/x86/mm/Makefile                                       | 6 ------
 3 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/boot/startup/Makefile
+++ b/arch/x86/boot/startup/Makefile
...
+obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= sme.o

 lib-$(CONFIG_X86_64)		+= la57toggle.o
 lib-$(CONFIG_EFI_MIXED)		+= efi-mixed.o
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/boot/startup/sme.c
similarity index 99%
rename from arch/x86/mm/mem_encrypt_identity.c
rename to arch/x86/boot/startup/sme.c
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/boot/startup/sme.c
...
-#include "mm_internal.h"
-
 #define PGD_FLAGS		_KERNPG_TABLE_NOENC
 #define P4D_FLAGS		_KERNPG_TABLE_NOENC
 #define PUD_FLAGS		_KERNPG_TABLE_NOENC
...
168 | * pagetable structures for the encryption of the kernel | ||
169 | * pagetable structures for workarea (in case not currently mapped) | ||
170 | */ | ||
171 | - execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea); | ||
172 | + execute_start = workarea_start = (unsigned long)sme_workarea; | ||
173 | execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE; | ||
174 | execute_len = execute_end - execute_start; | ||
175 | |||
176 | @@ -XXX,XX +XXX,XX @@ void __head sme_encrypt_kernel(struct boot_params *bp) | ||
177 | native_write_cr3(__native_read_cr3()); | ||
178 | } | ||
179 | |||
180 | -void __head sme_enable(struct boot_params *bp) | ||
181 | +void __init sme_enable(struct boot_params *bp) | ||
182 | { | ||
183 | unsigned int eax, ebx, ecx, edx; | ||
184 | unsigned long feature_mask; | ||
185 | @@ -XXX,XX +XXX,XX @@ void __head sme_enable(struct boot_params *bp) | ||
186 | me_mask = 1UL << (ebx & 0x3f); | ||
187 | |||
188 | /* Check the SEV MSR whether SEV or SME is enabled */ | ||
189 | - RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV); | ||
190 | + sev_status = msr = __rdmsr(MSR_AMD64_SEV); | ||
191 | feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; | ||
192 | |||
193 | /* | ||
194 | @@ -XXX,XX +XXX,XX @@ void __head sme_enable(struct boot_params *bp) | ||
195 | return; | ||
196 | } | ||
197 | |||
198 | - RIP_REL_REF(sme_me_mask) = me_mask; | ||
199 | - RIP_REL_REF(physical_mask) &= ~me_mask; | ||
200 | - RIP_REL_REF(cc_vendor) = CC_VENDOR_AMD; | ||
201 | + sme_me_mask = me_mask; | ||
202 | + physical_mask &= ~me_mask; | ||
203 | + cc_vendor = CC_VENDOR_AMD; | ||
204 | cc_set_mask(me_mask); | ||
205 | } | ||
206 | diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h | ||
207 | index XXXXXXX..XXXXXXX 100644 | ||
208 | --- a/arch/x86/include/asm/mem_encrypt.h | ||
209 | +++ b/arch/x86/include/asm/mem_encrypt.h | ||
210 | @@ -XXX,XX +XXX,XX @@ void __init sev_es_init_vc_handling(void); | ||
211 | |||
212 | static inline u64 sme_get_me_mask(void) | ||
213 | { | ||
214 | - return RIP_REL_REF(sme_me_mask); | ||
215 | + return sme_me_mask; | ||
216 | } | ||
217 | |||
218 | #define __bss_decrypted __section(".bss..decrypted") | ||
219 | diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile | 43 | diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile |
220 | index XXXXXXX..XXXXXXX 100644 | 44 | index XXXXXXX..XXXXXXX 100644 |
221 | --- a/arch/x86/mm/Makefile | 45 | --- a/arch/x86/mm/Makefile |
222 | +++ b/arch/x86/mm/Makefile | 46 | +++ b/arch/x86/mm/Makefile |
223 | @@ -XXX,XX +XXX,XX @@ | 47 | @@ -XXX,XX +XXX,XX @@ |
... | ... | ||
259 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_amd.o | 83 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_amd.o |
260 | 84 | ||
261 | -obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o | 85 | -obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o |
262 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o | 86 | obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o |
263 | -- | 87 | -- |
264 | 2.49.0.472.ge94155a9ec-goog | 88 | 2.49.0.504.g3bcea36a83-goog
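For context on the hunk above: the v3 column converts the __head annotations
in this file to __init, while the v4 column leaves them in place (hence the
99% similarity index). The annotation itself is, roughly, just a section
attribute - a sketch along the lines of arch/x86/include/asm/init.h, not a
verbatim copy:

    /*
     * Place a function in .head.text, i.e. code that may execute from
     * the 1:1 physical mapping before the kernel virtual mapping is up.
     */
    #define __head  __section(".head.text")

Code in that section cannot rely on absolute virtual addresses, which is
what the RIP_REL_REF() accesses removed above used to guarantee by hand.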
New patch | |||
---|---|---|---|
1 | From: Ard Biesheuvel <ardb@kernel.org> | ||
1 | 2 | ||
3 | RIP_REL_REF() has no effect on code residing in arch/x86/boot/startup, | ||
4 | as it is built with -fPIC. So remove any occurrences from the SME | ||
5 | startup code. | ||
6 | |||
7 | Note that the SME code is the only caller of cc_set_mask() that requires
8 | this, so drop it from there as well.
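For reference, the accessor being dropped forces a RIP-relative load via
inline asm - a sketch paraphrased from arch/x86/include/asm/asm.h, not a
verbatim copy:

    static __always_inline __pure void *rip_rel_ptr(void *p)
    {
            /* Recompute the address of *p relative to the current RIP */
            asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p));
            return p;
    }
    #define RIP_REL_REF(var)    (*(typeof(&(var)))rip_rel_ptr(&(var)))

A compiler building with -fPIC emits RIP-relative accesses for such globals
by itself, so removing the wrapper does not change the generated code.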
9 | |||
10 | Signed-off-by: Ard Biesheuvel <ardb@kernel.org> | ||
11 | --- | ||
12 | arch/x86/boot/startup/sme.c | 11 +++++------ | ||
13 | arch/x86/include/asm/coco.h | 2 +- | ||
14 | arch/x86/include/asm/mem_encrypt.h | 2 +- | ||
15 | 3 files changed, 7 insertions(+), 8 deletions(-) | ||
16 | |||
17 | diff --git a/arch/x86/boot/startup/sme.c b/arch/x86/boot/startup/sme.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/arch/x86/boot/startup/sme.c | ||
20 | +++ b/arch/x86/boot/startup/sme.c | ||
21 | @@ -XXX,XX +XXX,XX @@ void __head sme_encrypt_kernel(struct boot_params *bp) | ||
22 | * instrumentation or checking boot_cpu_data in the cc_platform_has() | ||
23 | * function. | ||
24 | */ | ||
25 | - if (!sme_get_me_mask() || | ||
26 | - RIP_REL_REF(sev_status) & MSR_AMD64_SEV_ENABLED) | ||
27 | + if (!sme_get_me_mask() || sev_status & MSR_AMD64_SEV_ENABLED) | ||
28 | return; | ||
29 | |||
30 | /* | ||
31 | @@ -XXX,XX +XXX,XX @@ void __head sme_enable(struct boot_params *bp) | ||
32 | me_mask = 1UL << (ebx & 0x3f); | ||
33 | |||
34 | /* Check the SEV MSR whether SEV or SME is enabled */ | ||
35 | - RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV); | ||
36 | + sev_status = msr = __rdmsr(MSR_AMD64_SEV); | ||
37 | feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT; | ||
38 | |||
39 | /* | ||
40 | @@ -XXX,XX +XXX,XX @@ void __head sme_enable(struct boot_params *bp) | ||
41 | return; | ||
42 | } | ||
43 | |||
44 | - RIP_REL_REF(sme_me_mask) = me_mask; | ||
45 | - RIP_REL_REF(physical_mask) &= ~me_mask; | ||
46 | - RIP_REL_REF(cc_vendor) = CC_VENDOR_AMD; | ||
47 | + sme_me_mask = me_mask; | ||
48 | + physical_mask &= ~me_mask; | ||
49 | + cc_vendor = CC_VENDOR_AMD; | ||
50 | cc_set_mask(me_mask); | ||
51 | } | ||
52 | diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/arch/x86/include/asm/coco.h | ||
55 | +++ b/arch/x86/include/asm/coco.h | ||
56 | @@ -XXX,XX +XXX,XX @@ static inline u64 cc_get_mask(void) | ||
57 | |||
58 | static inline void cc_set_mask(u64 mask) | ||
59 | { | ||
60 | - RIP_REL_REF(cc_mask) = mask; | ||
61 | + cc_mask = mask; | ||
62 | } | ||
63 | |||
64 | u64 cc_mkenc(u64 val); | ||
65 | diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h | ||
66 | index XXXXXXX..XXXXXXX 100644 | ||
67 | --- a/arch/x86/include/asm/mem_encrypt.h | ||
68 | +++ b/arch/x86/include/asm/mem_encrypt.h | ||
69 | @@ -XXX,XX +XXX,XX @@ void __init sev_es_init_vc_handling(void); | ||
70 | |||
71 | static inline u64 sme_get_me_mask(void) | ||
72 | { | ||
73 | - return RIP_REL_REF(sme_me_mask); | ||
74 | + return sme_me_mask; | ||
75 | } | ||
76 | |||
77 | #define __bss_decrypted __section(".bss..decrypted") | ||
78 | -- | ||
79 | 2.49.0.504.g3bcea36a83-goog
New patch | |||
---|---|---|---|
1 | From: Ard Biesheuvel <ardb@kernel.org> | ||
1 | 2 | ||
3 | Prepare for splitting off parts of the SEV core.c source file into a | ||
4 | file that carries code that must tolerate being called from the early | ||
5 | 1:1 mapping. This will allow special build-time handling of this code,
6 | to ensure that it gets generated in a way that is compatible with the | ||
7 | early execution context. | ||
8 | |||
9 | So create a de-facto internal SEV API and put the definitions into | ||
10 | sev-internal.h. No attempt is made to allow this header file to be | ||
11 | included in arbitrary other sources - this is explicitly not the intent. | ||
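As an illustration of the intended use, an early-boot translation unit would
then consume the internal API roughly as follows. This is a hypothetical
fragment: example_setup_ghcb() is made up, but the declarations it relies on
all come from the new header:

    #include <asm/sev.h>
    #include <asm/sev-internal.h>

    /* Hypothetical early-boot caller, for illustration only */
    static void __head example_setup_ghcb(unsigned long ghcb_pa)
    {
            if (!sev_es_negotiate_protocol())
                    sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);

            /* Tell the hypervisor where the GHCB lives */
            snp_register_ghcb_early(ghcb_pa);
    }

Ordinary kernel code is expected to keep including <asm/sev.h> only.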
12 | |||
13 | Signed-off-by: Ard Biesheuvel <ardb@kernel.org> | ||
14 | --- | ||
15 | arch/x86/boot/compressed/sev.c | 15 ++- | ||
16 | arch/x86/coco/sev/core.c | 108 +++-------------- | ||
17 | arch/x86/coco/sev/shared.c | 64 ++-------- | ||
18 | arch/x86/include/asm/sev-internal.h | 122 ++++++++++++++++++++ | ||
19 | arch/x86/include/asm/sev.h | 37 ++++++ | ||
20 | 5 files changed, 194 insertions(+), 152 deletions(-) | ||
21 | |||
22 | diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/arch/x86/boot/compressed/sev.c | ||
25 | +++ b/arch/x86/boot/compressed/sev.c | ||
26 | @@ -XXX,XX +XXX,XX @@ static bool fault_in_kernel_space(unsigned long address) | ||
27 | #include "../../lib/inat.c" | ||
28 | #include "../../lib/insn.c" | ||
29 | |||
30 | -/* Include code for early handlers */ | ||
31 | -#include "../../coco/sev/shared.c" | ||
32 | +extern struct svsm_ca *boot_svsm_caa; | ||
33 | +extern u64 boot_svsm_caa_pa; | ||
34 | |||
35 | -static struct svsm_ca *svsm_get_caa(void) | ||
36 | +struct svsm_ca *svsm_get_caa(void) | ||
37 | { | ||
38 | return boot_svsm_caa; | ||
39 | } | ||
40 | |||
41 | -static u64 svsm_get_caa_pa(void) | ||
42 | +u64 svsm_get_caa_pa(void) | ||
43 | { | ||
44 | return boot_svsm_caa_pa; | ||
45 | } | ||
46 | |||
47 | -static int svsm_perform_call_protocol(struct svsm_call *call) | ||
48 | +int svsm_perform_call_protocol(struct svsm_call *call); | ||
49 | + | ||
50 | +/* Include code for early handlers */ | ||
51 | +#include "../../coco/sev/shared.c" | ||
52 | + | ||
53 | +int svsm_perform_call_protocol(struct svsm_call *call) | ||
54 | { | ||
55 | struct ghcb *ghcb; | ||
56 | int ret; | ||
57 | diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c | ||
58 | index XXXXXXX..XXXXXXX 100644 | ||
59 | --- a/arch/x86/coco/sev/core.c | ||
60 | +++ b/arch/x86/coco/sev/core.c | ||
61 | @@ -XXX,XX +XXX,XX @@ | ||
62 | #include <asm/cpu_entry_area.h> | ||
63 | #include <asm/stacktrace.h> | ||
64 | #include <asm/sev.h> | ||
65 | +#include <asm/sev-internal.h> | ||
66 | #include <asm/insn-eval.h> | ||
67 | #include <asm/fpu/xcr.h> | ||
68 | #include <asm/processor.h> | ||
69 | @@ -XXX,XX +XXX,XX @@ | ||
70 | #include <asm/cpuid.h> | ||
71 | #include <asm/cmdline.h> | ||
72 | |||
73 | -#define DR7_RESET_VALUE 0x400 | ||
74 | - | ||
75 | /* AP INIT values as documented in the APM2 section "Processor Initialization State" */ | ||
76 | #define AP_INIT_CS_LIMIT 0xffff | ||
77 | #define AP_INIT_DS_LIMIT 0xffff | ||
78 | @@ -XXX,XX +XXX,XX @@ static const char * const sev_status_feat_names[] = { | ||
79 | }; | ||
80 | |||
81 | /* For early boot hypervisor communication in SEV-ES enabled guests */ | ||
82 | -static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); | ||
83 | +struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); | ||
84 | |||
85 | /* | ||
86 | * Needs to be in the .data section because we need it NULL before bss is | ||
87 | * cleared | ||
88 | */ | ||
89 | -static struct ghcb *boot_ghcb __section(".data"); | ||
90 | +struct ghcb *boot_ghcb __section(".data"); | ||
91 | |||
92 | /* Bitmap of SEV features supported by the hypervisor */ | ||
93 | -static u64 sev_hv_features __ro_after_init; | ||
94 | +u64 sev_hv_features __ro_after_init; | ||
95 | |||
96 | /* Secrets page physical address from the CC blob */ | ||
97 | static u64 secrets_pa __ro_after_init; | ||
98 | @@ -XXX,XX +XXX,XX @@ static u64 snp_tsc_scale __ro_after_init; | ||
99 | static u64 snp_tsc_offset __ro_after_init; | ||
100 | static u64 snp_tsc_freq_khz __ro_after_init; | ||
101 | |||
102 | -/* #VC handler runtime per-CPU data */ | ||
103 | -struct sev_es_runtime_data { | ||
104 | - struct ghcb ghcb_page; | ||
105 | - | ||
106 | - /* | ||
107 | - * Reserve one page per CPU as backup storage for the unencrypted GHCB. | ||
108 | - * It is needed when an NMI happens while the #VC handler uses the real | ||
109 | - * GHCB, and the NMI handler itself is causing another #VC exception. In | ||
110 | - * that case the GHCB content of the first handler needs to be backed up | ||
111 | - * and restored. | ||
112 | - */ | ||
113 | - struct ghcb backup_ghcb; | ||
114 | - | ||
115 | - /* | ||
116 | - * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions. | ||
117 | - * There is no need for it to be atomic, because nothing is written to | ||
118 | - * the GHCB between the read and the write of ghcb_active. So it is safe | ||
119 | - * to use it when a nested #VC exception happens before the write. | ||
120 | - * | ||
121 | - * This is necessary for example in the #VC->NMI->#VC case when the NMI | ||
122 | - * happens while the first #VC handler uses the GHCB. When the NMI code | ||
123 | - * raises a second #VC handler it might overwrite the contents of the | ||
124 | - * GHCB written by the first handler. To avoid this the content of the | ||
125 | - * GHCB is saved and restored when the GHCB is detected to be in use | ||
126 | - * already. | ||
127 | - */ | ||
128 | - bool ghcb_active; | ||
129 | - bool backup_ghcb_active; | ||
130 | - | ||
131 | - /* | ||
132 | - * Cached DR7 value - write it on DR7 writes and return it on reads. | ||
133 | - * That value will never make it to the real hardware DR7 as debugging | ||
134 | - * is currently unsupported in SEV-ES guests. | ||
135 | - */ | ||
136 | - unsigned long dr7; | ||
137 | -}; | ||
138 | - | ||
139 | -struct ghcb_state { | ||
140 | - struct ghcb *ghcb; | ||
141 | -}; | ||
142 | |||
143 | /* For early boot SVSM communication */ | ||
144 | -static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE); | ||
145 | +struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE); | ||
146 | |||
147 | -static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); | ||
148 | -static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); | ||
149 | -static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa); | ||
150 | -static DEFINE_PER_CPU(u64, svsm_caa_pa); | ||
151 | +DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); | ||
152 | +DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); | ||
153 | +DEFINE_PER_CPU(struct svsm_ca *, svsm_caa); | ||
154 | +DEFINE_PER_CPU(u64, svsm_caa_pa); | ||
155 | |||
156 | static __always_inline bool on_vc_stack(struct pt_regs *regs) | ||
157 | { | ||
158 | @@ -XXX,XX +XXX,XX @@ void noinstr __sev_es_ist_exit(void) | ||
159 | * | ||
160 | * Callers must disable local interrupts around it. | ||
161 | */ | ||
162 | -static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) | ||
163 | +noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) | ||
164 | { | ||
165 | struct sev_es_runtime_data *data; | ||
166 | struct ghcb *ghcb; | ||
167 | @@ -XXX,XX +XXX,XX @@ static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) | ||
168 | return ghcb; | ||
169 | } | ||
170 | |||
171 | -static inline u64 sev_es_rd_ghcb_msr(void) | ||
172 | -{ | ||
173 | - return __rdmsr(MSR_AMD64_SEV_ES_GHCB); | ||
174 | -} | ||
175 | - | ||
176 | -static __always_inline void sev_es_wr_ghcb_msr(u64 val) | ||
177 | -{ | ||
178 | - u32 low, high; | ||
179 | - | ||
180 | - low = (u32)(val); | ||
181 | - high = (u32)(val >> 32); | ||
182 | - | ||
183 | - native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high); | ||
184 | -} | ||
185 | - | ||
186 | static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt, | ||
187 | unsigned char *buffer) | ||
188 | { | ||
189 | @@ -XXX,XX +XXX,XX @@ static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) | ||
190 | /* Include code shared with pre-decompression boot stage */ | ||
191 | #include "shared.c" | ||
192 | |||
193 | -static inline struct svsm_ca *svsm_get_caa(void) | ||
194 | -{ | ||
195 | - /* | ||
196 | - * Use rIP-relative references when called early in the boot. If | ||
197 | - * ->use_cas is set, then it is late in the boot and no need | ||
198 | - * to worry about rIP-relative references. | ||
199 | - */ | ||
200 | - if (RIP_REL_REF(sev_cfg).use_cas) | ||
201 | - return this_cpu_read(svsm_caa); | ||
202 | - else | ||
203 | - return RIP_REL_REF(boot_svsm_caa); | ||
204 | -} | ||
205 | - | ||
206 | -static u64 svsm_get_caa_pa(void) | ||
207 | -{ | ||
208 | - /* | ||
209 | - * Use rIP-relative references when called early in the boot. If | ||
210 | - * ->use_cas is set, then it is late in the boot and no need | ||
211 | - * to worry about rIP-relative references. | ||
212 | - */ | ||
213 | - if (RIP_REL_REF(sev_cfg).use_cas) | ||
214 | - return this_cpu_read(svsm_caa_pa); | ||
215 | - else | ||
216 | - return RIP_REL_REF(boot_svsm_caa_pa); | ||
217 | -} | ||
218 | - | ||
219 | -static noinstr void __sev_put_ghcb(struct ghcb_state *state) | ||
220 | +noinstr void __sev_put_ghcb(struct ghcb_state *state) | ||
221 | { | ||
222 | struct sev_es_runtime_data *data; | ||
223 | struct ghcb *ghcb; | ||
224 | @@ -XXX,XX +XXX,XX @@ static noinstr void __sev_put_ghcb(struct ghcb_state *state) | ||
225 | } | ||
226 | } | ||
227 | |||
228 | -static int svsm_perform_call_protocol(struct svsm_call *call) | ||
229 | +int svsm_perform_call_protocol(struct svsm_call *call) | ||
230 | { | ||
231 | struct ghcb_state state; | ||
232 | unsigned long flags; | ||
233 | @@ -XXX,XX +XXX,XX @@ static u64 __init get_jump_table_addr(void) | ||
234 | return ret; | ||
235 | } | ||
236 | |||
237 | -static void __head | ||
238 | +void __head | ||
239 | early_set_pages_state(unsigned long vaddr, unsigned long paddr, | ||
240 | unsigned long npages, enum psc_op op) | ||
241 | { | ||
242 | diff --git a/arch/x86/coco/sev/shared.c b/arch/x86/coco/sev/shared.c | ||
243 | index XXXXXXX..XXXXXXX 100644 | ||
244 | --- a/arch/x86/coco/sev/shared.c | ||
245 | +++ b/arch/x86/coco/sev/shared.c | ||
246 | @@ -XXX,XX +XXX,XX @@ | ||
247 | */ | ||
248 | u8 snp_vmpl __ro_after_init; | ||
249 | EXPORT_SYMBOL_GPL(snp_vmpl); | ||
250 | -static struct svsm_ca *boot_svsm_caa __ro_after_init; | ||
251 | -static u64 boot_svsm_caa_pa __ro_after_init; | ||
252 | - | ||
253 | -static struct svsm_ca *svsm_get_caa(void); | ||
254 | -static u64 svsm_get_caa_pa(void); | ||
255 | -static int svsm_perform_call_protocol(struct svsm_call *call); | ||
256 | +struct svsm_ca *boot_svsm_caa __ro_after_init; | ||
257 | +u64 boot_svsm_caa_pa __ro_after_init; | ||
258 | |||
259 | /* I/O parameters for CPUID-related helpers */ | ||
260 | struct cpuid_leaf { | ||
261 | @@ -XXX,XX +XXX,XX @@ struct cpuid_leaf { | ||
262 | u32 edx; | ||
263 | }; | ||
264 | |||
265 | -/* | ||
266 | - * Individual entries of the SNP CPUID table, as defined by the SNP | ||
267 | - * Firmware ABI, Revision 0.9, Section 7.1, Table 14. | ||
268 | - */ | ||
269 | -struct snp_cpuid_fn { | ||
270 | - u32 eax_in; | ||
271 | - u32 ecx_in; | ||
272 | - u64 xcr0_in; | ||
273 | - u64 xss_in; | ||
274 | - u32 eax; | ||
275 | - u32 ebx; | ||
276 | - u32 ecx; | ||
277 | - u32 edx; | ||
278 | - u64 __reserved; | ||
279 | -} __packed; | ||
280 | - | ||
281 | -/* | ||
282 | - * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9, | ||
283 | - * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit | ||
284 | - * of 64 entries per CPUID table. | ||
285 | - */ | ||
286 | -#define SNP_CPUID_COUNT_MAX 64 | ||
287 | - | ||
288 | -struct snp_cpuid_table { | ||
289 | - u32 count; | ||
290 | - u32 __reserved1; | ||
291 | - u64 __reserved2; | ||
292 | - struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX]; | ||
293 | -} __packed; | ||
294 | - | ||
295 | /* | ||
296 | * Since feature negotiation related variables are set early in the boot | ||
297 | * process they must reside in the .data section so as not to be zeroed | ||
298 | @@ -XXX,XX +XXX,XX @@ static u32 cpuid_std_range_max __ro_after_init; | ||
299 | static u32 cpuid_hyp_range_max __ro_after_init; | ||
300 | static u32 cpuid_ext_range_max __ro_after_init; | ||
301 | |||
302 | -static bool __init sev_es_check_cpu_features(void) | ||
303 | +bool __init sev_es_check_cpu_features(void) | ||
304 | { | ||
305 | if (!has_cpuflag(X86_FEATURE_RDRAND)) { | ||
306 | error("RDRAND instruction not supported - no trusted source of randomness available\n"); | ||
307 | @@ -XXX,XX +XXX,XX @@ static bool __init sev_es_check_cpu_features(void) | ||
308 | return true; | ||
309 | } | ||
310 | |||
311 | -static void __head __noreturn | ||
312 | +void __head __noreturn | ||
313 | sev_es_terminate(unsigned int set, unsigned int reason) | ||
314 | { | ||
315 | u64 val = GHCB_MSR_TERM_REQ; | ||
316 | @@ -XXX,XX +XXX,XX @@ sev_es_terminate(unsigned int set, unsigned int reason) | ||
317 | /* | ||
318 | * The hypervisor features are available from GHCB version 2 onward. | ||
319 | */ | ||
320 | -static u64 get_hv_features(void) | ||
321 | +u64 get_hv_features(void) | ||
322 | { | ||
323 | u64 val; | ||
324 | |||
325 | @@ -XXX,XX +XXX,XX @@ static u64 get_hv_features(void) | ||
326 | return GHCB_MSR_HV_FT_RESP_VAL(val); | ||
327 | } | ||
328 | |||
329 | -static void snp_register_ghcb_early(unsigned long paddr) | ||
330 | +void snp_register_ghcb_early(unsigned long paddr) | ||
331 | { | ||
332 | unsigned long pfn = paddr >> PAGE_SHIFT; | ||
333 | u64 val; | ||
334 | @@ -XXX,XX +XXX,XX @@ static void snp_register_ghcb_early(unsigned long paddr) | ||
335 | sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER); | ||
336 | } | ||
337 | |||
338 | -static bool sev_es_negotiate_protocol(void) | ||
339 | +bool sev_es_negotiate_protocol(void) | ||
340 | { | ||
341 | u64 val; | ||
342 | |||
343 | @@ -XXX,XX +XXX,XX @@ static bool sev_es_negotiate_protocol(void) | ||
344 | return true; | ||
345 | } | ||
346 | |||
347 | -static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb) | ||
348 | -{ | ||
349 | - ghcb->save.sw_exit_code = 0; | ||
350 | - __builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); | ||
351 | -} | ||
352 | - | ||
353 | static bool vc_decoding_needed(unsigned long exit_code) | ||
354 | { | ||
355 | /* Exceptions don't require to decode the instruction */ | ||
356 | @@ -XXX,XX +XXX,XX @@ static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call) | ||
357 | return svsm_process_result_codes(call); | ||
358 | } | ||
359 | |||
360 | -static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, | ||
361 | - struct es_em_ctxt *ctxt, | ||
362 | - u64 exit_code, u64 exit_info_1, | ||
363 | - u64 exit_info_2) | ||
364 | +enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, | ||
365 | + struct es_em_ctxt *ctxt, | ||
366 | + u64 exit_code, u64 exit_info_1, | ||
367 | + u64 exit_info_2) | ||
368 | { | ||
369 | /* Fill in protocol and format specifiers */ | ||
370 | ghcb->protocol_version = ghcb_version; | ||
371 | @@ -XXX,XX +XXX,XX @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid | ||
372 | * while running with the initial identity mapping as well as the | ||
373 | * switch-over to kernel virtual addresses later. | ||
374 | */ | ||
375 | -static const struct snp_cpuid_table *snp_cpuid_get_table(void) | ||
376 | +const struct snp_cpuid_table *snp_cpuid_get_table(void) | ||
377 | { | ||
378 | return rip_rel_ptr(&cpuid_table_copy); | ||
379 | } | ||
380 | diff --git a/arch/x86/include/asm/sev-internal.h b/arch/x86/include/asm/sev-internal.h | ||
381 | new file mode 100644 | ||
382 | index XXXXXXX..XXXXXXX | ||
383 | --- /dev/null | ||
384 | +++ b/arch/x86/include/asm/sev-internal.h | ||
385 | @@ -XXX,XX +XXX,XX @@ | ||
386 | +/* SPDX-License-Identifier: GPL-2.0 */ | ||
387 | + | ||
388 | +#define DR7_RESET_VALUE 0x400 | ||
389 | + | ||
390 | +extern struct ghcb boot_ghcb_page; | ||
391 | +extern struct ghcb *boot_ghcb; | ||
392 | +extern u64 sev_hv_features; | ||
393 | + | ||
394 | +/* #VC handler runtime per-CPU data */ | ||
395 | +struct sev_es_runtime_data { | ||
396 | + struct ghcb ghcb_page; | ||
397 | + | ||
398 | + /* | ||
399 | + * Reserve one page per CPU as backup storage for the unencrypted GHCB. | ||
400 | + * It is needed when an NMI happens while the #VC handler uses the real | ||
401 | + * GHCB, and the NMI handler itself is causing another #VC exception. In | ||
402 | + * that case the GHCB content of the first handler needs to be backed up | ||
403 | + * and restored. | ||
404 | + */ | ||
405 | + struct ghcb backup_ghcb; | ||
406 | + | ||
407 | + /* | ||
408 | + * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions. | ||
409 | + * There is no need for it to be atomic, because nothing is written to | ||
410 | + * the GHCB between the read and the write of ghcb_active. So it is safe | ||
411 | + * to use it when a nested #VC exception happens before the write. | ||
412 | + * | ||
413 | + * This is necessary for example in the #VC->NMI->#VC case when the NMI | ||
414 | + * happens while the first #VC handler uses the GHCB. When the NMI code | ||
415 | + * raises a second #VC handler it might overwrite the contents of the | ||
416 | + * GHCB written by the first handler. To avoid this the content of the | ||
417 | + * GHCB is saved and restored when the GHCB is detected to be in use | ||
418 | + * already. | ||
419 | + */ | ||
420 | + bool ghcb_active; | ||
421 | + bool backup_ghcb_active; | ||
422 | + | ||
423 | + /* | ||
424 | + * Cached DR7 value - write it on DR7 writes and return it on reads. | ||
425 | + * That value will never make it to the real hardware DR7 as debugging | ||
426 | + * is currently unsupported in SEV-ES guests. | ||
427 | + */ | ||
428 | + unsigned long dr7; | ||
429 | +}; | ||
430 | + | ||
431 | +struct ghcb_state { | ||
432 | + struct ghcb *ghcb; | ||
433 | +}; | ||
434 | + | ||
435 | +extern struct svsm_ca boot_svsm_ca_page; | ||
436 | + | ||
437 | +struct ghcb *__sev_get_ghcb(struct ghcb_state *state); | ||
438 | +void __sev_put_ghcb(struct ghcb_state *state); | ||
439 | + | ||
440 | +DECLARE_PER_CPU(struct sev_es_runtime_data*, runtime_data); | ||
441 | +DECLARE_PER_CPU(struct sev_es_save_area *, sev_vmsa); | ||
442 | + | ||
443 | +void early_set_pages_state(unsigned long vaddr, unsigned long paddr, | ||
444 | + unsigned long npages, enum psc_op op); | ||
445 | + | ||
446 | +void __noreturn sev_es_terminate(unsigned int set, unsigned int reason); | ||
447 | + | ||
448 | +DECLARE_PER_CPU(struct svsm_ca *, svsm_caa); | ||
449 | +DECLARE_PER_CPU(u64, svsm_caa_pa); | ||
450 | + | ||
451 | +extern struct svsm_ca *boot_svsm_caa; | ||
452 | +extern u64 boot_svsm_caa_pa; | ||
453 | + | ||
454 | +static __always_inline struct svsm_ca *svsm_get_caa(void) | ||
455 | +{ | ||
456 | + /* | ||
457 | + * Use rIP-relative references when called early in the boot. If | ||
458 | + * ->use_cas is set, then it is late in the boot and no need | ||
459 | + * to worry about rIP-relative references. | ||
460 | + */ | ||
461 | + if (RIP_REL_REF(sev_cfg).use_cas) | ||
462 | + return this_cpu_read(svsm_caa); | ||
463 | + else | ||
464 | + return RIP_REL_REF(boot_svsm_caa); | ||
465 | +} | ||
466 | + | ||
467 | +static __always_inline u64 svsm_get_caa_pa(void) | ||
468 | +{ | ||
469 | + /* | ||
470 | + * Use rIP-relative references when called early in the boot. If | ||
471 | + * ->use_cas is set, then it is late in the boot and no need | ||
472 | + * to worry about rIP-relative references. | ||
473 | + */ | ||
474 | + if (RIP_REL_REF(sev_cfg).use_cas) | ||
475 | + return this_cpu_read(svsm_caa_pa); | ||
476 | + else | ||
477 | + return RIP_REL_REF(boot_svsm_caa_pa); | ||
478 | +} | ||
479 | + | ||
480 | +int svsm_perform_call_protocol(struct svsm_call *call); | ||
481 | + | ||
482 | +static inline u64 sev_es_rd_ghcb_msr(void) | ||
483 | +{ | ||
484 | + return __rdmsr(MSR_AMD64_SEV_ES_GHCB); | ||
485 | +} | ||
486 | + | ||
487 | +static __always_inline void sev_es_wr_ghcb_msr(u64 val) | ||
488 | +{ | ||
489 | + u32 low, high; | ||
490 | + | ||
491 | + low = (u32)(val); | ||
492 | + high = (u32)(val >> 32); | ||
493 | + | ||
494 | + native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high); | ||
495 | +} | ||
496 | + | ||
497 | +enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, | ||
498 | + struct es_em_ctxt *ctxt, | ||
499 | + u64 exit_code, u64 exit_info_1, | ||
500 | + u64 exit_info_2); | ||
501 | + | ||
502 | +void snp_register_ghcb_early(unsigned long paddr); | ||
503 | +bool sev_es_negotiate_protocol(void); | ||
504 | +bool sev_es_check_cpu_features(void); | ||
505 | +u64 get_hv_features(void); | ||
506 | + | ||
507 | +const struct snp_cpuid_table *snp_cpuid_get_table(void); | ||
508 | diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h | ||
509 | index XXXXXXX..XXXXXXX 100644 | ||
510 | --- a/arch/x86/include/asm/sev.h | ||
511 | +++ b/arch/x86/include/asm/sev.h | ||
512 | @@ -XXX,XX +XXX,XX @@ | ||
513 | #include <asm/sev-common.h> | ||
514 | #include <asm/coco.h> | ||
515 | #include <asm/set_memory.h> | ||
516 | +#include <asm/svm.h> | ||
517 | |||
518 | #define GHCB_PROTOCOL_MIN 1ULL | ||
519 | #define GHCB_PROTOCOL_MAX 2ULL | ||
520 | @@ -XXX,XX +XXX,XX @@ extern void vc_no_ghcb(void); | ||
521 | extern void vc_boot_ghcb(void); | ||
522 | extern bool handle_vc_boot_ghcb(struct pt_regs *regs); | ||
523 | |||
524 | +/* | ||
525 | + * Individual entries of the SNP CPUID table, as defined by the SNP | ||
526 | + * Firmware ABI, Revision 0.9, Section 7.1, Table 14. | ||
527 | + */ | ||
528 | +struct snp_cpuid_fn { | ||
529 | + u32 eax_in; | ||
530 | + u32 ecx_in; | ||
531 | + u64 xcr0_in; | ||
532 | + u64 xss_in; | ||
533 | + u32 eax; | ||
534 | + u32 ebx; | ||
535 | + u32 ecx; | ||
536 | + u32 edx; | ||
537 | + u64 __reserved; | ||
538 | +} __packed; | ||
539 | + | ||
540 | +/* | ||
541 | + * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9, | ||
542 | + * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit | ||
543 | + * of 64 entries per CPUID table. | ||
544 | + */ | ||
545 | +#define SNP_CPUID_COUNT_MAX 64 | ||
546 | + | ||
547 | +struct snp_cpuid_table { | ||
548 | + u32 count; | ||
549 | + u32 __reserved1; | ||
550 | + u64 __reserved2; | ||
551 | + struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX]; | ||
552 | +} __packed; | ||
553 | + | ||
554 | /* PVALIDATE return codes */ | ||
555 | #define PVALIDATE_FAIL_SIZEMISMATCH 6 | ||
556 | |||
557 | @@ -XXX,XX +XXX,XX @@ int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req | ||
558 | void __init snp_secure_tsc_prepare(void); | ||
559 | void __init snp_secure_tsc_init(void); | ||
560 | |||
561 | +static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb) | ||
562 | +{ | ||
563 | + ghcb->save.sw_exit_code = 0; | ||
564 | + __builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); | ||
565 | +} | ||
566 | + | ||
567 | #else /* !CONFIG_AMD_MEM_ENCRYPT */ | ||
568 | |||
569 | #define snp_vmpl 0 | ||
570 | -- | ||
571 | 2.49.0.504.g3bcea36a83-goog
1 | From: Ard Biesheuvel <ardb@kernel.org> | 1 | From: Ard Biesheuvel <ardb@kernel.org> |
---|---|---|---|
2 | 2 | ||
3 | The 5-level paging trampoline is used by both the EFI stub and the | 3 | Disentangle the SEV core code and the SEV code that is called during |
4 | traditional decompressor. Move it out of the decompressor sources into | 4 | early boot. The latter piece will be moved into startup/ in a subsequent |
5 | the newly minted arch/x86/boot/startup/ sub-directory which will hold | 5 | patch. |
6 | startup code that may be shared between the decompressor, the EFI stub | ||
7 | and the kernel proper, and needs to tolerate being called during early | ||
8 | boot, before the kernel virtual mapping has been created. | ||
9 | |||
10 | This will allow the 5-level paging trampoline to be used by EFI boot | ||
11 | images such as zboot that omit the traditional decompressor entirely. | ||
12 | 6 | ||
13 | Signed-off-by: Ard Biesheuvel <ardb@kernel.org> | 7 | Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
14 | --- | 8 | --- |
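Condensed, the build-system effect of this split is the following - a sketch
of the resulting arch/x86/coco/sev/Makefile, reassembled from the hunk
further down, not a verbatim copy:

    obj-y += core.o startup.o

    # startup.o may run from the 1:1 mapping: no jump tables (emitted as
    # absolute references in non-PIC code), no tracing, no instrumentation.
    CFLAGS_startup.o          += -fno-jump-tables
    CFLAGS_REMOVE_startup.o    = -pg
    KASAN_SANITIZE_startup.o  := n
    KMSAN_SANITIZE_startup.o  := n
    KCOV_INSTRUMENT_startup.o := n

core.o, by contrast, is now built and instrumented like any other kernel
object.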
15 | arch/x86/Makefile | 1 + | 9 | arch/x86/boot/compressed/sev.c | 2 + |
16 | arch/x86/boot/compressed/Makefile | 2 +- | 10 | arch/x86/coco/sev/Makefile | 12 +- |
17 | arch/x86/boot/startup/Makefile | 3 +++ | 11 | arch/x86/coco/sev/core.c | 1574 ++++---------------- |
18 | arch/x86/boot/{compressed => startup}/la57toggle.S | 0 | 12 | arch/x86/coco/sev/shared.c | 281 ---- |
19 | 4 files changed, 5 insertions(+), 1 deletion(-) | 13 | arch/x86/coco/sev/startup.c | 1395 +++++++++++++++++ |
14 | 5 files changed, 1658 insertions(+), 1606 deletions(-) | ||
20 | 15 | ||
21 | diff --git a/arch/x86/Makefile b/arch/x86/Makefile | 16 | diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c |
22 | index XXXXXXX..XXXXXXX 100644 | 17 | index XXXXXXX..XXXXXXX 100644 |
23 | --- a/arch/x86/Makefile | 18 | --- a/arch/x86/boot/compressed/sev.c |
24 | +++ b/arch/x86/Makefile | 19 | +++ b/arch/x86/boot/compressed/sev.c |
25 | @@ -XXX,XX +XXX,XX @@ archprepare: $(cpufeaturemasks.hdr) | 20 | @@ -XXX,XX +XXX,XX @@ u64 svsm_get_caa_pa(void) |
26 | ### | 21 | |
27 | # Kernel objects | 22 | int svsm_perform_call_protocol(struct svsm_call *call); |
28 | 23 | ||
29 | +core-y += arch/x86/boot/startup/ | 24 | +u8 snp_vmpl; |
30 | libs-y += arch/x86/lib/ | 25 | + |
31 | 26 | /* Include code for early handlers */ | |
32 | # drivers-y are linked after core-y | 27 | #include "../../coco/sev/shared.c" |
33 | diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile | 28 | |
29 | diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile | ||
34 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
35 | --- a/arch/x86/boot/compressed/Makefile | 31 | --- a/arch/x86/coco/sev/Makefile |
36 | +++ b/arch/x86/boot/compressed/Makefile | 32 | +++ b/arch/x86/coco/sev/Makefile |
37 | @@ -XXX,XX +XXX,XX @@ ifdef CONFIG_X86_64 | 33 | @@ -XXX,XX +XXX,XX @@ |
38 | vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/mem_encrypt.o | 34 | # SPDX-License-Identifier: GPL-2.0 |
39 | vmlinux-objs-y += $(obj)/pgtable_64.o | 35 | |
40 | vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o | 36 | -obj-y += core.o |
41 | - vmlinux-objs-y += $(obj)/la57toggle.o | 37 | +obj-y += core.o startup.o |
38 | |||
39 | # jump tables are emitted using absolute references in non-PIC code | ||
40 | # so they cannot be used in the early SEV startup code | ||
41 | -CFLAGS_core.o += -fno-jump-tables | ||
42 | +CFLAGS_startup.o += -fno-jump-tables | ||
43 | |||
44 | ifdef CONFIG_FUNCTION_TRACER | ||
45 | -CFLAGS_REMOVE_core.o = -pg | ||
46 | +CFLAGS_REMOVE_startup.o = -pg | ||
42 | endif | 47 | endif |
43 | 48 | ||
44 | vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o | 49 | -KASAN_SANITIZE_core.o := n |
45 | @@ -XXX,XX +XXX,XX @@ vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o | 50 | -KMSAN_SANITIZE_core.o := n |
46 | 51 | -KCOV_INSTRUMENT_core.o := n | |
47 | vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o | 52 | +KASAN_SANITIZE_startup.o := n |
48 | vmlinux-libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a | 53 | +KMSAN_SANITIZE_startup.o := n |
49 | +vmlinux-libs-$(CONFIG_X86_64) += $(objtree)/arch/x86/boot/startup/lib.a | 54 | +KCOV_INSTRUMENT_startup.o := n |
50 | 55 | ||
51 | $(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE | 56 | # With some compiler versions the generated code results in boot hangs, caused |
52 | $(call if_changed,ld) | 57 | # by several compilation units. To be safe, disable all instrumentation. |
53 | diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile | 58 | diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c |
59 | index XXXXXXX..XXXXXXX 100644 | ||
60 | --- a/arch/x86/coco/sev/core.c | ||
61 | +++ b/arch/x86/coco/sev/core.c | ||
62 | @@ -XXX,XX +XXX,XX @@ static const char * const sev_status_feat_names[] = { | ||
63 | [MSR_AMD64_SNP_SMT_PROT_BIT] = "SMTProt", | ||
64 | }; | ||
65 | |||
66 | -/* For early boot hypervisor communication in SEV-ES enabled guests */ | ||
67 | -struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); | ||
68 | - | ||
69 | -/* | ||
70 | - * Needs to be in the .data section because we need it NULL before bss is | ||
71 | - * cleared | ||
72 | - */ | ||
73 | -struct ghcb *boot_ghcb __section(".data"); | ||
74 | - | ||
75 | -/* Bitmap of SEV features supported by the hypervisor */ | ||
76 | -u64 sev_hv_features __ro_after_init; | ||
77 | - | ||
78 | /* Secrets page physical address from the CC blob */ | ||
79 | static u64 secrets_pa __ro_after_init; | ||
80 | |||
81 | @@ -XXX,XX +XXX,XX @@ static u64 snp_tsc_scale __ro_after_init; | ||
82 | static u64 snp_tsc_offset __ro_after_init; | ||
83 | static u64 snp_tsc_freq_khz __ro_after_init; | ||
84 | |||
85 | - | ||
86 | -/* For early boot SVSM communication */ | ||
87 | -struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE); | ||
88 | - | ||
89 | DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); | ||
90 | DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa); | ||
91 | -DEFINE_PER_CPU(struct svsm_ca *, svsm_caa); | ||
92 | -DEFINE_PER_CPU(u64, svsm_caa_pa); | ||
93 | + | ||
94 | +/* | ||
95 | + * SVSM related information: | ||
96 | + * When running under an SVSM, the VMPL that Linux is executing at must be | ||
97 | + * non-zero. The VMPL is therefore used to indicate the presence of an SVSM. | ||
98 | + */ | ||
99 | +u8 snp_vmpl __ro_after_init; | ||
100 | +EXPORT_SYMBOL_GPL(snp_vmpl); | ||
101 | |||
102 | static __always_inline bool on_vc_stack(struct pt_regs *regs) | ||
103 | { | ||
104 | @@ -XXX,XX +XXX,XX @@ static __always_inline bool on_vc_stack(struct pt_regs *regs) | ||
105 | return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC))); | ||
106 | } | ||
107 | |||
108 | + | ||
109 | /* | ||
110 | * This function handles the case when an NMI is raised in the #VC | ||
111 | * exception handler entry code, before the #VC handler has switched off | ||
112 | @@ -XXX,XX +XXX,XX @@ void noinstr __sev_es_ist_exit(void) | ||
113 | this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist); | ||
114 | } | ||
115 | |||
116 | -/* | ||
117 | - * Nothing shall interrupt this code path while holding the per-CPU | ||
118 | - * GHCB. The backup GHCB is only for NMIs interrupting this path. | ||
119 | - * | ||
120 | - * Callers must disable local interrupts around it. | ||
121 | - */ | ||
122 | -noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) | ||
123 | +static u64 __init get_snp_jump_table_addr(void) | ||
124 | { | ||
125 | - struct sev_es_runtime_data *data; | ||
126 | - struct ghcb *ghcb; | ||
127 | - | ||
128 | - WARN_ON(!irqs_disabled()); | ||
129 | - | ||
130 | - data = this_cpu_read(runtime_data); | ||
131 | - ghcb = &data->ghcb_page; | ||
132 | - | ||
133 | - if (unlikely(data->ghcb_active)) { | ||
134 | - /* GHCB is already in use - save its contents */ | ||
135 | - | ||
136 | - if (unlikely(data->backup_ghcb_active)) { | ||
137 | - /* | ||
138 | - * Backup-GHCB is also already in use. There is no way | ||
139 | - * to continue here so just kill the machine. To make | ||
140 | - * panic() work, mark GHCBs inactive so that messages | ||
141 | - * can be printed out. | ||
142 | - */ | ||
143 | - data->ghcb_active = false; | ||
144 | - data->backup_ghcb_active = false; | ||
145 | - | ||
146 | - instrumentation_begin(); | ||
147 | - panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use"); | ||
148 | - instrumentation_end(); | ||
149 | - } | ||
150 | - | ||
151 | - /* Mark backup_ghcb active before writing to it */ | ||
152 | - data->backup_ghcb_active = true; | ||
153 | - | ||
154 | - state->ghcb = &data->backup_ghcb; | ||
155 | + struct snp_secrets_page *secrets; | ||
156 | + void __iomem *mem; | ||
157 | + u64 addr; | ||
158 | |||
159 | - /* Backup GHCB content */ | ||
160 | - *state->ghcb = *ghcb; | ||
161 | - } else { | ||
162 | - state->ghcb = NULL; | ||
163 | - data->ghcb_active = true; | ||
164 | + mem = ioremap_encrypted(secrets_pa, PAGE_SIZE); | ||
165 | + if (!mem) { | ||
166 | + pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n"); | ||
167 | + return 0; | ||
168 | } | ||
169 | |||
170 | - return ghcb; | ||
171 | -} | ||
172 | + secrets = (__force struct snp_secrets_page *)mem; | ||
173 | |||
174 | -static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt, | ||
175 | - unsigned char *buffer) | ||
176 | -{ | ||
177 | - return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE); | ||
178 | + addr = secrets->os_area.ap_jump_table_pa; | ||
179 | + iounmap(mem); | ||
180 | + | ||
181 | + return addr; | ||
182 | } | ||
183 | |||
184 | -static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt) | ||
185 | +void noinstr __sev_es_nmi_complete(void) | ||
186 | { | ||
187 | - char buffer[MAX_INSN_SIZE]; | ||
188 | - int insn_bytes; | ||
189 | - | ||
190 | - insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer); | ||
191 | - if (insn_bytes == 0) { | ||
192 | - /* Nothing could be copied */ | ||
193 | - ctxt->fi.vector = X86_TRAP_PF; | ||
194 | - ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; | ||
195 | - ctxt->fi.cr2 = ctxt->regs->ip; | ||
196 | - return ES_EXCEPTION; | ||
197 | - } else if (insn_bytes == -EINVAL) { | ||
198 | - /* Effective RIP could not be calculated */ | ||
199 | - ctxt->fi.vector = X86_TRAP_GP; | ||
200 | - ctxt->fi.error_code = 0; | ||
201 | - ctxt->fi.cr2 = 0; | ||
202 | - return ES_EXCEPTION; | ||
203 | - } | ||
204 | - | ||
205 | - if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes)) | ||
206 | - return ES_DECODE_FAILED; | ||
207 | + struct ghcb_state state; | ||
208 | + struct ghcb *ghcb; | ||
209 | |||
210 | - if (ctxt->insn.immediate.got) | ||
211 | - return ES_OK; | ||
212 | - else | ||
213 | - return ES_DECODE_FAILED; | ||
214 | -} | ||
215 | + ghcb = __sev_get_ghcb(&state); | ||
216 | |||
217 | -static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt) | ||
218 | -{ | ||
219 | - char buffer[MAX_INSN_SIZE]; | ||
220 | - int res, ret; | ||
221 | + vc_ghcb_invalidate(ghcb); | ||
222 | + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE); | ||
223 | + ghcb_set_sw_exit_info_1(ghcb, 0); | ||
224 | + ghcb_set_sw_exit_info_2(ghcb, 0); | ||
225 | |||
226 | - res = vc_fetch_insn_kernel(ctxt, buffer); | ||
227 | - if (res) { | ||
228 | - ctxt->fi.vector = X86_TRAP_PF; | ||
229 | - ctxt->fi.error_code = X86_PF_INSTR; | ||
230 | - ctxt->fi.cr2 = ctxt->regs->ip; | ||
231 | - return ES_EXCEPTION; | ||
232 | - } | ||
233 | + sev_es_wr_ghcb_msr(__pa_nodebug(ghcb)); | ||
234 | + VMGEXIT(); | ||
235 | |||
236 | - ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64); | ||
237 | - if (ret < 0) | ||
238 | - return ES_DECODE_FAILED; | ||
239 | - else | ||
240 | - return ES_OK; | ||
241 | + __sev_put_ghcb(&state); | ||
242 | } | ||
243 | |||
244 | -static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) | ||
245 | +static u64 __init get_jump_table_addr(void) | ||
246 | { | ||
247 | - if (user_mode(ctxt->regs)) | ||
248 | - return __vc_decode_user_insn(ctxt); | ||
249 | - else | ||
250 | - return __vc_decode_kern_insn(ctxt); | ||
251 | -} | ||
252 | + struct ghcb_state state; | ||
253 | + unsigned long flags; | ||
254 | + struct ghcb *ghcb; | ||
255 | + u64 ret = 0; | ||
256 | |||
257 | -static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, | ||
258 | - char *dst, char *buf, size_t size) | ||
259 | -{ | ||
260 | - unsigned long error_code = X86_PF_PROT | X86_PF_WRITE; | ||
261 | + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) | ||
262 | + return get_snp_jump_table_addr(); | ||
263 | |||
264 | - /* | ||
265 | - * This function uses __put_user() independent of whether kernel or user | ||
266 | - * memory is accessed. This works fine because __put_user() does no | ||
267 | - * sanity checks of the pointer being accessed. All that it does is | ||
268 | - * to report when the access failed. | ||
269 | - * | ||
270 | - * Also, this function runs in atomic context, so __put_user() is not | ||
271 | - * allowed to sleep. The page-fault handler detects that it is running | ||
272 | - * in atomic context and will not try to take mmap_sem and handle the | ||
273 | - * fault, so additional pagefault_enable()/disable() calls are not | ||
274 | - * needed. | ||
275 | - * | ||
276 | - * The access can't be done via copy_to_user() here because | ||
277 | - * vc_write_mem() must not use string instructions to access unsafe | ||
278 | - * memory. The reason is that MOVS is emulated by the #VC handler by | ||
279 | - * splitting the move up into a read and a write and taking a nested #VC | ||
280 | - * exception on whatever of them is the MMIO access. Using string | ||
281 | - * instructions here would cause infinite nesting. | ||
282 | - */ | ||
283 | - switch (size) { | ||
284 | - case 1: { | ||
285 | - u8 d1; | ||
286 | - u8 __user *target = (u8 __user *)dst; | ||
287 | - | ||
288 | - memcpy(&d1, buf, 1); | ||
289 | - if (__put_user(d1, target)) | ||
290 | - goto fault; | ||
291 | - break; | ||
292 | - } | ||
293 | - case 2: { | ||
294 | - u16 d2; | ||
295 | - u16 __user *target = (u16 __user *)dst; | ||
296 | + local_irq_save(flags); | ||
297 | |||
298 | - memcpy(&d2, buf, 2); | ||
299 | - if (__put_user(d2, target)) | ||
300 | - goto fault; | ||
301 | - break; | ||
302 | - } | ||
303 | - case 4: { | ||
304 | - u32 d4; | ||
305 | - u32 __user *target = (u32 __user *)dst; | ||
306 | + ghcb = __sev_get_ghcb(&state); | ||
307 | |||
308 | - memcpy(&d4, buf, 4); | ||
309 | - if (__put_user(d4, target)) | ||
310 | - goto fault; | ||
311 | - break; | ||
312 | - } | ||
313 | - case 8: { | ||
314 | - u64 d8; | ||
315 | - u64 __user *target = (u64 __user *)dst; | ||
316 | + vc_ghcb_invalidate(ghcb); | ||
317 | + ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE); | ||
318 | + ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE); | ||
319 | + ghcb_set_sw_exit_info_2(ghcb, 0); | ||
320 | |||
321 | - memcpy(&d8, buf, 8); | ||
322 | - if (__put_user(d8, target)) | ||
323 | - goto fault; | ||
324 | - break; | ||
325 | - } | ||
326 | - default: | ||
327 | - WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); | ||
328 | - return ES_UNSUPPORTED; | ||
329 | - } | ||
330 | + sev_es_wr_ghcb_msr(__pa(ghcb)); | ||
331 | + VMGEXIT(); | ||
332 | |||
333 | - return ES_OK; | ||
334 | + if (ghcb_sw_exit_info_1_is_valid(ghcb) && | ||
335 | + ghcb_sw_exit_info_2_is_valid(ghcb)) | ||
336 | + ret = ghcb->save.sw_exit_info_2; | ||
337 | |||
338 | -fault: | ||
339 | - if (user_mode(ctxt->regs)) | ||
340 | - error_code |= X86_PF_USER; | ||
341 | + __sev_put_ghcb(&state); | ||
342 | |||
343 | - ctxt->fi.vector = X86_TRAP_PF; | ||
344 | - ctxt->fi.error_code = error_code; | ||
345 | - ctxt->fi.cr2 = (unsigned long)dst; | ||
346 | + local_irq_restore(flags); | ||
347 | |||
348 | - return ES_EXCEPTION; | ||
349 | + return ret; | ||
350 | } | ||
351 | |||
352 | -static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, | ||
353 | - char *src, char *buf, size_t size) | ||
354 | +static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size, | ||
355 | + int ret, u64 svsm_ret) | ||
356 | { | ||
357 | - unsigned long error_code = X86_PF_PROT; | ||
358 | - | ||
359 | - /* | ||
360 | - * This function uses __get_user() independent of whether kernel or user | ||
361 | - * memory is accessed. This works fine because __get_user() does no | ||
362 | - * sanity checks of the pointer being accessed. All that it does is | ||
363 | - * to report when the access failed. | ||
364 | - * | ||
365 | - * Also, this function runs in atomic context, so __get_user() is not | ||
366 | - * allowed to sleep. The page-fault handler detects that it is running | ||
367 | - * in atomic context and will not try to take mmap_sem and handle the | ||
368 | - * fault, so additional pagefault_enable()/disable() calls are not | ||
369 | - * needed. | ||
370 | - * | ||
371 | - * The access can't be done via copy_from_user() here because | ||
372 | - * vc_read_mem() must not use string instructions to access unsafe | ||
373 | - * memory. The reason is that MOVS is emulated by the #VC handler by | ||
374 | - * splitting the move up into a read and a write and taking a nested #VC | ||
375 | - * exception on whatever of them is the MMIO access. Using string | ||
376 | - * instructions here would cause infinite nesting. | ||
377 | - */ | ||
378 | - switch (size) { | ||
379 | - case 1: { | ||
380 | - u8 d1; | ||
381 | - u8 __user *s = (u8 __user *)src; | ||
382 | - | ||
383 | - if (__get_user(d1, s)) | ||
384 | - goto fault; | ||
385 | - memcpy(buf, &d1, 1); | ||
386 | - break; | ||
387 | - } | ||
388 | - case 2: { | ||
389 | - u16 d2; | ||
390 | - u16 __user *s = (u16 __user *)src; | ||
391 | - | ||
392 | - if (__get_user(d2, s)) | ||
393 | - goto fault; | ||
394 | - memcpy(buf, &d2, 2); | ||
395 | - break; | ||
396 | - } | ||
397 | - case 4: { | ||
398 | - u32 d4; | ||
399 | - u32 __user *s = (u32 __user *)src; | ||
400 | - | ||
401 | - if (__get_user(d4, s)) | ||
402 | - goto fault; | ||
403 | - memcpy(buf, &d4, 4); | ||
404 | - break; | ||
405 | - } | ||
406 | - case 8: { | ||
407 | - u64 d8; | ||
408 | - u64 __user *s = (u64 __user *)src; | ||
409 | - if (__get_user(d8, s)) | ||
410 | - goto fault; | ||
411 | - memcpy(buf, &d8, 8); | ||
412 | - break; | ||
413 | - } | ||
414 | - default: | ||
415 | - WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); | ||
416 | - return ES_UNSUPPORTED; | ||
417 | - } | ||
418 | + WARN(1, "PVALIDATE failure: pfn: 0x%llx, action: %u, size: %u, ret: %d, svsm_ret: 0x%llx\n", | ||
419 | + pfn, action, page_size, ret, svsm_ret); | ||
420 | |||
421 | - return ES_OK; | ||
422 | + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); | ||
423 | +} | ||
424 | |||
425 | -fault: | ||
426 | - if (user_mode(ctxt->regs)) | ||
427 | - error_code |= X86_PF_USER; | ||
428 | +static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret) | ||
429 | +{ | ||
430 | + unsigned int page_size; | ||
431 | + bool action; | ||
432 | + u64 pfn; | ||
433 | |||
434 | - ctxt->fi.vector = X86_TRAP_PF; | ||
435 | - ctxt->fi.error_code = error_code; | ||
436 | - ctxt->fi.cr2 = (unsigned long)src; | ||
437 | + pfn = pc->entry[pc->cur_index].pfn; | ||
438 | + action = pc->entry[pc->cur_index].action; | ||
439 | + page_size = pc->entry[pc->cur_index].page_size; | ||
440 | |||
441 | - return ES_EXCEPTION; | ||
442 | + __pval_terminate(pfn, action, page_size, ret, svsm_ret); | ||
443 | } | ||
444 | |||
445 | -static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, | ||
446 | - unsigned long vaddr, phys_addr_t *paddr) | ||
447 | +static void pval_pages(struct snp_psc_desc *desc) | ||
448 | { | ||
449 | - unsigned long va = (unsigned long)vaddr; | ||
450 | - unsigned int level; | ||
451 | - phys_addr_t pa; | ||
452 | - pgd_t *pgd; | ||
453 | - pte_t *pte; | ||
454 | - | ||
455 | - pgd = __va(read_cr3_pa()); | ||
456 | - pgd = &pgd[pgd_index(va)]; | ||
457 | - pte = lookup_address_in_pgd(pgd, va, &level); | ||
458 | - if (!pte) { | ||
459 | - ctxt->fi.vector = X86_TRAP_PF; | ||
460 | - ctxt->fi.cr2 = vaddr; | ||
461 | - ctxt->fi.error_code = 0; | ||
462 | - | ||
463 | - if (user_mode(ctxt->regs)) | ||
464 | - ctxt->fi.error_code |= X86_PF_USER; | ||
465 | + struct psc_entry *e; | ||
466 | + unsigned long vaddr; | ||
467 | + unsigned int size; | ||
468 | + unsigned int i; | ||
469 | + bool validate; | ||
470 | + u64 pfn; | ||
471 | + int rc; | ||
472 | |||
473 | - return ES_EXCEPTION; | ||
474 | - } | ||
475 | + for (i = 0; i <= desc->hdr.end_entry; i++) { | ||
476 | + e = &desc->entries[i]; | ||
477 | |||
478 | - if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC)) | ||
479 | - /* Emulated MMIO to/from encrypted memory not supported */ | ||
480 | - return ES_UNSUPPORTED; | ||
481 | + pfn = e->gfn; | ||
482 | + vaddr = (unsigned long)pfn_to_kaddr(pfn); | ||
483 | + size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; | ||
484 | + validate = e->operation == SNP_PAGE_STATE_PRIVATE; | ||
485 | |||
486 | - pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; | ||
487 | - pa |= va & ~page_level_mask(level); | ||
488 | + rc = pvalidate(vaddr, size, validate); | ||
489 | + if (!rc) | ||
490 | + continue; | ||
491 | |||
492 | - *paddr = pa; | ||
493 | + if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) { | ||
494 | + unsigned long vaddr_end = vaddr + PMD_SIZE; | ||
495 | |||
496 | - return ES_OK; | ||
497 | + for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) { | ||
498 | + rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); | ||
499 | + if (rc) | ||
500 | + __pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0); | ||
501 | + } | ||
502 | + } else { | ||
503 | + __pval_terminate(pfn, validate, size, rc, 0); | ||
504 | + } | ||
505 | + } | ||
506 | } | ||
507 | |||
508 | -static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) | ||
509 | +static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action, | ||
510 | + struct svsm_pvalidate_call *pc) | ||
511 | { | ||
512 | - BUG_ON(size > 4); | ||
513 | + struct svsm_pvalidate_entry *pe; | ||
514 | |||
515 | - if (user_mode(ctxt->regs)) { | ||
516 | - struct thread_struct *t = ¤t->thread; | ||
517 | - struct io_bitmap *iobm = t->io_bitmap; | ||
518 | - size_t idx; | ||
519 | + /* Nothing in the CA yet */ | ||
520 | + pc->num_entries = 0; | ||
521 | + pc->cur_index = 0; | ||
522 | |||
523 | - if (!iobm) | ||
524 | - goto fault; | ||
525 | + pe = &pc->entry[0]; | ||
526 | |||
527 | - for (idx = port; idx < port + size; ++idx) { | ||
528 | - if (test_bit(idx, iobm->bitmap)) | ||
529 | - goto fault; | ||
530 | - } | ||
531 | - } | ||
532 | + while (pfn < pfn_end) { | ||
533 | + pe->page_size = RMP_PG_SIZE_4K; | ||
534 | + pe->action = action; | ||
535 | + pe->ignore_cf = 0; | ||
536 | + pe->pfn = pfn; | ||
537 | |||
538 | - return ES_OK; | ||
539 | + pe++; | ||
540 | + pfn++; | ||
541 | |||
542 | -fault: | ||
543 | - ctxt->fi.vector = X86_TRAP_GP; | ||
544 | - ctxt->fi.error_code = 0; | ||
545 | + pc->num_entries++; | ||
546 | + if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT) | ||
547 | + break; | ||
548 | + } | ||
549 | |||
550 | - return ES_EXCEPTION; | ||
551 | + return pfn; | ||
552 | } | ||
553 | |||
554 | -static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) | ||
555 | +static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int desc_entry, | ||
556 | + struct svsm_pvalidate_call *pc) | ||
557 | { | ||
558 | - long error_code = ctxt->fi.error_code; | ||
559 | - int trapnr = ctxt->fi.vector; | ||
560 | - | ||
561 | - ctxt->regs->orig_ax = ctxt->fi.error_code; | ||
562 | - | ||
563 | - switch (trapnr) { | ||
564 | - case X86_TRAP_GP: | ||
565 | - exc_general_protection(ctxt->regs, error_code); | ||
566 | - break; | ||
567 | - case X86_TRAP_UD: | ||
568 | - exc_invalid_op(ctxt->regs); | ||
569 | - break; | ||
570 | - case X86_TRAP_PF: | ||
571 | - write_cr2(ctxt->fi.cr2); | ||
572 | - exc_page_fault(ctxt->regs, error_code); | ||
573 | - break; | ||
574 | - case X86_TRAP_AC: | ||
575 | - exc_alignment_check(ctxt->regs, error_code); | ||
576 | - break; | ||
577 | - default: | ||
578 | - pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n"); | ||
579 | - BUG(); | ||
580 | - } | ||
581 | -} | ||
582 | + struct svsm_pvalidate_entry *pe; | ||
583 | + struct psc_entry *e; | ||
584 | |||
585 | -/* Include code shared with pre-decompression boot stage */ | ||
586 | -#include "shared.c" | ||
587 | + /* Nothing in the CA yet */ | ||
588 | + pc->num_entries = 0; | ||
589 | + pc->cur_index = 0; | ||
590 | |||
591 | -noinstr void __sev_put_ghcb(struct ghcb_state *state) | ||
592 | -{ | ||
593 | - struct sev_es_runtime_data *data; | ||
594 | - struct ghcb *ghcb; | ||
595 | + pe = &pc->entry[0]; | ||
596 | + e = &desc->entries[desc_entry]; | ||
597 | |||
598 | - WARN_ON(!irqs_disabled()); | ||
599 | + while (desc_entry <= desc->hdr.end_entry) { | ||
600 | + pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; | ||
601 | + pe->action = e->operation == SNP_PAGE_STATE_PRIVATE; | ||
602 | + pe->ignore_cf = 0; | ||
603 | + pe->pfn = e->gfn; | ||
604 | |||
605 | - data = this_cpu_read(runtime_data); | ||
606 | - ghcb = &data->ghcb_page; | ||
607 | + pe++; | ||
608 | + e++; | ||
609 | |||
610 | - if (state->ghcb) { | ||
611 | - /* Restore GHCB from Backup */ | ||
612 | - *ghcb = *state->ghcb; | ||
613 | - data->backup_ghcb_active = false; | ||
614 | - state->ghcb = NULL; | ||
615 | - } else { | ||
616 | - /* | ||
617 | - * Invalidate the GHCB so a VMGEXIT instruction issued | ||
618 | - * from userspace won't appear to be valid. | ||
619 | - */ | ||
620 | - vc_ghcb_invalidate(ghcb); | ||
621 | - data->ghcb_active = false; | ||
622 | + desc_entry++; | ||
623 | + pc->num_entries++; | ||
624 | + if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT) | ||
625 | + break; | ||
626 | } | ||
627 | + | ||
628 | + return desc_entry; | ||
629 | } | ||
630 | |||
631 | -int svsm_perform_call_protocol(struct svsm_call *call) | ||
632 | +static void svsm_pval_pages(struct snp_psc_desc *desc) | ||
633 | { | ||
634 | - struct ghcb_state state; | ||
635 | + struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY]; | ||
636 | + unsigned int i, pv_4k_count = 0; | ||
637 | + struct svsm_pvalidate_call *pc; | ||
638 | + struct svsm_call call = {}; | ||
639 | unsigned long flags; | ||
640 | - struct ghcb *ghcb; | ||
641 | + bool action; | ||
642 | + u64 pc_pa; | ||
643 | int ret; | ||
644 | |||
645 | /* | ||
646 | @@ -XXX,XX +XXX,XX @@ int svsm_perform_call_protocol(struct svsm_call *call) | ||
647 | flags = native_local_irq_save(); | ||
648 | |||
649 | /* | ||
650 | - * Use rip-relative references when called early in the boot. If | ||
651 | - * ghcbs_initialized is set, then it is late in the boot and no need | ||
652 | - * to worry about rip-relative references in called functions. | ||
653 | + * The SVSM calling area (CA) can support processing 510 entries at a | ||
654 | + * time. Loop through the Page State Change descriptor until the CA is | ||
655 | + * full or the last entry in the descriptor is reached, at which time | ||
656 | + * the SVSM is invoked. This repeats until all entries in the descriptor | ||
657 | + * are processed. | ||
658 | */ | ||
659 | - if (RIP_REL_REF(sev_cfg).ghcbs_initialized) | ||
660 | - ghcb = __sev_get_ghcb(&state); | ||
661 | - else if (RIP_REL_REF(boot_ghcb)) | ||
662 | - ghcb = RIP_REL_REF(boot_ghcb); | ||
663 | - else | ||
664 | - ghcb = NULL; | ||
665 | + call.caa = svsm_get_caa(); | ||
666 | |||
667 | - do { | ||
668 | - ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call) | ||
669 | - : svsm_perform_msr_protocol(call); | ||
670 | - } while (ret == -EAGAIN); | ||
671 | + pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer; | ||
672 | + pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer); | ||
673 | |||
674 | - if (RIP_REL_REF(sev_cfg).ghcbs_initialized) | ||
675 | - __sev_put_ghcb(&state); | ||
676 | + /* Protocol 0, Call ID 1 */ | ||
677 | + call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE); | ||
678 | + call.rcx = pc_pa; | ||
679 | |||
680 | - native_local_irq_restore(flags); | ||
681 | + for (i = 0; i <= desc->hdr.end_entry;) { | ||
682 | + i = svsm_build_ca_from_psc_desc(desc, i, pc); | ||
683 | |||
684 | - return ret; | ||
685 | -} | ||
686 | + do { | ||
687 | + ret = svsm_perform_call_protocol(&call); | ||
688 | + if (!ret) | ||
689 | + continue; | ||
690 | |||
691 | -void noinstr __sev_es_nmi_complete(void) | ||
692 | -{ | ||
693 | - struct ghcb_state state; | ||
694 | - struct ghcb *ghcb; | ||
695 | + /* | ||
696 | + * Check if the entry failed because of an RMP mismatch (a | ||
697 | + * PVALIDATE at 2M was requested, but the page is mapped in | ||
698 | + * the RMP as 4K). | ||
699 | + */ | ||
700 | |||
701 | - ghcb = __sev_get_ghcb(&state); | ||
702 | + if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH && | ||
703 | + pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) { | ||
704 | + /* Save this entry for post-processing at 4K */ | ||
705 | + pv_4k[pv_4k_count++] = pc->entry[pc->cur_index]; | ||
706 | + | ||
707 | + /* Skip to the next one unless at the end of the list */ | ||
708 | + pc->cur_index++; | ||
709 | + if (pc->cur_index < pc->num_entries) | ||
710 | + ret = -EAGAIN; | ||
711 | + else | ||
712 | + ret = 0; | ||
713 | + } | ||
714 | + } while (ret == -EAGAIN); | ||
715 | |||
716 | - vc_ghcb_invalidate(ghcb); | ||
717 | - ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE); | ||
718 | - ghcb_set_sw_exit_info_1(ghcb, 0); | ||
719 | - ghcb_set_sw_exit_info_2(ghcb, 0); | ||
720 | + if (ret) | ||
721 | + svsm_pval_terminate(pc, ret, call.rax_out); | ||
722 | + } | ||
723 | |||
724 | - sev_es_wr_ghcb_msr(__pa_nodebug(ghcb)); | ||
725 | - VMGEXIT(); | ||
726 | + /* Process any entries that failed to be validated at 2M and validate them at 4K */ | ||
727 | + for (i = 0; i < pv_4k_count; i++) { | ||
728 | + u64 pfn, pfn_end; | ||
729 | |||
730 | - __sev_put_ghcb(&state); | ||
731 | -} | ||
732 | + action = pv_4k[i].action; | ||
733 | + pfn = pv_4k[i].pfn; | ||
734 | + pfn_end = pfn + 512; | ||
735 | |||
736 | -static u64 __init get_snp_jump_table_addr(void) | ||
737 | -{ | ||
738 | - struct snp_secrets_page *secrets; | ||
739 | - void __iomem *mem; | ||
740 | - u64 addr; | ||
741 | + while (pfn < pfn_end) { | ||
742 | + pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc); | ||
743 | |||
744 | - mem = ioremap_encrypted(secrets_pa, PAGE_SIZE); | ||
745 | - if (!mem) { | ||
746 | - pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n"); | ||
747 | - return 0; | ||
748 | + ret = svsm_perform_call_protocol(&call); | ||
749 | + if (ret) | ||
750 | + svsm_pval_terminate(pc, ret, call.rax_out); | ||
751 | + } | ||
752 | } | ||
753 | |||
754 | - secrets = (__force struct snp_secrets_page *)mem; | ||
755 | - | ||
756 | - addr = secrets->os_area.ap_jump_table_pa; | ||
757 | - iounmap(mem); | ||
758 | - | ||
759 | - return addr; | ||
760 | + native_local_irq_restore(flags); | ||
761 | } | ||
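The batching scheme described in the comment at the top of svsm_pval_pages() - fill the calling area, invoke the SVSM, resume where the builder left off - reduces to the loop below. This is a standalone illustration, not kernel code; the 510-entry limit is taken from the comment above and the descriptor size is made up.

    #include <stdio.h>

    #define SVSM_PVALIDATE_MAX_COUNT 510	/* CA capacity, per the comment above */

    int main(void)
    {
    	unsigned int end_entry = 1199;	/* hypothetical PSC descriptor size */
    	unsigned int i = 0, calls = 0;

    	while (i <= end_entry) {
    		unsigned int n = end_entry - i + 1;

    		if (n > SVSM_PVALIDATE_MAX_COUNT)
    			n = SVSM_PVALIDATE_MAX_COUNT;

    		/* One SVSM_CORE_PVALIDATE invocation per filled CA. */
    		printf("call %u: entries %u..%u\n", ++calls, i, i + n - 1);
    		i += n;
    	}
    	return 0;	/* prints 3 calls: 0..509, 510..1019, 1020..1199 */
    }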
762 | |||
763 | -static u64 __init get_jump_table_addr(void) | ||
764 | +static void pvalidate_pages(struct snp_psc_desc *desc) | ||
765 | { | ||
766 | - struct ghcb_state state; | ||
767 | - unsigned long flags; | ||
768 | - struct ghcb *ghcb; | ||
769 | - u64 ret = 0; | ||
770 | - | ||
771 | - if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) | ||
772 | - return get_snp_jump_table_addr(); | ||
773 | - | ||
774 | - local_irq_save(flags); | ||
775 | + if (snp_vmpl) | ||
776 | + svsm_pval_pages(desc); | ||
777 | + else | ||
778 | + pval_pages(desc); | ||
779 | +} | ||
780 | |||
781 | - ghcb = __sev_get_ghcb(&state); | ||
782 | +static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc) | ||
783 | +{ | ||
784 | + int cur_entry, end_entry, ret = 0; | ||
785 | + struct snp_psc_desc *data; | ||
786 | + struct es_em_ctxt ctxt; | ||
787 | |||
788 | vc_ghcb_invalidate(ghcb); | ||
789 | - ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE); | ||
790 | - ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE); | ||
791 | - ghcb_set_sw_exit_info_2(ghcb, 0); | ||
792 | - | ||
793 | - sev_es_wr_ghcb_msr(__pa(ghcb)); | ||
794 | - VMGEXIT(); | ||
795 | - | ||
796 | - if (ghcb_sw_exit_info_1_is_valid(ghcb) && | ||
797 | - ghcb_sw_exit_info_2_is_valid(ghcb)) | ||
798 | - ret = ghcb->save.sw_exit_info_2; | ||
799 | - | ||
800 | - __sev_put_ghcb(&state); | ||
801 | |||
802 | - local_irq_restore(flags); | ||
803 | - | ||
804 | - return ret; | ||
805 | -} | ||
806 | + /* Copy the input desc into GHCB shared buffer */ | ||
807 | + data = (struct snp_psc_desc *)ghcb->shared_buffer; | ||
808 | + memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc))); | ||
809 | |||
810 | -void __head | ||
811 | -early_set_pages_state(unsigned long vaddr, unsigned long paddr, | ||
812 | - unsigned long npages, enum psc_op op) | ||
813 | -{ | ||
814 | - unsigned long paddr_end; | ||
815 | - u64 val; | ||
816 | - | ||
817 | - vaddr = vaddr & PAGE_MASK; | ||
818 | + /* | ||
819 | + * As per the GHCB specification, the hypervisor can resume the guest | ||
820 | + * before processing all the entries. Check whether all the entries | ||
821 | + * are processed. If not, then keep retrying. Note, the hypervisor | ||
822 | + * will update the data memory directly to indicate the status, so | ||
823 | + * reference the data->hdr everywhere. | ||
824 | + * | ||
825 | + * The strategy here is to wait for the hypervisor to change the page | ||
826 | + * state in the RMP table before the guest accesses the memory pages. If the | ||
827 | + * page state change was not successful, then a later memory access will | ||
828 | + * result in a crash. | ||
829 | + */ | ||
830 | + cur_entry = data->hdr.cur_entry; | ||
831 | + end_entry = data->hdr.end_entry; | ||
832 | |||
833 | - paddr = paddr & PAGE_MASK; | ||
834 | - paddr_end = paddr + (npages << PAGE_SHIFT); | ||
835 | + while (data->hdr.cur_entry <= data->hdr.end_entry) { | ||
836 | + ghcb_set_sw_scratch(ghcb, (u64)__pa(data)); | ||
837 | |||
838 | - while (paddr < paddr_end) { | ||
839 | - /* Page validation must be rescinded before changing to shared */ | ||
840 | - if (op == SNP_PAGE_STATE_SHARED) | ||
841 | - pvalidate_4k_page(vaddr, paddr, false); | ||
842 | + /* This call advances data->hdr.cur_entry in the shared buffer. */ | ||
843 | + ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0); | ||
844 | |||
845 | /* | ||
846 | - * Use the MSR protocol because this function can be called before | ||
847 | - * the GHCB is established. | ||
848 | + * Page State Change VMGEXIT can pass error code through | ||
849 | + * exit_info_2. | ||
850 | */ | ||
851 | - sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); | ||
852 | - VMGEXIT(); | ||
853 | - | ||
854 | - val = sev_es_rd_ghcb_msr(); | ||
855 | - | ||
856 | - if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) | ||
857 | - goto e_term; | ||
858 | - | ||
859 | - if (GHCB_MSR_PSC_RESP_VAL(val)) | ||
860 | - goto e_term; | ||
861 | + if (WARN(ret || ghcb->save.sw_exit_info_2, | ||
862 | + "SNP: PSC failed ret=%d exit_info_2=%llx\n", | ||
863 | + ret, ghcb->save.sw_exit_info_2)) { | ||
864 | + ret = 1; | ||
865 | + goto out; | ||
866 | + } | ||
867 | |||
868 | - /* Page validation must be performed after changing to private */ | ||
869 | - if (op == SNP_PAGE_STATE_PRIVATE) | ||
870 | - pvalidate_4k_page(vaddr, paddr, true); | ||
871 | + /* Verify that the reserved bit is not set */ | ||
872 | + if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) { | ||
873 | + ret = 1; | ||
874 | + goto out; | ||
875 | + } | ||
876 | |||
877 | - vaddr += PAGE_SIZE; | ||
878 | - paddr += PAGE_SIZE; | ||
879 | + /* | ||
880 | + * Sanity check that entry processing is not going backwards. | ||
881 | + * This will happen only if the hypervisor is tricking us. | ||
882 | + */ | ||
883 | + if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry, | ||
884 | +"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n", | ||
885 | + end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) { | ||
886 | + ret = 1; | ||
887 | + goto out; | ||
888 | + } | ||
889 | } | ||
890 | |||
891 | - return; | ||
892 | - | ||
893 | -e_term: | ||
894 | - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); | ||
895 | +out: | ||
896 | + return ret; | ||
897 | } | ||
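vmgexit_psc() above tolerates a hypervisor that processes only part of the descriptor per exit, as long as cur_entry keeps moving forward. A simplified standalone model of that retry-with-forward-progress loop (the two-entries-per-exit hypervisor stub is hypothetical):

    #include <stdio.h>

    struct psc_hdr { int cur_entry, end_entry, reserved; };

    /* Hypothetical stand-in for the PSC VMGEXIT: the "hypervisor" processes
       at most two entries before resuming the guest. */
    static void vmgexit(struct psc_hdr *hdr)
    {
    	int n = hdr->end_entry - hdr->cur_entry + 1;

    	hdr->cur_entry += (n > 2) ? 2 : n;
    }

    int main(void)
    {
    	struct psc_hdr hdr = { .cur_entry = 0, .end_entry = 5 };
    	int prev = hdr.cur_entry;

    	while (hdr.cur_entry <= hdr.end_entry) {
    		vmgexit(&hdr);

    		/* Same sanity checks as above, simplified: no reserved bits,
    		   and processing must never move backwards. */
    		if (hdr.reserved || hdr.cur_entry < prev) {
    			fprintf(stderr, "PSC going backwards\n");
    			return 1;
    		}
    		prev = hdr.cur_entry;
    	}
    	return 0;	/* three exits: cur_entry reaches 2, 4, 6 */
    }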
898 | |||
899 | -void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, | ||
900 | - unsigned long npages) | ||
901 | -{ | ||
902 | - /* | ||
903 | - * This can be invoked in early boot while running identity mapped, so | ||
904 | - * use an open coded check for SNP instead of using cc_platform_has(). | ||
905 | - * This eliminates worries about jump tables or checking boot_cpu_data | ||
906 | - * in the cc_platform_has() function. | ||
907 | - */ | ||
908 | - if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) | ||
909 | - return; | ||
910 | - | ||
911 | - /* | ||
912 | - * Ask the hypervisor to mark the memory pages as private in the RMP | ||
913 | - * table. | ||
914 | - */ | ||
915 | - early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE); | ||
916 | -} | ||
917 | - | ||
918 | -void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, | ||
919 | - unsigned long npages) | ||
920 | -{ | ||
921 | - /* | ||
922 | - * This can be invoked in early boot while running identity mapped, so | ||
923 | - * use an open coded check for SNP instead of using cc_platform_has(). | ||
924 | - * This eliminates worries about jump tables or checking boot_cpu_data | ||
925 | - * in the cc_platform_has() function. | ||
926 | - */ | ||
927 | - if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) | ||
928 | - return; | ||
929 | - | ||
930 | - /* Ask hypervisor to mark the memory pages shared in the RMP table. */ | ||
931 | - early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED); | ||
932 | -} | ||
933 | - | ||
934 | -static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, | ||
935 | - unsigned long vaddr_end, int op) | ||
936 | +static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, | ||
937 | + unsigned long vaddr_end, int op) | ||
938 | { | ||
939 | struct ghcb_state state; | ||
940 | bool use_large_entry; | ||
941 | @@ -XXX,XX +XXX,XX @@ int __init sev_es_efi_map_ghcbs(pgd_t *pgd) | ||
942 | return 0; | ||
943 | } | ||
944 | |||
945 | -/* Writes to the SVSM CAA MSR are ignored */ | ||
946 | -static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write) | ||
947 | -{ | ||
948 | - if (write) | ||
949 | - return ES_OK; | ||
950 | - | ||
951 | - regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa)); | ||
952 | - regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa)); | ||
953 | - | ||
954 | - return ES_OK; | ||
955 | -} | ||
956 | - | ||
957 | -/* | ||
958 | - * TSC related accesses should not exit to the hypervisor when a guest is | ||
959 | - * executing with Secure TSC enabled, so special handling is required for | ||
960 | - * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ. | ||
961 | - */ | ||
962 | -static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write) | ||
963 | -{ | ||
964 | - u64 tsc; | ||
965 | - | ||
966 | - /* | ||
967 | - * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled. | ||
968 | - * Terminate the SNP guest when the interception is enabled. | ||
969 | - */ | ||
970 | - if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ) | ||
971 | - return ES_VMM_ERROR; | ||
972 | - | ||
973 | - /* | ||
974 | - * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC | ||
975 | - * to return undefined values, so ignore all writes. | ||
976 | - * | ||
977 | - * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use | ||
978 | - * the value returned by rdtsc_ordered(). | ||
979 | - */ | ||
980 | - if (write) { | ||
981 | - WARN_ONCE(1, "TSC MSR writes are verboten!\n"); | ||
982 | - return ES_OK; | ||
983 | - } | ||
984 | - | ||
985 | - tsc = rdtsc_ordered(); | ||
986 | - regs->ax = lower_32_bits(tsc); | ||
987 | - regs->dx = upper_32_bits(tsc); | ||
988 | - | ||
989 | - return ES_OK; | ||
990 | -} | ||
991 | - | ||
992 | -static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) | ||
993 | -{ | ||
994 | - struct pt_regs *regs = ctxt->regs; | ||
995 | - enum es_result ret; | ||
996 | - bool write; | ||
997 | - | ||
998 | - /* Is it a WRMSR? */ | ||
999 | - write = ctxt->insn.opcode.bytes[1] == 0x30; | ||
1000 | - | ||
1001 | - switch (regs->cx) { | ||
1002 | - case MSR_SVSM_CAA: | ||
1003 | - return __vc_handle_msr_caa(regs, write); | ||
1004 | - case MSR_IA32_TSC: | ||
1005 | - case MSR_AMD64_GUEST_TSC_FREQ: | ||
1006 | - if (sev_status & MSR_AMD64_SNP_SECURE_TSC) | ||
1007 | - return __vc_handle_secure_tsc_msrs(regs, write); | ||
1008 | - break; | ||
1009 | - default: | ||
1010 | - break; | ||
1011 | - } | ||
1012 | - | ||
1013 | - ghcb_set_rcx(ghcb, regs->cx); | ||
1014 | - if (write) { | ||
1015 | - ghcb_set_rax(ghcb, regs->ax); | ||
1016 | - ghcb_set_rdx(ghcb, regs->dx); | ||
1017 | - } | ||
1018 | - | ||
1019 | - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, write, 0); | ||
1020 | - | ||
1021 | - if ((ret == ES_OK) && !write) { | ||
1022 | - regs->ax = ghcb->save.rax; | ||
1023 | - regs->dx = ghcb->save.rdx; | ||
1024 | - } | ||
1025 | - | ||
1026 | - return ret; | ||
1027 | -} | ||
1028 | - | ||
1029 | static void snp_register_per_cpu_ghcb(void) | ||
1030 | { | ||
1031 | struct sev_es_runtime_data *data; | ||
1032 | @@ -XXX,XX +XXX,XX @@ void __init sev_es_init_vc_handling(void) | ||
1033 | initial_vc_handler = (unsigned long)kernel_exc_vmm_communication; | ||
1034 | } | ||
1035 | |||
1036 | -static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt) | ||
1037 | -{ | ||
1038 | - int trapnr = ctxt->fi.vector; | ||
1039 | - | ||
1040 | - if (trapnr == X86_TRAP_PF) | ||
1041 | - native_write_cr2(ctxt->fi.cr2); | ||
1042 | - | ||
1043 | - ctxt->regs->orig_ax = ctxt->fi.error_code; | ||
1044 | - do_early_exception(ctxt->regs, trapnr); | ||
1045 | -} | ||
1046 | - | ||
1047 | -static long *vc_insn_get_rm(struct es_em_ctxt *ctxt) | ||
1048 | -{ | ||
1049 | - long *reg_array; | ||
1050 | - int offset; | ||
1051 | - | ||
1052 | - reg_array = (long *)ctxt->regs; | ||
1053 | - offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs); | ||
1054 | - | ||
1055 | - if (offset < 0) | ||
1056 | - return NULL; | ||
1057 | - | ||
1058 | - offset /= sizeof(long); | ||
1059 | - | ||
1060 | - return reg_array + offset; | ||
1061 | -} | ||
1062 | -static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, | ||
1063 | - unsigned int bytes, bool read) | ||
1064 | -{ | ||
1065 | - u64 exit_code, exit_info_1, exit_info_2; | ||
1066 | - unsigned long ghcb_pa = __pa(ghcb); | ||
1067 | - enum es_result res; | ||
1068 | - phys_addr_t paddr; | ||
1069 | - void __user *ref; | ||
1070 | - | ||
1071 | - ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs); | ||
1072 | - if (ref == (void __user *)-1L) | ||
1073 | - return ES_UNSUPPORTED; | ||
1074 | - | ||
1075 | - exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE; | ||
1076 | - | ||
1077 | - res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr); | ||
1078 | - if (res != ES_OK) { | ||
1079 | - if (res == ES_EXCEPTION && !read) | ||
1080 | - ctxt->fi.error_code |= X86_PF_WRITE; | ||
1081 | - | ||
1082 | - return res; | ||
1083 | - } | ||
1084 | - | ||
1085 | - exit_info_1 = paddr; | ||
1086 | - /* Can never be greater than 8 */ | ||
1087 | - exit_info_2 = bytes; | ||
1088 | - | ||
1089 | - ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer)); | ||
1090 | - | ||
1091 | - return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2); | ||
1092 | -} | ||
1093 | - | ||
1094 | -/* | ||
1095 | - * The MOVS instruction has two memory operands, which raises the | ||
1096 | - * problem that it is not known whether the access to the source or the | ||
1097 | - * destination caused the #VC exception (and hence whether an MMIO read | ||
1098 | - * or write operation needs to be emulated). | ||
1099 | - * | ||
1100 | - * Instead of playing games with walking page-tables and trying to guess | ||
1101 | - * whether the source or destination is an MMIO range, split the move | ||
1102 | - * into two operations, a read and a write with only one memory operand. | ||
1103 | - * This will cause a nested #VC exception on the MMIO address which can | ||
1104 | - * then be handled. | ||
1105 | - * | ||
1106 | - * This implementation has the benefit that it also supports MOVS where | ||
1107 | - * source _and_ destination are MMIO regions. | ||
1108 | - * | ||
1109 | - * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a | ||
1110 | - * rare operation. If it turns out to be a performance problem the split | ||
1111 | - * operations can be moved to memcpy_fromio() and memcpy_toio(). | ||
1112 | - */ | ||
1113 | -static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt, | ||
1114 | - unsigned int bytes) | ||
1115 | -{ | ||
1116 | - unsigned long ds_base, es_base; | ||
1117 | - unsigned char *src, *dst; | ||
1118 | - unsigned char buffer[8]; | ||
1119 | - enum es_result ret; | ||
1120 | - bool rep; | ||
1121 | - int off; | ||
1122 | - | ||
1123 | - ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS); | ||
1124 | - es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES); | ||
1125 | - | ||
1126 | - if (ds_base == -1L || es_base == -1L) { | ||
1127 | - ctxt->fi.vector = X86_TRAP_GP; | ||
1128 | - ctxt->fi.error_code = 0; | ||
1129 | - return ES_EXCEPTION; | ||
1130 | - } | ||
1131 | - | ||
1132 | - src = ds_base + (unsigned char *)ctxt->regs->si; | ||
1133 | - dst = es_base + (unsigned char *)ctxt->regs->di; | ||
1134 | - | ||
1135 | - ret = vc_read_mem(ctxt, src, buffer, bytes); | ||
1136 | - if (ret != ES_OK) | ||
1137 | - return ret; | ||
1138 | - | ||
1139 | - ret = vc_write_mem(ctxt, dst, buffer, bytes); | ||
1140 | - if (ret != ES_OK) | ||
1141 | - return ret; | ||
1142 | - | ||
1143 | - if (ctxt->regs->flags & X86_EFLAGS_DF) | ||
1144 | - off = -bytes; | ||
1145 | - else | ||
1146 | - off = bytes; | ||
1147 | - | ||
1148 | - ctxt->regs->si += off; | ||
1149 | - ctxt->regs->di += off; | ||
1150 | - | ||
1151 | - rep = insn_has_rep_prefix(&ctxt->insn); | ||
1152 | - if (rep) | ||
1153 | - ctxt->regs->cx -= 1; | ||
1154 | - | ||
1155 | - if (!rep || ctxt->regs->cx == 0) | ||
1156 | - return ES_OK; | ||
1157 | - else | ||
1158 | - return ES_RETRY; | ||
1159 | -} | ||
1160 | - | ||
1161 | -static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) | ||
1162 | -{ | ||
1163 | - struct insn *insn = &ctxt->insn; | ||
1164 | - enum insn_mmio_type mmio; | ||
1165 | - unsigned int bytes = 0; | ||
1166 | - enum es_result ret; | ||
1167 | - u8 sign_byte; | ||
1168 | - long *reg_data; | ||
1169 | - | ||
1170 | - mmio = insn_decode_mmio(insn, &bytes); | ||
1171 | - if (mmio == INSN_MMIO_DECODE_FAILED) | ||
1172 | - return ES_DECODE_FAILED; | ||
1173 | - | ||
1174 | - if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) { | ||
1175 | - reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs); | ||
1176 | - if (!reg_data) | ||
1177 | - return ES_DECODE_FAILED; | ||
1178 | - } | ||
1179 | - | ||
1180 | - if (user_mode(ctxt->regs)) | ||
1181 | - return ES_UNSUPPORTED; | ||
1182 | - | ||
1183 | - switch (mmio) { | ||
1184 | - case INSN_MMIO_WRITE: | ||
1185 | - memcpy(ghcb->shared_buffer, reg_data, bytes); | ||
1186 | - ret = vc_do_mmio(ghcb, ctxt, bytes, false); | ||
1187 | - break; | ||
1188 | - case INSN_MMIO_WRITE_IMM: | ||
1189 | - memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes); | ||
1190 | - ret = vc_do_mmio(ghcb, ctxt, bytes, false); | ||
1191 | - break; | ||
1192 | - case INSN_MMIO_READ: | ||
1193 | - ret = vc_do_mmio(ghcb, ctxt, bytes, true); | ||
1194 | - if (ret) | ||
1195 | - break; | ||
1196 | - | ||
1197 | - /* Zero-extend for 32-bit operation */ | ||
1198 | - if (bytes == 4) | ||
1199 | - *reg_data = 0; | ||
1200 | - | ||
1201 | - memcpy(reg_data, ghcb->shared_buffer, bytes); | ||
1202 | - break; | ||
1203 | - case INSN_MMIO_READ_ZERO_EXTEND: | ||
1204 | - ret = vc_do_mmio(ghcb, ctxt, bytes, true); | ||
1205 | - if (ret) | ||
1206 | - break; | ||
1207 | - | ||
1208 | - /* Zero extend based on operand size */ | ||
1209 | - memset(reg_data, 0, insn->opnd_bytes); | ||
1210 | - memcpy(reg_data, ghcb->shared_buffer, bytes); | ||
1211 | - break; | ||
1212 | - case INSN_MMIO_READ_SIGN_EXTEND: | ||
1213 | - ret = vc_do_mmio(ghcb, ctxt, bytes, true); | ||
1214 | - if (ret) | ||
1215 | - break; | ||
1216 | - | ||
1217 | - if (bytes == 1) { | ||
1218 | - u8 *val = (u8 *)ghcb->shared_buffer; | ||
1219 | - | ||
1220 | - sign_byte = (*val & 0x80) ? 0xff : 0x00; | ||
1221 | - } else { | ||
1222 | - u16 *val = (u16 *)ghcb->shared_buffer; | ||
1223 | - | ||
1224 | - sign_byte = (*val & 0x8000) ? 0xff : 0x00; | ||
1225 | - } | ||
1226 | - | ||
1227 | - /* Sign extend based on operand size */ | ||
1228 | - memset(reg_data, sign_byte, insn->opnd_bytes); | ||
1229 | - memcpy(reg_data, ghcb->shared_buffer, bytes); | ||
1230 | - break; | ||
1231 | - case INSN_MMIO_MOVS: | ||
1232 | - ret = vc_handle_mmio_movs(ctxt, bytes); | ||
1233 | - break; | ||
1234 | - default: | ||
1235 | - ret = ES_UNSUPPORTED; | ||
1236 | - break; | ||
1237 | - } | ||
1238 | - | ||
1239 | - return ret; | ||
1240 | -} | ||
1241 | - | ||
1242 | -static enum es_result vc_handle_dr7_write(struct ghcb *ghcb, | ||
1243 | - struct es_em_ctxt *ctxt) | ||
1244 | -{ | ||
1245 | - struct sev_es_runtime_data *data = this_cpu_read(runtime_data); | ||
1246 | - long val, *reg = vc_insn_get_rm(ctxt); | ||
1247 | - enum es_result ret; | ||
1248 | - | ||
1249 | - if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP) | ||
1250 | - return ES_VMM_ERROR; | ||
1251 | - | ||
1252 | - if (!reg) | ||
1253 | - return ES_DECODE_FAILED; | ||
1254 | - | ||
1255 | - val = *reg; | ||
1256 | - | ||
1257 | - /* Upper 32 bits must be written as zeroes */ | ||
1258 | - if (val >> 32) { | ||
1259 | - ctxt->fi.vector = X86_TRAP_GP; | ||
1260 | - ctxt->fi.error_code = 0; | ||
1261 | - return ES_EXCEPTION; | ||
1262 | - } | ||
1263 | - | ||
1264 | - /* Clear out other reserved bits and set bit 10 */ | ||
1265 | - val = (val & 0xffff23ffL) | BIT(10); | ||
1266 | - | ||
1267 | - /* Early non-zero writes to DR7 are not supported */ | ||
1268 | - if (!data && (val & ~DR7_RESET_VALUE)) | ||
1269 | - return ES_UNSUPPORTED; | ||
1270 | - | ||
1271 | - /* Using a value of 0 for ExitInfo1 means RAX holds the value */ | ||
1272 | - ghcb_set_rax(ghcb, val); | ||
1273 | - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0); | ||
1274 | - if (ret != ES_OK) | ||
1275 | - return ret; | ||
1276 | - | ||
1277 | - if (data) | ||
1278 | - data->dr7 = val; | ||
1279 | - | ||
1280 | - return ES_OK; | ||
1281 | -} | ||
1282 | - | ||
1283 | -static enum es_result vc_handle_dr7_read(struct ghcb *ghcb, | ||
1284 | - struct es_em_ctxt *ctxt) | ||
1285 | -{ | ||
1286 | - struct sev_es_runtime_data *data = this_cpu_read(runtime_data); | ||
1287 | - long *reg = vc_insn_get_rm(ctxt); | ||
1288 | - | ||
1289 | - if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP) | ||
1290 | - return ES_VMM_ERROR; | ||
1291 | - | ||
1292 | - if (!reg) | ||
1293 | - return ES_DECODE_FAILED; | ||
1294 | - | ||
1295 | - if (data) | ||
1296 | - *reg = data->dr7; | ||
1297 | - else | ||
1298 | - *reg = DR7_RESET_VALUE; | ||
1299 | - | ||
1300 | - return ES_OK; | ||
1301 | -} | ||
1302 | - | ||
1303 | -static enum es_result vc_handle_wbinvd(struct ghcb *ghcb, | ||
1304 | - struct es_em_ctxt *ctxt) | ||
1305 | -{ | ||
1306 | - return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0); | ||
1307 | -} | ||
1308 | - | ||
1309 | -static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt) | ||
1310 | -{ | ||
1311 | - enum es_result ret; | ||
1312 | - | ||
1313 | - ghcb_set_rcx(ghcb, ctxt->regs->cx); | ||
1314 | - | ||
1315 | - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0); | ||
1316 | - if (ret != ES_OK) | ||
1317 | - return ret; | ||
1318 | - | ||
1319 | - if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb))) | ||
1320 | - return ES_VMM_ERROR; | ||
1321 | - | ||
1322 | - ctxt->regs->ax = ghcb->save.rax; | ||
1323 | - ctxt->regs->dx = ghcb->save.rdx; | ||
1324 | - | ||
1325 | - return ES_OK; | ||
1326 | -} | ||
1327 | - | ||
1328 | -static enum es_result vc_handle_monitor(struct ghcb *ghcb, | ||
1329 | - struct es_em_ctxt *ctxt) | ||
1330 | -{ | ||
1331 | - /* | ||
1332 | - * Treat it as a NOP and do not leak a physical address to the | ||
1333 | - * hypervisor. | ||
1334 | - */ | ||
1335 | - return ES_OK; | ||
1336 | -} | ||
1337 | - | ||
1338 | -static enum es_result vc_handle_mwait(struct ghcb *ghcb, | ||
1339 | - struct es_em_ctxt *ctxt) | ||
1340 | -{ | ||
1341 | - /* Treat the same as MONITOR/MONITORX */ | ||
1342 | - return ES_OK; | ||
1343 | -} | ||
1344 | - | ||
1345 | -static enum es_result vc_handle_vmmcall(struct ghcb *ghcb, | ||
1346 | - struct es_em_ctxt *ctxt) | ||
1347 | -{ | ||
1348 | - enum es_result ret; | ||
1349 | - | ||
1350 | - ghcb_set_rax(ghcb, ctxt->regs->ax); | ||
1351 | - ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0); | ||
1352 | - | ||
1353 | - if (x86_platform.hyper.sev_es_hcall_prepare) | ||
1354 | - x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs); | ||
1355 | - | ||
1356 | - ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0); | ||
1357 | - if (ret != ES_OK) | ||
1358 | - return ret; | ||
1359 | - | ||
1360 | - if (!ghcb_rax_is_valid(ghcb)) | ||
1361 | - return ES_VMM_ERROR; | ||
1362 | - | ||
1363 | - ctxt->regs->ax = ghcb->save.rax; | ||
1364 | - | ||
1365 | - /* | ||
1366 | - * Call sev_es_hcall_finish() after regs->ax is already set. | ||
1367 | - * This allows the hypervisor handler to overwrite it again if | ||
1368 | - * necessary. | ||
1369 | - */ | ||
1370 | - if (x86_platform.hyper.sev_es_hcall_finish && | ||
1371 | - !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs)) | ||
1372 | - return ES_VMM_ERROR; | ||
1373 | - | ||
1374 | - return ES_OK; | ||
1375 | -} | ||
1376 | - | ||
1377 | -static enum es_result vc_handle_trap_ac(struct ghcb *ghcb, | ||
1378 | - struct es_em_ctxt *ctxt) | ||
1379 | -{ | ||
1380 | - /* | ||
1381 | - * Calling exc_alignment_check() directly does not work, because it | ||
1382 | - * enables IRQs and the GHCB is active. Forward the exception and call | ||
1383 | - * it later from vc_forward_exception(). | ||
1384 | - */ | ||
1385 | - ctxt->fi.vector = X86_TRAP_AC; | ||
1386 | - ctxt->fi.error_code = 0; | ||
1387 | - return ES_EXCEPTION; | ||
1388 | -} | ||
1389 | - | ||
1390 | -static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt, | ||
1391 | - struct ghcb *ghcb, | ||
1392 | - unsigned long exit_code) | ||
1393 | -{ | ||
1394 | - enum es_result result = vc_check_opcode_bytes(ctxt, exit_code); | ||
1395 | - | ||
1396 | - if (result != ES_OK) | ||
1397 | - return result; | ||
1398 | - | ||
1399 | - switch (exit_code) { | ||
1400 | - case SVM_EXIT_READ_DR7: | ||
1401 | - result = vc_handle_dr7_read(ghcb, ctxt); | ||
1402 | - break; | ||
1403 | - case SVM_EXIT_WRITE_DR7: | ||
1404 | - result = vc_handle_dr7_write(ghcb, ctxt); | ||
1405 | - break; | ||
1406 | - case SVM_EXIT_EXCP_BASE + X86_TRAP_AC: | ||
1407 | - result = vc_handle_trap_ac(ghcb, ctxt); | ||
1408 | - break; | ||
1409 | - case SVM_EXIT_RDTSC: | ||
1410 | - case SVM_EXIT_RDTSCP: | ||
1411 | - result = vc_handle_rdtsc(ghcb, ctxt, exit_code); | ||
1412 | - break; | ||
1413 | - case SVM_EXIT_RDPMC: | ||
1414 | - result = vc_handle_rdpmc(ghcb, ctxt); | ||
1415 | - break; | ||
1416 | - case SVM_EXIT_INVD: | ||
1417 | - pr_err_ratelimited("#VC exception for INVD??? Seriously???\n"); | ||
1418 | - result = ES_UNSUPPORTED; | ||
1419 | - break; | ||
1420 | - case SVM_EXIT_CPUID: | ||
1421 | - result = vc_handle_cpuid(ghcb, ctxt); | ||
1422 | - break; | ||
1423 | - case SVM_EXIT_IOIO: | ||
1424 | - result = vc_handle_ioio(ghcb, ctxt); | ||
1425 | - break; | ||
1426 | - case SVM_EXIT_MSR: | ||
1427 | - result = vc_handle_msr(ghcb, ctxt); | ||
1428 | - break; | ||
1429 | - case SVM_EXIT_VMMCALL: | ||
1430 | - result = vc_handle_vmmcall(ghcb, ctxt); | ||
1431 | - break; | ||
1432 | - case SVM_EXIT_WBINVD: | ||
1433 | - result = vc_handle_wbinvd(ghcb, ctxt); | ||
1434 | - break; | ||
1435 | - case SVM_EXIT_MONITOR: | ||
1436 | - result = vc_handle_monitor(ghcb, ctxt); | ||
1437 | - break; | ||
1438 | - case SVM_EXIT_MWAIT: | ||
1439 | - result = vc_handle_mwait(ghcb, ctxt); | ||
1440 | - break; | ||
1441 | - case SVM_EXIT_NPF: | ||
1442 | - result = vc_handle_mmio(ghcb, ctxt); | ||
1443 | - break; | ||
1444 | - default: | ||
1445 | - /* | ||
1446 | - * Unexpected #VC exception | ||
1447 | - */ | ||
1448 | - result = ES_UNSUPPORTED; | ||
1449 | - } | ||
1450 | - | ||
1451 | - return result; | ||
1452 | -} | ||
1453 | - | ||
1454 | -static __always_inline bool is_vc2_stack(unsigned long sp) | ||
1455 | -{ | ||
1456 | - return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2)); | ||
1457 | -} | ||
1458 | - | ||
1459 | -static __always_inline bool vc_from_invalid_context(struct pt_regs *regs) | ||
1460 | -{ | ||
1461 | - unsigned long sp, prev_sp; | ||
1462 | - | ||
1463 | - sp = (unsigned long)regs; | ||
1464 | - prev_sp = regs->sp; | ||
1465 | - | ||
1466 | - /* | ||
1467 | - * If the code was already executing on the VC2 stack when the #VC | ||
1468 | - * happened, let it proceed to the normal handling routine. This way the | ||
1469 | - * code executing on the VC2 stack can cause #VC exceptions to get handled. | ||
1470 | - */ | ||
1471 | - return is_vc2_stack(sp) && !is_vc2_stack(prev_sp); | ||
1472 | -} | ||
1473 | - | ||
1474 | -static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code) | ||
1475 | -{ | ||
1476 | - struct ghcb_state state; | ||
1477 | - struct es_em_ctxt ctxt; | ||
1478 | - enum es_result result; | ||
1479 | - struct ghcb *ghcb; | ||
1480 | - bool ret = true; | ||
1481 | - | ||
1482 | - ghcb = __sev_get_ghcb(&state); | ||
1483 | - | ||
1484 | - vc_ghcb_invalidate(ghcb); | ||
1485 | - result = vc_init_em_ctxt(&ctxt, regs, error_code); | ||
1486 | - | ||
1487 | - if (result == ES_OK) | ||
1488 | - result = vc_handle_exitcode(&ctxt, ghcb, error_code); | ||
1489 | - | ||
1490 | - __sev_put_ghcb(&state); | ||
1491 | - | ||
1492 | - /* Done - now check the result */ | ||
1493 | - switch (result) { | ||
1494 | - case ES_OK: | ||
1495 | - vc_finish_insn(&ctxt); | ||
1496 | - break; | ||
1497 | - case ES_UNSUPPORTED: | ||
1498 | - pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n", | ||
1499 | - error_code, regs->ip); | ||
1500 | - ret = false; | ||
1501 | - break; | ||
1502 | - case ES_VMM_ERROR: | ||
1503 | - pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", | ||
1504 | - error_code, regs->ip); | ||
1505 | - ret = false; | ||
1506 | - break; | ||
1507 | - case ES_DECODE_FAILED: | ||
1508 | - pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", | ||
1509 | - error_code, regs->ip); | ||
1510 | - ret = false; | ||
1511 | - break; | ||
1512 | - case ES_EXCEPTION: | ||
1513 | - vc_forward_exception(&ctxt); | ||
1514 | - break; | ||
1515 | - case ES_RETRY: | ||
1516 | - /* Nothing to do */ | ||
1517 | - break; | ||
1518 | - default: | ||
1519 | - pr_emerg("Unknown result in %s():%d\n", __func__, result); | ||
1520 | - /* | ||
1521 | - * Emulating the instruction which caused the #VC exception | ||
1522 | - * failed - can't continue so print debug information | ||
1523 | - */ | ||
1524 | - BUG(); | ||
1525 | - } | ||
1526 | - | ||
1527 | - return ret; | ||
1528 | -} | ||
1529 | - | ||
1530 | -static __always_inline bool vc_is_db(unsigned long error_code) | ||
1531 | -{ | ||
1532 | - return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB; | ||
1533 | -} | ||
1534 | - | ||
1535 | -/* | ||
1536 | - * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode | ||
1537 | - * and will panic when an error happens. | ||
1538 | - */ | ||
1539 | -DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication) | ||
1540 | -{ | ||
1541 | - irqentry_state_t irq_state; | ||
1542 | - | ||
1543 | - /* | ||
1544 | - * With the current implementation it is always possible to switch to a | ||
1545 | - * safe stack because #VC exceptions only happen at known places, like | ||
1546 | - * intercepted instructions or accesses to MMIO areas/IO ports. They can | ||
1547 | - * also happen with code instrumentation when the hypervisor intercepts | ||
1548 | - * #DB, but the critical paths are forbidden to be instrumented, so #DB | ||
1549 | - * exceptions currently also only happen in safe places. | ||
1550 | - * | ||
1551 | - * But keep this here in case the noinstr annotations are violated due | ||
1552 | - * to a bug elsewhere. | ||
1553 | - */ | ||
1554 | - if (unlikely(vc_from_invalid_context(regs))) { | ||
1555 | - instrumentation_begin(); | ||
1556 | - panic("Can't handle #VC exception from unsupported context\n"); | ||
1557 | - instrumentation_end(); | ||
1558 | - } | ||
1559 | - | ||
1560 | - /* | ||
1561 | - * Handle #DB before calling into !noinstr code to avoid recursive #DB. | ||
1562 | - */ | ||
1563 | - if (vc_is_db(error_code)) { | ||
1564 | - exc_debug(regs); | ||
1565 | - return; | ||
1566 | - } | ||
1567 | - | ||
1568 | - irq_state = irqentry_nmi_enter(regs); | ||
1569 | - | ||
1570 | - instrumentation_begin(); | ||
1571 | - | ||
1572 | - if (!vc_raw_handle_exception(regs, error_code)) { | ||
1573 | - /* Show some debug info */ | ||
1574 | - show_regs(regs); | ||
1575 | - | ||
1576 | - /* Ask hypervisor to sev_es_terminate */ | ||
1577 | - sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); | ||
1578 | - | ||
1579 | - /* If that fails and we get here - just panic */ | ||
1580 | - panic("Returned from Terminate-Request to Hypervisor\n"); | ||
1581 | - } | ||
1582 | - | ||
1583 | - instrumentation_end(); | ||
1584 | - irqentry_nmi_exit(regs, irq_state); | ||
1585 | -} | ||
1586 | - | ||
1587 | -/* | ||
1588 | - * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode | ||
1589 | - * and will kill the current task with SIGBUS when an error happens. | ||
1590 | - */ | ||
1591 | -DEFINE_IDTENTRY_VC_USER(exc_vmm_communication) | ||
1592 | -{ | ||
1593 | - /* | ||
1594 | - * Handle #DB before calling into !noinstr code to avoid recursive #DB. | ||
1595 | - */ | ||
1596 | - if (vc_is_db(error_code)) { | ||
1597 | - noist_exc_debug(regs); | ||
1598 | - return; | ||
1599 | - } | ||
1600 | - | ||
1601 | - irqentry_enter_from_user_mode(regs); | ||
1602 | - instrumentation_begin(); | ||
1603 | - | ||
1604 | - if (!vc_raw_handle_exception(regs, error_code)) { | ||
1605 | - /* | ||
1606 | - * Do not kill the machine if user-space triggered the | ||
1607 | - * exception. Send SIGBUS instead and let user-space deal with | ||
1608 | - * it. | ||
1609 | - */ | ||
1610 | - force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); | ||
1611 | - } | ||
1612 | - | ||
1613 | - instrumentation_end(); | ||
1614 | - irqentry_exit_to_user_mode(regs); | ||
1615 | -} | ||
1616 | - | ||
1617 | -bool __init handle_vc_boot_ghcb(struct pt_regs *regs) | ||
1618 | -{ | ||
1619 | - unsigned long exit_code = regs->orig_ax; | ||
1620 | - struct es_em_ctxt ctxt; | ||
1621 | - enum es_result result; | ||
1622 | - | ||
1623 | - vc_ghcb_invalidate(boot_ghcb); | ||
1624 | - | ||
1625 | - result = vc_init_em_ctxt(&ctxt, regs, exit_code); | ||
1626 | - if (result == ES_OK) | ||
1627 | - result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code); | ||
1628 | - | ||
1629 | - /* Done - now check the result */ | ||
1630 | - switch (result) { | ||
1631 | - case ES_OK: | ||
1632 | - vc_finish_insn(&ctxt); | ||
1633 | - break; | ||
1634 | - case ES_UNSUPPORTED: | ||
1635 | - early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", | ||
1636 | - exit_code, regs->ip); | ||
1637 | - goto fail; | ||
1638 | - case ES_VMM_ERROR: | ||
1639 | - early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", | ||
1640 | - exit_code, regs->ip); | ||
1641 | - goto fail; | ||
1642 | - case ES_DECODE_FAILED: | ||
1643 | - early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", | ||
1644 | - exit_code, regs->ip); | ||
1645 | - goto fail; | ||
1646 | - case ES_EXCEPTION: | ||
1647 | - vc_early_forward_exception(&ctxt); | ||
1648 | - break; | ||
1649 | - case ES_RETRY: | ||
1650 | - /* Nothing to do */ | ||
1651 | - break; | ||
1652 | - default: | ||
1653 | - BUG(); | ||
1654 | - } | ||
1655 | - | ||
1656 | - return true; | ||
1657 | - | ||
1658 | -fail: | ||
1659 | - show_regs(regs); | ||
1660 | - | ||
1661 | - sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); | ||
1662 | -} | ||
1663 | - | ||
1664 | -/* | ||
1665 | - * Initial set up of SNP relies on information provided by the | ||
1666 | - * Confidential Computing blob, which can be passed to the kernel | ||
1667 | - * in the following ways, depending on how it is booted: | ||
1668 | - * | ||
1669 | - * - when booted via the boot/decompress kernel: | ||
1670 | - * - via boot_params | ||
1671 | - * | ||
1672 | - * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH): | ||
1673 | - * - via a setup_data entry, as defined by the Linux Boot Protocol | ||
1674 | - * | ||
1675 | - * Scan for the blob in that order. | ||
1676 | - */ | ||
1677 | -static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) | ||
1678 | -{ | ||
1679 | - struct cc_blob_sev_info *cc_info; | ||
1680 | - | ||
1681 | - /* Boot kernel would have passed the CC blob via boot_params. */ | ||
1682 | - if (bp->cc_blob_address) { | ||
1683 | - cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address; | ||
1684 | - goto found_cc_info; | ||
1685 | - } | ||
1686 | - | ||
1687 | - /* | ||
1688 | - * If kernel was booted directly, without the use of the | ||
1689 | - * boot/decompression kernel, the CC blob may have been passed via | ||
1690 | - * setup_data instead. | ||
1691 | - */ | ||
1692 | - cc_info = find_cc_blob_setup_data(bp); | ||
1693 | - if (!cc_info) | ||
1694 | - return NULL; | ||
1695 | - | ||
1696 | -found_cc_info: | ||
1697 | - if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) | ||
1698 | - snp_abort(); | ||
1699 | - | ||
1700 | - return cc_info; | ||
1701 | -} | ||
1702 | - | ||
1703 | -static __head void svsm_setup(struct cc_blob_sev_info *cc_info) | ||
1704 | -{ | ||
1705 | - struct svsm_call call = {}; | ||
1706 | - int ret; | ||
1707 | - u64 pa; | ||
1708 | - | ||
1709 | - /* | ||
1710 | - * Record the SVSM Calling Area address (CAA) if the guest is not | ||
1711 | - * running at VMPL0. The CA will be used to communicate with the | ||
1712 | - * SVSM to perform the SVSM services. | ||
1713 | - */ | ||
1714 | - if (!svsm_setup_ca(cc_info)) | ||
1715 | - return; | ||
1716 | - | ||
1717 | - /* | ||
1718 | - * It is very early in the boot and the kernel is running identity | ||
1719 | - * mapped but without having adjusted the pagetables to where the | ||
1720 | - * kernel was loaded (physbase), so get the CA address using | ||
1721 | - * RIP-relative addressing. | ||
1722 | - */ | ||
1723 | - pa = (u64)rip_rel_ptr(&boot_svsm_ca_page); | ||
1724 | - | ||
1725 | - /* | ||
1726 | - * Switch over to the boot SVSM CA while the current CA is still | ||
1727 | - * addressable. There is no GHCB at this point so use the MSR protocol. | ||
1728 | - * | ||
1729 | - * SVSM_CORE_REMAP_CA call: | ||
1730 | - * RAX = 0 (Protocol=0, CallID=0) | ||
1731 | - * RCX = New CA GPA | ||
1732 | - */ | ||
1733 | - call.caa = svsm_get_caa(); | ||
1734 | - call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA); | ||
1735 | - call.rcx = pa; | ||
1736 | - ret = svsm_perform_call_protocol(&call); | ||
1737 | - if (ret) | ||
1738 | - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL); | ||
1739 | - | ||
1740 | - RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa; | ||
1741 | - RIP_REL_REF(boot_svsm_caa_pa) = pa; | ||
1742 | -} | ||
1743 | - | ||
1744 | -bool __head snp_init(struct boot_params *bp) | ||
1745 | -{ | ||
1746 | - struct cc_blob_sev_info *cc_info; | ||
1747 | - | ||
1748 | - if (!bp) | ||
1749 | - return false; | ||
1750 | - | ||
1751 | - cc_info = find_cc_blob(bp); | ||
1752 | - if (!cc_info) | ||
1753 | - return false; | ||
1754 | - | ||
1755 | - if (cc_info->secrets_phys && cc_info->secrets_len == PAGE_SIZE) | ||
1756 | - secrets_pa = cc_info->secrets_phys; | ||
1757 | - else | ||
1758 | - return false; | ||
1759 | - | ||
1760 | - setup_cpuid_table(cc_info); | ||
1761 | - | ||
1762 | - svsm_setup(cc_info); | ||
1763 | - | ||
1764 | - /* | ||
1765 | - * The CC blob will be used later to access the secrets page. Cache | ||
1766 | - * it here like the boot kernel does. | ||
1767 | - */ | ||
1768 | - bp->cc_blob_address = (u32)(unsigned long)cc_info; | ||
1769 | - | ||
1770 | - return true; | ||
1771 | -} | ||
1772 | - | ||
1773 | -void __head __noreturn snp_abort(void) | ||
1774 | -{ | ||
1775 | - sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); | ||
1776 | -} | ||
1777 | - | ||
1778 | /* | ||
1779 | * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are | ||
1780 | * enabled, as the alternative (fallback) logic for DMI probing in the legacy | ||
1781 | diff --git a/arch/x86/coco/sev/shared.c b/arch/x86/coco/sev/shared.c | ||
1782 | index XXXXXXX..XXXXXXX 100644 | ||
1783 | --- a/arch/x86/coco/sev/shared.c | ||
1784 | +++ b/arch/x86/coco/sev/shared.c | ||
1785 | @@ -XXX,XX +XXX,XX @@ | ||
1786 | |||
1787 | /* | ||
1788 | * SVSM related information: | ||
1789 | - * When running under an SVSM, the VMPL that Linux is executing at must be | ||
1790 | - * non-zero. The VMPL is therefore used to indicate the presence of an SVSM. | ||
1791 | - * | ||
1792 | * During boot, the page tables are set up as identity mapped and later | ||
1793 | * changed to use kernel virtual addresses. Maintain separate virtual and | ||
1794 | * physical addresses for the CAA to allow SVSM functions to be used during | ||
1795 | * early boot, both with identity mapped virtual addresses and proper kernel | ||
1796 | * virtual addresses. | ||
1797 | */ | ||
1798 | -u8 snp_vmpl __ro_after_init; | ||
1799 | -EXPORT_SYMBOL_GPL(snp_vmpl); | ||
1800 | struct svsm_ca *boot_svsm_caa __ro_after_init; | ||
1801 | u64 boot_svsm_caa_pa __ro_after_init; | ||
1802 | |||
1803 | @@ -XXX,XX +XXX,XX @@ static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info) | ||
1804 | } | ||
1805 | } | ||
1806 | |||
1807 | -static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size, | ||
1808 | - int ret, u64 svsm_ret) | ||
1809 | -{ | ||
1810 | - WARN(1, "PVALIDATE failure: pfn: 0x%llx, action: %u, size: %u, ret: %d, svsm_ret: 0x%llx\n", | ||
1811 | - pfn, action, page_size, ret, svsm_ret); | ||
1812 | - | ||
1813 | - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE); | ||
1814 | -} | ||
1815 | - | ||
1816 | -static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret) | ||
1817 | -{ | ||
1818 | - unsigned int page_size; | ||
1819 | - bool action; | ||
1820 | - u64 pfn; | ||
1821 | - | ||
1822 | - pfn = pc->entry[pc->cur_index].pfn; | ||
1823 | - action = pc->entry[pc->cur_index].action; | ||
1824 | - page_size = pc->entry[pc->cur_index].page_size; | ||
1825 | - | ||
1826 | - __pval_terminate(pfn, action, page_size, ret, svsm_ret); | ||
1827 | -} | ||
1828 | - | ||
1829 | static void __head svsm_pval_4k_page(unsigned long paddr, bool validate) | ||
1830 | { | ||
1831 | struct svsm_pvalidate_call *pc; | ||
1832 | @@ -XXX,XX +XXX,XX @@ static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, | ||
1833 | } | ||
1834 | } | ||
1835 | |||
1836 | -static void pval_pages(struct snp_psc_desc *desc) | ||
1837 | -{ | ||
1838 | - struct psc_entry *e; | ||
1839 | - unsigned long vaddr; | ||
1840 | - unsigned int size; | ||
1841 | - unsigned int i; | ||
1842 | - bool validate; | ||
1843 | - u64 pfn; | ||
1844 | - int rc; | ||
1845 | - | ||
1846 | - for (i = 0; i <= desc->hdr.end_entry; i++) { | ||
1847 | - e = &desc->entries[i]; | ||
1848 | - | ||
1849 | - pfn = e->gfn; | ||
1850 | - vaddr = (unsigned long)pfn_to_kaddr(pfn); | ||
1851 | - size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; | ||
1852 | - validate = e->operation == SNP_PAGE_STATE_PRIVATE; | ||
1853 | - | ||
1854 | - rc = pvalidate(vaddr, size, validate); | ||
1855 | - if (!rc) | ||
1856 | - continue; | ||
1857 | - | ||
1858 | - if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) { | ||
1859 | - unsigned long vaddr_end = vaddr + PMD_SIZE; | ||
1860 | - | ||
1861 | - for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) { | ||
1862 | - rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); | ||
1863 | - if (rc) | ||
1864 | - __pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0); | ||
1865 | - } | ||
1866 | - } else { | ||
1867 | - __pval_terminate(pfn, validate, size, rc, 0); | ||
1868 | - } | ||
1869 | - } | ||
1870 | -} | ||
1871 | - | ||
1872 | -static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action, | ||
1873 | - struct svsm_pvalidate_call *pc) | ||
1874 | -{ | ||
1875 | - struct svsm_pvalidate_entry *pe; | ||
1876 | - | ||
1877 | - /* Nothing in the CA yet */ | ||
1878 | - pc->num_entries = 0; | ||
1879 | - pc->cur_index = 0; | ||
1880 | - | ||
1881 | - pe = &pc->entry[0]; | ||
1882 | - | ||
1883 | - while (pfn < pfn_end) { | ||
1884 | - pe->page_size = RMP_PG_SIZE_4K; | ||
1885 | - pe->action = action; | ||
1886 | - pe->ignore_cf = 0; | ||
1887 | - pe->pfn = pfn; | ||
1888 | - | ||
1889 | - pe++; | ||
1890 | - pfn++; | ||
1891 | - | ||
1892 | - pc->num_entries++; | ||
1893 | - if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT) | ||
1894 | - break; | ||
1895 | - } | ||
1896 | - | ||
1897 | - return pfn; | ||
1898 | -} | ||
1899 | - | ||
1900 | -static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int desc_entry, | ||
1901 | - struct svsm_pvalidate_call *pc) | ||
1902 | -{ | ||
1903 | - struct svsm_pvalidate_entry *pe; | ||
1904 | - struct psc_entry *e; | ||
1905 | - | ||
1906 | - /* Nothing in the CA yet */ | ||
1907 | - pc->num_entries = 0; | ||
1908 | - pc->cur_index = 0; | ||
1909 | - | ||
1910 | - pe = &pc->entry[0]; | ||
1911 | - e = &desc->entries[desc_entry]; | ||
1912 | - | ||
1913 | - while (desc_entry <= desc->hdr.end_entry) { | ||
1914 | - pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K; | ||
1915 | - pe->action = e->operation == SNP_PAGE_STATE_PRIVATE; | ||
1916 | - pe->ignore_cf = 0; | ||
1917 | - pe->pfn = e->gfn; | ||
1918 | - | ||
1919 | - pe++; | ||
1920 | - e++; | ||
1921 | - | ||
1922 | - desc_entry++; | ||
1923 | - pc->num_entries++; | ||
1924 | - if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT) | ||
1925 | - break; | ||
1926 | - } | ||
1927 | - | ||
1928 | - return desc_entry; | ||
1929 | -} | ||
1930 | - | ||
1931 | -static void svsm_pval_pages(struct snp_psc_desc *desc) | ||
1932 | -{ | ||
1933 | - struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY]; | ||
1934 | - unsigned int i, pv_4k_count = 0; | ||
1935 | - struct svsm_pvalidate_call *pc; | ||
1936 | - struct svsm_call call = {}; | ||
1937 | - unsigned long flags; | ||
1938 | - bool action; | ||
1939 | - u64 pc_pa; | ||
1940 | - int ret; | ||
1941 | - | ||
1942 | - /* | ||
1943 | - * This can be called very early in the boot, use native functions in | ||
1944 | - * order to avoid paravirt issues. | ||
1945 | - */ | ||
1946 | - flags = native_local_irq_save(); | ||
1947 | - | ||
1948 | - /* | ||
1949 | - * The SVSM calling area (CA) can support processing 510 entries at a | ||
1950 | - * time. Loop through the Page State Change descriptor until the CA is | ||
1951 | - * full or the last entry in the descriptor is reached, at which time | ||
1952 | - * the SVSM is invoked. This repeats until all entries in the descriptor | ||
1953 | - * are processed. | ||
1954 | - */ | ||
1955 | - call.caa = svsm_get_caa(); | ||
1956 | - | ||
1957 | - pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer; | ||
1958 | - pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer); | ||
1959 | - | ||
1960 | - /* Protocol 0, Call ID 1 */ | ||
1961 | - call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE); | ||
1962 | - call.rcx = pc_pa; | ||
1963 | - | ||
1964 | - for (i = 0; i <= desc->hdr.end_entry;) { | ||
1965 | - i = svsm_build_ca_from_psc_desc(desc, i, pc); | ||
1966 | - | ||
1967 | - do { | ||
1968 | - ret = svsm_perform_call_protocol(&call); | ||
1969 | - if (!ret) | ||
1970 | - continue; | ||
1971 | - | ||
1972 | - /* | ||
1973 | - * Check if the entry failed because of an RMP mismatch (a | ||
1974 | - * PVALIDATE at 2M was requested, but the page is mapped in | ||
1975 | - * the RMP as 4K). | ||
1976 | - */ | ||
1977 | - | ||
1978 | - if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH && | ||
1979 | - pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) { | ||
1980 | - /* Save this entry for post-processing at 4K */ | ||
1981 | - pv_4k[pv_4k_count++] = pc->entry[pc->cur_index]; | ||
1982 | - | ||
1983 | - /* Skip to the next one unless at the end of the list */ | ||
1984 | - pc->cur_index++; | ||
1985 | - if (pc->cur_index < pc->num_entries) | ||
1986 | - ret = -EAGAIN; | ||
1987 | - else | ||
1988 | - ret = 0; | ||
1989 | - } | ||
1990 | - } while (ret == -EAGAIN); | ||
1991 | - | ||
1992 | - if (ret) | ||
1993 | - svsm_pval_terminate(pc, ret, call.rax_out); | ||
1994 | - } | ||
1995 | - | ||
1996 | - /* Process any entries that failed to be validated at 2M and validate them at 4K */ | ||
1997 | - for (i = 0; i < pv_4k_count; i++) { | ||
1998 | - u64 pfn, pfn_end; | ||
1999 | - | ||
2000 | - action = pv_4k[i].action; | ||
2001 | - pfn = pv_4k[i].pfn; | ||
2002 | - pfn_end = pfn + 512; | ||
2003 | - | ||
2004 | - while (pfn < pfn_end) { | ||
2005 | - pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc); | ||
2006 | - | ||
2007 | - ret = svsm_perform_call_protocol(&call); | ||
2008 | - if (ret) | ||
2009 | - svsm_pval_terminate(pc, ret, call.rax_out); | ||
2010 | - } | ||
2011 | - } | ||
2012 | - | ||
2013 | - native_local_irq_restore(flags); | ||
2014 | -} | ||
2015 | - | ||
2016 | -static void pvalidate_pages(struct snp_psc_desc *desc) | ||
2017 | -{ | ||
2018 | - if (snp_vmpl) | ||
2019 | - svsm_pval_pages(desc); | ||
2020 | - else | ||
2021 | - pval_pages(desc); | ||
2022 | -} | ||
2023 | - | ||
2024 | -static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc) | ||
2025 | -{ | ||
2026 | - int cur_entry, end_entry, ret = 0; | ||
2027 | - struct snp_psc_desc *data; | ||
2028 | - struct es_em_ctxt ctxt; | ||
2029 | - | ||
2030 | - vc_ghcb_invalidate(ghcb); | ||
2031 | - | ||
2032 | - /* Copy the input desc into GHCB shared buffer */ | ||
2033 | - data = (struct snp_psc_desc *)ghcb->shared_buffer; | ||
2034 | - memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc))); | ||
2035 | - | ||
2036 | - /* | ||
2037 | - * As per the GHCB specification, the hypervisor can resume the guest | ||
2038 | - * before processing all the entries. Check whether all the entries | ||
2039 | - * are processed. If not, then keep retrying. Note, the hypervisor | ||
2040 | - * will update the data memory directly to indicate the status, so | ||
2041 | - * reference the data->hdr everywhere. | ||
2042 | - * | ||
2043 | - * The strategy here is to wait for the hypervisor to change the page | ||
2044 | - * state in the RMP table before the guest accesses the memory pages. If the | ||
2045 | - * page state change was not successful, then a later memory access will | ||
2046 | - * result in a crash. | ||
2047 | - */ | ||
2048 | - cur_entry = data->hdr.cur_entry; | ||
2049 | - end_entry = data->hdr.end_entry; | ||
2050 | - | ||
2051 | - while (data->hdr.cur_entry <= data->hdr.end_entry) { | ||
2052 | - ghcb_set_sw_scratch(ghcb, (u64)__pa(data)); | ||
2053 | - | ||
2054 | - /* This will advance the shared buffer data points to. */ | ||
2055 | - ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0); | ||
2056 | - | ||
2057 | - /* | ||
2058 | - * Page State Change VMGEXIT can pass error code through | ||
2059 | - * exit_info_2. | ||
2060 | - */ | ||
2061 | - if (WARN(ret || ghcb->save.sw_exit_info_2, | ||
2062 | - "SNP: PSC failed ret=%d exit_info_2=%llx\n", | ||
2063 | - ret, ghcb->save.sw_exit_info_2)) { | ||
2064 | - ret = 1; | ||
2065 | - goto out; | ||
2066 | - } | ||
2067 | - | ||
2068 | - /* Verify that reserved bit is not set */ | ||
2069 | - if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) { | ||
2070 | - ret = 1; | ||
2071 | - goto out; | ||
2072 | - } | ||
2073 | - | ||
2074 | - /* | ||
2075 | - * Sanity check that entry processing is not going backwards. | ||
2076 | - * This will happen only if hypervisor is tricking us. | ||
2077 | - */ | ||
2078 | - if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry, | ||
2079 | -"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n", | ||
2080 | - end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) { | ||
2081 | - ret = 1; | ||
2082 | - goto out; | ||
2083 | - } | ||
2084 | - } | ||
2085 | - | ||
2086 | -out: | ||
2087 | - return ret; | ||
2088 | -} | ||
2089 | - | ||
2090 | static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt, | ||
2091 | unsigned long exit_code) | ||
2092 | { | ||
2093 | diff --git a/arch/x86/coco/sev/startup.c b/arch/x86/coco/sev/startup.c | ||
54 | new file mode 100644 | 2094 | new file mode 100644 |
55 | index XXXXXXX..XXXXXXX | 2095 | index XXXXXXX..XXXXXXX |
56 | --- /dev/null | 2096 | --- /dev/null |
57 | +++ b/arch/x86/boot/startup/Makefile | 2097 | +++ b/arch/x86/coco/sev/startup.c |
58 | @@ -XXX,XX +XXX,XX @@ | 2098 | @@ -XXX,XX +XXX,XX @@ |
59 | +# SPDX-License-Identifier: GPL-2.0 | 2099 | +// SPDX-License-Identifier: GPL-2.0-only |
60 | + | 2100 | +/* |
61 | +lib-$(CONFIG_X86_64) += la57toggle.o | 2101 | + * AMD Memory Encryption Support |
62 | diff --git a/arch/x86/boot/compressed/la57toggle.S b/arch/x86/boot/startup/la57toggle.S | 2102 | + * |
63 | similarity index 100% | 2103 | + * Copyright (C) 2019 SUSE |
64 | rename from arch/x86/boot/compressed/la57toggle.S | 2104 | + * |
65 | rename to arch/x86/boot/startup/la57toggle.S | 2105 | + * Author: Joerg Roedel <jroedel@suse.de> |
2106 | + */ | ||
2107 | + | ||
2108 | +#define pr_fmt(fmt) "SEV: " fmt | ||
2109 | + | ||
2110 | +#include <linux/sched/debug.h> /* For show_regs() */ | ||
2111 | +#include <linux/percpu-defs.h> | ||
2112 | +#include <linux/cc_platform.h> | ||
2113 | +#include <linux/printk.h> | ||
2114 | +#include <linux/mm_types.h> | ||
2115 | +#include <linux/set_memory.h> | ||
2116 | +#include <linux/memblock.h> | ||
2117 | +#include <linux/kernel.h> | ||
2118 | +#include <linux/mm.h> | ||
2119 | +#include <linux/cpumask.h> | ||
2120 | +#include <linux/efi.h> | ||
2121 | +#include <linux/io.h> | ||
2122 | +#include <linux/psp-sev.h> | ||
2123 | +#include <uapi/linux/sev-guest.h> | ||
2124 | + | ||
2125 | +#include <asm/init.h> | ||
2126 | +#include <asm/cpu_entry_area.h> | ||
2127 | +#include <asm/stacktrace.h> | ||
2128 | +#include <asm/sev.h> | ||
2129 | +#include <asm/sev-internal.h> | ||
2130 | +#include <asm/insn-eval.h> | ||
2131 | +#include <asm/fpu/xcr.h> | ||
2132 | +#include <asm/processor.h> | ||
2133 | +#include <asm/realmode.h> | ||
2134 | +#include <asm/setup.h> | ||
2135 | +#include <asm/traps.h> | ||
2136 | +#include <asm/svm.h> | ||
2137 | +#include <asm/smp.h> | ||
2138 | +#include <asm/cpu.h> | ||
2139 | +#include <asm/apic.h> | ||
2140 | +#include <asm/cpuid.h> | ||
2141 | +#include <asm/cmdline.h> | ||
2142 | + | ||
2143 | +/* For early boot hypervisor communication in SEV-ES enabled guests */ | ||
2144 | +struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE); | ||
2145 | + | ||
2146 | +/* | ||
2147 | + * Needs to be in the .data section because we need it NULL before bss is | ||
2148 | + * cleared | ||
2149 | + */ | ||
2150 | +struct ghcb *boot_ghcb __section(".data"); | ||
2151 | + | ||
2152 | +/* Bitmap of SEV features supported by the hypervisor */ | ||
2153 | +u64 sev_hv_features __ro_after_init; | ||
2154 | + | ||
2155 | +/* Secrets page physical address from the CC blob */ | ||
2156 | +static u64 secrets_pa __ro_after_init; | ||
2157 | + | ||
2158 | +/* For early boot SVSM communication */ | ||
2159 | +struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE); | ||
2160 | + | ||
2161 | +DEFINE_PER_CPU(struct svsm_ca *, svsm_caa); | ||
2162 | +DEFINE_PER_CPU(u64, svsm_caa_pa); | ||
2163 | + | ||
2164 | +/* | ||
2165 | + * Nothing shall interrupt this code path while holding the per-CPU | ||
2166 | + * GHCB. The backup GHCB is only for NMIs interrupting this path. | ||
2167 | + * | ||
2168 | + * Callers must disable local interrupts around it. | ||
2169 | + */ | ||
2170 | +noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state) | ||
2171 | +{ | ||
2172 | + struct sev_es_runtime_data *data; | ||
2173 | + struct ghcb *ghcb; | ||
2174 | + | ||
2175 | + WARN_ON(!irqs_disabled()); | ||
2176 | + | ||
2177 | + data = this_cpu_read(runtime_data); | ||
2178 | + ghcb = &data->ghcb_page; | ||
2179 | + | ||
2180 | + if (unlikely(data->ghcb_active)) { | ||
2181 | + /* GHCB is already in use - save its contents */ | ||
2182 | + | ||
2183 | + if (unlikely(data->backup_ghcb_active)) { | ||
2184 | + /* | ||
2185 | + * Backup-GHCB is also already in use. There is no way | ||
2186 | + * to continue here so just kill the machine. To make | ||
2187 | + * panic() work, mark GHCBs inactive so that messages | ||
2188 | + * can be printed out. | ||
2189 | + */ | ||
2190 | + data->ghcb_active = false; | ||
2191 | + data->backup_ghcb_active = false; | ||
2192 | + | ||
2193 | + instrumentation_begin(); | ||
2194 | + panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use"); | ||
2195 | + instrumentation_end(); | ||
2196 | + } | ||
2197 | + | ||
2198 | + /* Mark backup_ghcb active before writing to it */ | ||
2199 | + data->backup_ghcb_active = true; | ||
2200 | + | ||
2201 | + state->ghcb = &data->backup_ghcb; | ||
2202 | + | ||
2203 | + /* Backup GHCB content */ | ||
2204 | + *state->ghcb = *ghcb; | ||
2205 | + } else { | ||
2206 | + state->ghcb = NULL; | ||
2207 | + data->ghcb_active = true; | ||
2208 | + } | ||
2209 | + | ||
2210 | + return ghcb; | ||
2211 | +} | ||
2212 | + | ||
2213 | +static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt, | ||
2214 | + unsigned char *buffer) | ||
2215 | +{ | ||
2216 | + return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE); | ||
2217 | +} | ||
2218 | + | ||
2219 | +static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt) | ||
2220 | +{ | ||
2221 | + char buffer[MAX_INSN_SIZE]; | ||
2222 | + int insn_bytes; | ||
2223 | + | ||
2224 | + insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer); | ||
2225 | + if (insn_bytes == 0) { | ||
2226 | + /* Nothing could be copied */ | ||
2227 | + ctxt->fi.vector = X86_TRAP_PF; | ||
2228 | + ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; | ||
2229 | + ctxt->fi.cr2 = ctxt->regs->ip; | ||
2230 | + return ES_EXCEPTION; | ||
2231 | + } else if (insn_bytes == -EINVAL) { | ||
2232 | + /* Effective RIP could not be calculated */ | ||
2233 | + ctxt->fi.vector = X86_TRAP_GP; | ||
2234 | + ctxt->fi.error_code = 0; | ||
2235 | + ctxt->fi.cr2 = 0; | ||
2236 | + return ES_EXCEPTION; | ||
2237 | + } | ||
2238 | + | ||
2239 | + if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes)) | ||
2240 | + return ES_DECODE_FAILED; | ||
2241 | + | ||
2242 | + if (ctxt->insn.immediate.got) | ||
2243 | + return ES_OK; | ||
2244 | + else | ||
2245 | + return ES_DECODE_FAILED; | ||
2246 | +} | ||
2247 | + | ||
2248 | +static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt) | ||
2249 | +{ | ||
2250 | + char buffer[MAX_INSN_SIZE]; | ||
2251 | + int res, ret; | ||
2252 | + | ||
2253 | + res = vc_fetch_insn_kernel(ctxt, buffer); | ||
2254 | + if (res) { | ||
2255 | + ctxt->fi.vector = X86_TRAP_PF; | ||
2256 | + ctxt->fi.error_code = X86_PF_INSTR; | ||
2257 | + ctxt->fi.cr2 = ctxt->regs->ip; | ||
2258 | + return ES_EXCEPTION; | ||
2259 | + } | ||
2260 | + | ||
2261 | + ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64); | ||
2262 | + if (ret < 0) | ||
2263 | + return ES_DECODE_FAILED; | ||
2264 | + else | ||
2265 | + return ES_OK; | ||
2266 | +} | ||
2267 | + | ||
2268 | +static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) | ||
2269 | +{ | ||
2270 | + if (user_mode(ctxt->regs)) | ||
2271 | + return __vc_decode_user_insn(ctxt); | ||
2272 | + else | ||
2273 | + return __vc_decode_kern_insn(ctxt); | ||
2274 | +} | ||
2275 | + | ||
2276 | +static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, | ||
2277 | + char *dst, char *buf, size_t size) | ||
2278 | +{ | ||
2279 | + unsigned long error_code = X86_PF_PROT | X86_PF_WRITE; | ||
2280 | + | ||
2281 | + /* | ||
2282 | + * This function uses __put_user() independent of whether kernel or user | ||
2283 | + * memory is accessed. This works fine because __put_user() does no | ||
2284 | + * sanity checks of the pointer being accessed. All that it does is | ||
2285 | + * to report when the access failed. | ||
2286 | + * | ||
2287 | + * Also, this function runs in atomic context, so __put_user() is not | ||
2288 | + * allowed to sleep. The page-fault handler detects that it is running | ||
2289 | + * in atomic context and will not try to take mmap_sem and handle the | ||
2290 | + * fault, so additional pagefault_enable()/disable() calls are not | ||
2291 | + * needed. | ||
2292 | + * | ||
2293 | + * The access can't be done via copy_to_user() here because | ||
2294 | + * vc_write_mem() must not use string instructions to access unsafe | ||
2295 | + * memory. The reason is that MOVS is emulated by the #VC handler by | ||
2296 | + * splitting the move up into a read and a write and taking a nested #VC | ||
2297 | + * exception on whatever of them is the MMIO access. Using string | ||
2298 | + * instructions here would cause infinite nesting. | ||
2299 | + */ | ||
2300 | + switch (size) { | ||
2301 | + case 1: { | ||
2302 | + u8 d1; | ||
2303 | + u8 __user *target = (u8 __user *)dst; | ||
2304 | + | ||
2305 | + memcpy(&d1, buf, 1); | ||
2306 | + if (__put_user(d1, target)) | ||
2307 | + goto fault; | ||
2308 | + break; | ||
2309 | + } | ||
2310 | + case 2: { | ||
2311 | + u16 d2; | ||
2312 | + u16 __user *target = (u16 __user *)dst; | ||
2313 | + | ||
2314 | + memcpy(&d2, buf, 2); | ||
2315 | + if (__put_user(d2, target)) | ||
2316 | + goto fault; | ||
2317 | + break; | ||
2318 | + } | ||
2319 | + case 4: { | ||
2320 | + u32 d4; | ||
2321 | + u32 __user *target = (u32 __user *)dst; | ||
2322 | + | ||
2323 | + memcpy(&d4, buf, 4); | ||
2324 | + if (__put_user(d4, target)) | ||
2325 | + goto fault; | ||
2326 | + break; | ||
2327 | + } | ||
2328 | + case 8: { | ||
2329 | + u64 d8; | ||
2330 | + u64 __user *target = (u64 __user *)dst; | ||
2331 | + | ||
2332 | + memcpy(&d8, buf, 8); | ||
2333 | + if (__put_user(d8, target)) | ||
2334 | + goto fault; | ||
2335 | + break; | ||
2336 | + } | ||
2337 | + default: | ||
2338 | + WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); | ||
2339 | + return ES_UNSUPPORTED; | ||
2340 | + } | ||
2341 | + | ||
2342 | + return ES_OK; | ||
2343 | + | ||
2344 | +fault: | ||
2345 | + if (user_mode(ctxt->regs)) | ||
2346 | + error_code |= X86_PF_USER; | ||
2347 | + | ||
2348 | + ctxt->fi.vector = X86_TRAP_PF; | ||
2349 | + ctxt->fi.error_code = error_code; | ||
2350 | + ctxt->fi.cr2 = (unsigned long)dst; | ||
2351 | + | ||
2352 | + return ES_EXCEPTION; | ||
2353 | +} | ||
2354 | + | ||
2355 | +static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, | ||
2356 | + char *src, char *buf, size_t size) | ||
2357 | +{ | ||
2358 | + unsigned long error_code = X86_PF_PROT; | ||
2359 | + | ||
2360 | + /* | ||
2361 | + * This function uses __get_user() independent of whether kernel or user | ||
2362 | + * memory is accessed. This works fine because __get_user() does no | ||
2363 | + * sanity checks of the pointer being accessed. All that it does is | ||
2364 | + * to report when the access failed. | ||
2365 | + * | ||
2366 | + * Also, this function runs in atomic context, so __get_user() is not | ||
2367 | + * allowed to sleep. The page-fault handler detects that it is running | ||
2368 | + * in atomic context and will not try to take mmap_sem and handle the | ||
2369 | + * fault, so additional pagefault_enable()/disable() calls are not | ||
2370 | + * needed. | ||
2371 | + * | ||
2372 | + * The access can't be done via copy_from_user() here because | ||
2373 | + * vc_read_mem() must not use string instructions to access unsafe | ||
2374 | + * memory. The reason is that MOVS is emulated by the #VC handler by | ||
2375 | + * splitting the move up into a read and a write and taking a nested #VC | ||
2376 | + * exception on whichever of them is the MMIO access. Using string | ||
2377 | + * instructions here would cause infinite nesting. | ||
2378 | + */ | ||
2379 | + switch (size) { | ||
2380 | + case 1: { | ||
2381 | + u8 d1; | ||
2382 | + u8 __user *s = (u8 __user *)src; | ||
2383 | + | ||
2384 | + if (__get_user(d1, s)) | ||
2385 | + goto fault; | ||
2386 | + memcpy(buf, &d1, 1); | ||
2387 | + break; | ||
2388 | + } | ||
2389 | + case 2: { | ||
2390 | + u16 d2; | ||
2391 | + u16 __user *s = (u16 __user *)src; | ||
2392 | + | ||
2393 | + if (__get_user(d2, s)) | ||
2394 | + goto fault; | ||
2395 | + memcpy(buf, &d2, 2); | ||
2396 | + break; | ||
2397 | + } | ||
2398 | + case 4: { | ||
2399 | + u32 d4; | ||
2400 | + u32 __user *s = (u32 __user *)src; | ||
2401 | + | ||
2402 | + if (__get_user(d4, s)) | ||
2403 | + goto fault; | ||
2404 | + memcpy(buf, &d4, 4); | ||
2405 | + break; | ||
2406 | + } | ||
2407 | + case 8: { | ||
2408 | + u64 d8; | ||
2409 | + u64 __user *s = (u64 __user *)src; | ||
 | + | ||
2410 | + if (__get_user(d8, s)) | ||
2411 | + goto fault; | ||
2412 | + memcpy(buf, &d8, 8); | ||
2413 | + break; | ||
2414 | + } | ||
2415 | + default: | ||
2416 | + WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); | ||
2417 | + return ES_UNSUPPORTED; | ||
2418 | + } | ||
2419 | + | ||
2420 | + return ES_OK; | ||
2421 | + | ||
2422 | +fault: | ||
2423 | + if (user_mode(ctxt->regs)) | ||
2424 | + error_code |= X86_PF_USER; | ||
2425 | + | ||
2426 | + ctxt->fi.vector = X86_TRAP_PF; | ||
2427 | + ctxt->fi.error_code = error_code; | ||
2428 | + ctxt->fi.cr2 = (unsigned long)src; | ||
2429 | + | ||
2430 | + return ES_EXCEPTION; | ||
2431 | +} | ||
2432 | + | ||
2433 | +static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, | ||
2434 | + unsigned long vaddr, phys_addr_t *paddr) | ||
2435 | +{ | ||
2436 | + unsigned long va = (unsigned long)vaddr; | ||
2437 | + unsigned int level; | ||
2438 | + phys_addr_t pa; | ||
2439 | + pgd_t *pgd; | ||
2440 | + pte_t *pte; | ||
2441 | + | ||
2442 | + pgd = __va(read_cr3_pa()); | ||
2443 | + pgd = &pgd[pgd_index(va)]; | ||
2444 | + pte = lookup_address_in_pgd(pgd, va, &level); | ||
2445 | + if (!pte) { | ||
2446 | + ctxt->fi.vector = X86_TRAP_PF; | ||
2447 | + ctxt->fi.cr2 = vaddr; | ||
2448 | + ctxt->fi.error_code = 0; | ||
2449 | + | ||
2450 | + if (user_mode(ctxt->regs)) | ||
2451 | + ctxt->fi.error_code |= X86_PF_USER; | ||
2452 | + | ||
2453 | + return ES_EXCEPTION; | ||
2454 | + } | ||
2455 | + | ||
2456 | + if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC)) | ||
2457 | + /* Emulated MMIO to/from encrypted memory not supported */ | ||
2458 | + return ES_UNSUPPORTED; | ||
2459 | + | ||
2460 | + pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; | ||
2461 | + pa |= va & ~page_level_mask(level); | ||
2462 | + | ||
2463 | + *paddr = pa; | ||
2464 | + | ||
2465 | + return ES_OK; | ||
2466 | +} | ||
2467 | + | ||
2468 | +static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size) | ||
2469 | +{ | ||
2470 | + BUG_ON(size > 4); | ||
2471 | + | ||
2472 | + if (user_mode(ctxt->regs)) { | ||
2473 | + struct thread_struct *t = ¤t->thread; | ||
2474 | + struct io_bitmap *iobm = t->io_bitmap; | ||
2475 | + size_t idx; | ||
2476 | + | ||
2477 | + if (!iobm) | ||
2478 | + goto fault; | ||
2479 | + | ||
2480 | + for (idx = port; idx < port + size; ++idx) { | ||
2481 | + if (test_bit(idx, iobm->bitmap)) | ||
2482 | + goto fault; | ||
2483 | + } | ||
2484 | + } | ||
2485 | + | ||
2486 | + return ES_OK; | ||
2487 | + | ||
2488 | +fault: | ||
2489 | + ctxt->fi.vector = X86_TRAP_GP; | ||
2490 | + ctxt->fi.error_code = 0; | ||
2491 | + | ||
2492 | + return ES_EXCEPTION; | ||
2493 | +} | ||
2494 | + | ||
2495 | +static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) | ||
2496 | +{ | ||
2497 | + long error_code = ctxt->fi.error_code; | ||
2498 | + int trapnr = ctxt->fi.vector; | ||
2499 | + | ||
2500 | + ctxt->regs->orig_ax = ctxt->fi.error_code; | ||
2501 | + | ||
2502 | + switch (trapnr) { | ||
2503 | + case X86_TRAP_GP: | ||
2504 | + exc_general_protection(ctxt->regs, error_code); | ||
2505 | + break; | ||
2506 | + case X86_TRAP_UD: | ||
2507 | + exc_invalid_op(ctxt->regs); | ||
2508 | + break; | ||
2509 | + case X86_TRAP_PF: | ||
2510 | + write_cr2(ctxt->fi.cr2); | ||
2511 | + exc_page_fault(ctxt->regs, error_code); | ||
2512 | + break; | ||
2513 | + case X86_TRAP_AC: | ||
2514 | + exc_alignment_check(ctxt->regs, error_code); | ||
2515 | + break; | ||
2516 | + default: | ||
2517 | + pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n"); | ||
2518 | + BUG(); | ||
2519 | + } | ||
2520 | +} | ||
2521 | + | ||
2522 | +/* Include code shared with pre-decompression boot stage */ | ||
2523 | +#include "shared.c" | ||
2524 | + | ||
2525 | +noinstr void __sev_put_ghcb(struct ghcb_state *state) | ||
2526 | +{ | ||
2527 | + struct sev_es_runtime_data *data; | ||
2528 | + struct ghcb *ghcb; | ||
2529 | + | ||
2530 | + WARN_ON(!irqs_disabled()); | ||
2531 | + | ||
2532 | + data = this_cpu_read(runtime_data); | ||
2533 | + ghcb = &data->ghcb_page; | ||
2534 | + | ||
2535 | + if (state->ghcb) { | ||
2536 | + /* Restore GHCB from Backup */ | ||
2537 | + *ghcb = *state->ghcb; | ||
2538 | + data->backup_ghcb_active = false; | ||
2539 | + state->ghcb = NULL; | ||
2540 | + } else { | ||
2541 | + /* | ||
2542 | + * Invalidate the GHCB so a VMGEXIT instruction issued | ||
2543 | + * from userspace won't appear to be valid. | ||
2544 | + */ | ||
2545 | + vc_ghcb_invalidate(ghcb); | ||
2546 | + data->ghcb_active = false; | ||
2547 | + } | ||
2548 | +} | ||
2549 | + | ||
2550 | +int svsm_perform_call_protocol(struct svsm_call *call) | ||
2551 | +{ | ||
2552 | + struct ghcb_state state; | ||
2553 | + unsigned long flags; | ||
2554 | + struct ghcb *ghcb; | ||
2555 | + int ret; | ||
2556 | + | ||
2557 | + /* | ||
2558 | + * This can be called very early in the boot, so use native functions in | ||
2559 | + * order to avoid paravirt issues. | ||
2560 | + */ | ||
2561 | + flags = native_local_irq_save(); | ||
2562 | + | ||
2563 | + /* | ||
2564 | + * Use rip-relative references when called early in the boot. If | ||
2565 | + * ghcbs_initialized is set, then it is late in the boot and there is | ||
2566 | + * no need to worry about rip-relative references in called functions. | ||
2567 | + */ | ||
2568 | + if (RIP_REL_REF(sev_cfg).ghcbs_initialized) | ||
2569 | + ghcb = __sev_get_ghcb(&state); | ||
2570 | + else if (RIP_REL_REF(boot_ghcb)) | ||
2571 | + ghcb = RIP_REL_REF(boot_ghcb); | ||
2572 | + else | ||
2573 | + ghcb = NULL; | ||
2574 | + | ||
2575 | + do { | ||
2576 | + ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call) | ||
2577 | + : svsm_perform_msr_protocol(call); | ||
2578 | + } while (ret == -EAGAIN); | ||
2579 | + | ||
2580 | + if (RIP_REL_REF(sev_cfg).ghcbs_initialized) | ||
2581 | + __sev_put_ghcb(&state); | ||
2582 | + | ||
2583 | + native_local_irq_restore(flags); | ||
2584 | + | ||
2585 | + return ret; | ||
2586 | +} | ||
2587 | + | ||
2588 | +void __head | ||
2589 | +early_set_pages_state(unsigned long vaddr, unsigned long paddr, | ||
2590 | + unsigned long npages, enum psc_op op) | ||
2591 | +{ | ||
2592 | + unsigned long paddr_end; | ||
2593 | + u64 val; | ||
2594 | + | ||
2595 | + vaddr = vaddr & PAGE_MASK; | ||
2596 | + | ||
2597 | + paddr = paddr & PAGE_MASK; | ||
2598 | + paddr_end = paddr + (npages << PAGE_SHIFT); | ||
2599 | + | ||
2600 | + while (paddr < paddr_end) { | ||
2601 | + /* Page validation must be rescinded before changing to shared */ | ||
2602 | + if (op == SNP_PAGE_STATE_SHARED) | ||
2603 | + pvalidate_4k_page(vaddr, paddr, false); | ||
2604 | + | ||
2605 | + /* | ||
2606 | + * Use the MSR protocol because this function can be called before | ||
2607 | + * the GHCB is established. | ||
2608 | + */ | ||
2609 | + sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); | ||
2610 | + VMGEXIT(); | ||
2611 | + | ||
2612 | + val = sev_es_rd_ghcb_msr(); | ||
2613 | + | ||
2614 | + if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) | ||
2615 | + goto e_term; | ||
2616 | + | ||
2617 | + if (GHCB_MSR_PSC_RESP_VAL(val)) | ||
2618 | + goto e_term; | ||
2619 | + | ||
2620 | + /* Page validation must be performed after changing to private */ | ||
2621 | + if (op == SNP_PAGE_STATE_PRIVATE) | ||
2622 | + pvalidate_4k_page(vaddr, paddr, true); | ||
2623 | + | ||
2624 | + vaddr += PAGE_SIZE; | ||
2625 | + paddr += PAGE_SIZE; | ||
2626 | + } | ||
2627 | + | ||
2628 | + return; | ||
2629 | + | ||
2630 | +e_term: | ||
2631 | + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); | ||
2632 | +} | ||
2633 | + | ||
2634 | +void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, | ||
2635 | + unsigned long npages) | ||
2636 | +{ | ||
2637 | + /* | ||
2638 | + * This can be invoked in early boot while running identity mapped, so | ||
2639 | + * use an open coded check for SNP instead of using cc_platform_has(). | ||
2640 | + * This eliminates worries about jump tables or checking boot_cpu_data | ||
2641 | + * in the cc_platform_has() function. | ||
2642 | + */ | ||
2643 | + if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) | ||
2644 | + return; | ||
2645 | + | ||
2646 | + /* | ||
2647 | + * Ask the hypervisor to mark the memory pages as private in the RMP | ||
2648 | + * table. | ||
2649 | + */ | ||
2650 | + early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE); | ||
2651 | +} | ||
2652 | + | ||
2653 | +void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, | ||
2654 | + unsigned long npages) | ||
2655 | +{ | ||
2656 | + /* | ||
2657 | + * This can be invoked in early boot while running identity mapped, so | ||
2658 | + * use an open coded check for SNP instead of using cc_platform_has(). | ||
2659 | + * This eliminates worries about jump tables or checking boot_cpu_data | ||
2660 | + * in the cc_platform_has() function. | ||
2661 | + */ | ||
2662 | + if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) | ||
2663 | + return; | ||
2664 | + | ||
2665 | + /* Ask hypervisor to mark the memory pages shared in the RMP table. */ | ||
2666 | + early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED); | ||
2667 | +} | ||
2668 | + | ||
2669 | +/* Writes to the SVSM CAA MSR are ignored */ | ||
2670 | +static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write) | ||
2671 | +{ | ||
2672 | + if (write) | ||
2673 | + return ES_OK; | ||
2674 | + | ||
2675 | + regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa)); | ||
2676 | + regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa)); | ||
2677 | + | ||
2678 | + return ES_OK; | ||
2679 | +} | ||
2680 | + | ||
2681 | +/* | ||
2682 | + * TSC related accesses should not exit to the hypervisor when a guest is | ||
2683 | + * executing with Secure TSC enabled, so special handling is required for | ||
2684 | + * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ. | ||
2685 | + */ | ||
2686 | +static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write) | ||
2687 | +{ | ||
2688 | + u64 tsc; | ||
2689 | + | ||
2690 | + /* | ||
2691 | + * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled. | ||
2692 | + * Terminate the SNP guest when the interception is enabled. | ||
2693 | + */ | ||
2694 | + if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ) | ||
2695 | + return ES_VMM_ERROR; | ||
2696 | + | ||
2697 | + /* | ||
2698 | + * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC | ||
2699 | + * to return undefined values, so ignore all writes. | ||
2700 | + * | ||
2701 | + * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use | ||
2702 | + * the value returned by rdtsc_ordered(). | ||
2703 | + */ | ||
2704 | + if (write) { | ||
2705 | + WARN_ONCE(1, "TSC MSR writes are verboten!\n"); | ||
2706 | + return ES_OK; | ||
2707 | + } | ||
2708 | + | ||
2709 | + tsc = rdtsc_ordered(); | ||
2710 | + regs->ax = lower_32_bits(tsc); | ||
2711 | + regs->dx = upper_32_bits(tsc); | ||
2712 | + | ||
2713 | + return ES_OK; | ||
2714 | +} | ||
2715 | + | ||
2716 | +static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) | ||
2717 | +{ | ||
2718 | + struct pt_regs *regs = ctxt->regs; | ||
2719 | + enum es_result ret; | ||
2720 | + bool write; | ||
2721 | + | ||
2722 | + /* Is it a WRMSR? */ | ||
2723 | + write = ctxt->insn.opcode.bytes[1] == 0x30; | ||
2724 | + | ||
2725 | + switch (regs->cx) { | ||
2726 | + case MSR_SVSM_CAA: | ||
2727 | + return __vc_handle_msr_caa(regs, write); | ||
2728 | + case MSR_IA32_TSC: | ||
2729 | + case MSR_AMD64_GUEST_TSC_FREQ: | ||
2730 | + if (sev_status & MSR_AMD64_SNP_SECURE_TSC) | ||
2731 | + return __vc_handle_secure_tsc_msrs(regs, write); | ||
2732 | + break; | ||
2733 | + default: | ||
2734 | + break; | ||
2735 | + } | ||
2736 | + | ||
2737 | + ghcb_set_rcx(ghcb, regs->cx); | ||
2738 | + if (write) { | ||
2739 | + ghcb_set_rax(ghcb, regs->ax); | ||
2740 | + ghcb_set_rdx(ghcb, regs->dx); | ||
2741 | + } | ||
2742 | + | ||
2743 | + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, write, 0); | ||
2744 | + | ||
2745 | + if ((ret == ES_OK) && !write) { | ||
2746 | + regs->ax = ghcb->save.rax; | ||
2747 | + regs->dx = ghcb->save.rdx; | ||
2748 | + } | ||
2749 | + | ||
2750 | + return ret; | ||
2751 | +} | ||
2752 | + | ||
2753 | +static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt) | ||
2754 | +{ | ||
2755 | + int trapnr = ctxt->fi.vector; | ||
2756 | + | ||
2757 | + if (trapnr == X86_TRAP_PF) | ||
2758 | + native_write_cr2(ctxt->fi.cr2); | ||
2759 | + | ||
2760 | + ctxt->regs->orig_ax = ctxt->fi.error_code; | ||
2761 | + do_early_exception(ctxt->regs, trapnr); | ||
2762 | +} | ||
2763 | + | ||
2764 | +static long *vc_insn_get_rm(struct es_em_ctxt *ctxt) | ||
2765 | +{ | ||
2766 | + long *reg_array; | ||
2767 | + int offset; | ||
2768 | + | ||
2769 | + reg_array = (long *)ctxt->regs; | ||
2770 | + offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs); | ||
2771 | + | ||
2772 | + if (offset < 0) | ||
2773 | + return NULL; | ||
2774 | + | ||
2775 | + offset /= sizeof(long); | ||
2776 | + | ||
2777 | + return reg_array + offset; | ||
2778 | +} | ||
 | + | ||
2779 | +static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, | ||
2780 | + unsigned int bytes, bool read) | ||
2781 | +{ | ||
2782 | + u64 exit_code, exit_info_1, exit_info_2; | ||
2783 | + unsigned long ghcb_pa = __pa(ghcb); | ||
2784 | + enum es_result res; | ||
2785 | + phys_addr_t paddr; | ||
2786 | + void __user *ref; | ||
2787 | + | ||
2788 | + ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs); | ||
2789 | + if (ref == (void __user *)-1L) | ||
2790 | + return ES_UNSUPPORTED; | ||
2791 | + | ||
2792 | + exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE; | ||
2793 | + | ||
2794 | + res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr); | ||
2795 | + if (res != ES_OK) { | ||
2796 | + if (res == ES_EXCEPTION && !read) | ||
2797 | + ctxt->fi.error_code |= X86_PF_WRITE; | ||
2798 | + | ||
2799 | + return res; | ||
2800 | + } | ||
2801 | + | ||
2802 | + exit_info_1 = paddr; | ||
2803 | + /* Can never be greater than 8 */ | ||
2804 | + exit_info_2 = bytes; | ||
2805 | + | ||
2806 | + ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer)); | ||
2807 | + | ||
2808 | + return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2); | ||
2809 | +} | ||
2810 | + | ||
2811 | +/* | ||
2812 | + * The MOVS instruction has two memory operands, which raises the | ||
2813 | + * problem that it is not known whether the access to the source or the | ||
2814 | + * destination caused the #VC exception (and hence whether an MMIO read | ||
2815 | + * or write operation needs to be emulated). | ||
2816 | + * | ||
2817 | + * Instead of playing games with walking page-tables and trying to guess | ||
2818 | + * whether the source or destination is an MMIO range, split the move | ||
2819 | + * into two operations, a read and a write with only one memory operand. | ||
2820 | + * This will cause a nested #VC exception on the MMIO address which can | ||
2821 | + * then be handled. | ||
2822 | + * | ||
2823 | + * This implementation has the benefit that it also supports MOVS where | ||
2824 | + * source _and_ destination are MMIO regions. | ||
2825 | + * | ||
2826 | + * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a | ||
2827 | + * rare operation. If it turns out to be a performance problem the split | ||
2828 | + * operations can be moved to memcpy_fromio() and memcpy_toio(). | ||
2829 | + */ | ||
2830 | +static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt, | ||
2831 | + unsigned int bytes) | ||
2832 | +{ | ||
2833 | + unsigned long ds_base, es_base; | ||
2834 | + unsigned char *src, *dst; | ||
2835 | + unsigned char buffer[8]; | ||
2836 | + enum es_result ret; | ||
2837 | + bool rep; | ||
2838 | + int off; | ||
2839 | + | ||
2840 | + ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS); | ||
2841 | + es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES); | ||
2842 | + | ||
2843 | + if (ds_base == -1L || es_base == -1L) { | ||
2844 | + ctxt->fi.vector = X86_TRAP_GP; | ||
2845 | + ctxt->fi.error_code = 0; | ||
2846 | + return ES_EXCEPTION; | ||
2847 | + } | ||
2848 | + | ||
2849 | + src = ds_base + (unsigned char *)ctxt->regs->si; | ||
2850 | + dst = es_base + (unsigned char *)ctxt->regs->di; | ||
2851 | + | ||
2852 | + ret = vc_read_mem(ctxt, src, buffer, bytes); | ||
2853 | + if (ret != ES_OK) | ||
2854 | + return ret; | ||
2855 | + | ||
2856 | + ret = vc_write_mem(ctxt, dst, buffer, bytes); | ||
2857 | + if (ret != ES_OK) | ||
2858 | + return ret; | ||
2859 | + | ||
2860 | + if (ctxt->regs->flags & X86_EFLAGS_DF) | ||
2861 | + off = -bytes; | ||
2862 | + else | ||
2863 | + off = bytes; | ||
2864 | + | ||
2865 | + ctxt->regs->si += off; | ||
2866 | + ctxt->regs->di += off; | ||
2867 | + | ||
2868 | + rep = insn_has_rep_prefix(&ctxt->insn); | ||
2869 | + if (rep) | ||
2870 | + ctxt->regs->cx -= 1; | ||
2871 | + | ||
2872 | + if (!rep || ctxt->regs->cx == 0) | ||
2873 | + return ES_OK; | ||
2874 | + else | ||
2875 | + return ES_RETRY; | ||
2876 | +} | ||
2877 | + | ||
2878 | +static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt) | ||
2879 | +{ | ||
2880 | + struct insn *insn = &ctxt->insn; | ||
2881 | + enum insn_mmio_type mmio; | ||
2882 | + unsigned int bytes = 0; | ||
2883 | + enum es_result ret; | ||
2884 | + u8 sign_byte; | ||
2885 | + long *reg_data; | ||
2886 | + | ||
2887 | + mmio = insn_decode_mmio(insn, &bytes); | ||
2888 | + if (mmio == INSN_MMIO_DECODE_FAILED) | ||
2889 | + return ES_DECODE_FAILED; | ||
2890 | + | ||
2891 | + if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) { | ||
2892 | + reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs); | ||
2893 | + if (!reg_data) | ||
2894 | + return ES_DECODE_FAILED; | ||
2895 | + } | ||
2896 | + | ||
2897 | + if (user_mode(ctxt->regs)) | ||
2898 | + return ES_UNSUPPORTED; | ||
2899 | + | ||
2900 | + switch (mmio) { | ||
2901 | + case INSN_MMIO_WRITE: | ||
2902 | + memcpy(ghcb->shared_buffer, reg_data, bytes); | ||
2903 | + ret = vc_do_mmio(ghcb, ctxt, bytes, false); | ||
2904 | + break; | ||
2905 | + case INSN_MMIO_WRITE_IMM: | ||
2906 | + memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes); | ||
2907 | + ret = vc_do_mmio(ghcb, ctxt, bytes, false); | ||
2908 | + break; | ||
2909 | + case INSN_MMIO_READ: | ||
2910 | + ret = vc_do_mmio(ghcb, ctxt, bytes, true); | ||
2911 | + if (ret) | ||
2912 | + break; | ||
2913 | + | ||
2914 | + /* Zero-extend for 32-bit operation */ | ||
2915 | + if (bytes == 4) | ||
2916 | + *reg_data = 0; | ||
2917 | + | ||
2918 | + memcpy(reg_data, ghcb->shared_buffer, bytes); | ||
2919 | + break; | ||
2920 | + case INSN_MMIO_READ_ZERO_EXTEND: | ||
2921 | + ret = vc_do_mmio(ghcb, ctxt, bytes, true); | ||
2922 | + if (ret) | ||
2923 | + break; | ||
2924 | + | ||
2925 | + /* Zero extend based on operand size */ | ||
2926 | + memset(reg_data, 0, insn->opnd_bytes); | ||
2927 | + memcpy(reg_data, ghcb->shared_buffer, bytes); | ||
2928 | + break; | ||
2929 | + case INSN_MMIO_READ_SIGN_EXTEND: | ||
2930 | + ret = vc_do_mmio(ghcb, ctxt, bytes, true); | ||
2931 | + if (ret) | ||
2932 | + break; | ||
2933 | + | ||
2934 | + if (bytes == 1) { | ||
2935 | + u8 *val = (u8 *)ghcb->shared_buffer; | ||
2936 | + | ||
2937 | + sign_byte = (*val & 0x80) ? 0xff : 0x00; | ||
2938 | + } else { | ||
2939 | + u16 *val = (u16 *)ghcb->shared_buffer; | ||
2940 | + | ||
2941 | + sign_byte = (*val & 0x8000) ? 0xff : 0x00; | ||
2942 | + } | ||
2943 | + | ||
2944 | + /* Sign extend based on operand size */ | ||
2945 | + memset(reg_data, sign_byte, insn->opnd_bytes); | ||
2946 | + memcpy(reg_data, ghcb->shared_buffer, bytes); | ||
2947 | + break; | ||
2948 | + case INSN_MMIO_MOVS: | ||
2949 | + ret = vc_handle_mmio_movs(ctxt, bytes); | ||
2950 | + break; | ||
2951 | + default: | ||
2952 | + ret = ES_UNSUPPORTED; | ||
2953 | + break; | ||
2954 | + } | ||
2955 | + | ||
2956 | + return ret; | ||
2957 | +} | ||
2958 | + | ||
2959 | +static enum es_result vc_handle_dr7_write(struct ghcb *ghcb, | ||
2960 | + struct es_em_ctxt *ctxt) | ||
2961 | +{ | ||
2962 | + struct sev_es_runtime_data *data = this_cpu_read(runtime_data); | ||
2963 | + long val, *reg = vc_insn_get_rm(ctxt); | ||
2964 | + enum es_result ret; | ||
2965 | + | ||
2966 | + if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP) | ||
2967 | + return ES_VMM_ERROR; | ||
2968 | + | ||
2969 | + if (!reg) | ||
2970 | + return ES_DECODE_FAILED; | ||
2971 | + | ||
2972 | + val = *reg; | ||
2973 | + | ||
2974 | + /* Upper 32 bits must be written as zeroes */ | ||
2975 | + if (val >> 32) { | ||
2976 | + ctxt->fi.vector = X86_TRAP_GP; | ||
2977 | + ctxt->fi.error_code = 0; | ||
2978 | + return ES_EXCEPTION; | ||
2979 | + } | ||
2980 | + | ||
2981 | + /* Clear out other reserved bits and set bit 10 */ | ||
2982 | + val = (val & 0xffff23ffL) | BIT(10); | ||
2983 | + | ||
2984 | + /* Early non-zero writes to DR7 are not supported */ | ||
2985 | + if (!data && (val & ~DR7_RESET_VALUE)) | ||
2986 | + return ES_UNSUPPORTED; | ||
2987 | + | ||
2988 | + /* Using a value of 0 for ExitInfo1 means RAX holds the value */ | ||
2989 | + ghcb_set_rax(ghcb, val); | ||
2990 | + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0); | ||
2991 | + if (ret != ES_OK) | ||
2992 | + return ret; | ||
2993 | + | ||
2994 | + if (data) | ||
2995 | + data->dr7 = val; | ||
2996 | + | ||
2997 | + return ES_OK; | ||
2998 | +} | ||
2999 | + | ||
3000 | +static enum es_result vc_handle_dr7_read(struct ghcb *ghcb, | ||
3001 | + struct es_em_ctxt *ctxt) | ||
3002 | +{ | ||
3003 | + struct sev_es_runtime_data *data = this_cpu_read(runtime_data); | ||
3004 | + long *reg = vc_insn_get_rm(ctxt); | ||
3005 | + | ||
3006 | + if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP) | ||
3007 | + return ES_VMM_ERROR; | ||
3008 | + | ||
3009 | + if (!reg) | ||
3010 | + return ES_DECODE_FAILED; | ||
3011 | + | ||
3012 | + if (data) | ||
3013 | + *reg = data->dr7; | ||
3014 | + else | ||
3015 | + *reg = DR7_RESET_VALUE; | ||
3016 | + | ||
3017 | + return ES_OK; | ||
3018 | +} | ||
3019 | + | ||
3020 | +static enum es_result vc_handle_wbinvd(struct ghcb *ghcb, | ||
3021 | + struct es_em_ctxt *ctxt) | ||
3022 | +{ | ||
3023 | + return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0); | ||
3024 | +} | ||
3025 | + | ||
3026 | +static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt) | ||
3027 | +{ | ||
3028 | + enum es_result ret; | ||
3029 | + | ||
3030 | + ghcb_set_rcx(ghcb, ctxt->regs->cx); | ||
3031 | + | ||
3032 | + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0); | ||
3033 | + if (ret != ES_OK) | ||
3034 | + return ret; | ||
3035 | + | ||
3036 | + if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb))) | ||
3037 | + return ES_VMM_ERROR; | ||
3038 | + | ||
3039 | + ctxt->regs->ax = ghcb->save.rax; | ||
3040 | + ctxt->regs->dx = ghcb->save.rdx; | ||
3041 | + | ||
3042 | + return ES_OK; | ||
3043 | +} | ||
3044 | + | ||
3045 | +static enum es_result vc_handle_monitor(struct ghcb *ghcb, | ||
3046 | + struct es_em_ctxt *ctxt) | ||
3047 | +{ | ||
3048 | + /* | ||
3049 | + * Treat it as a NOP and do not leak a physical address to the | ||
3050 | + * hypervisor. | ||
3051 | + */ | ||
3052 | + return ES_OK; | ||
3053 | +} | ||
3054 | + | ||
3055 | +static enum es_result vc_handle_mwait(struct ghcb *ghcb, | ||
3056 | + struct es_em_ctxt *ctxt) | ||
3057 | +{ | ||
3058 | + /* Treat the same as MONITOR/MONITORX */ | ||
3059 | + return ES_OK; | ||
3060 | +} | ||
3061 | + | ||
3062 | +static enum es_result vc_handle_vmmcall(struct ghcb *ghcb, | ||
3063 | + struct es_em_ctxt *ctxt) | ||
3064 | +{ | ||
3065 | + enum es_result ret; | ||
3066 | + | ||
3067 | + ghcb_set_rax(ghcb, ctxt->regs->ax); | ||
3068 | + ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0); | ||
3069 | + | ||
3070 | + if (x86_platform.hyper.sev_es_hcall_prepare) | ||
3071 | + x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs); | ||
3072 | + | ||
3073 | + ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0); | ||
3074 | + if (ret != ES_OK) | ||
3075 | + return ret; | ||
3076 | + | ||
3077 | + if (!ghcb_rax_is_valid(ghcb)) | ||
3078 | + return ES_VMM_ERROR; | ||
3079 | + | ||
3080 | + ctxt->regs->ax = ghcb->save.rax; | ||
3081 | + | ||
3082 | + /* | ||
3083 | + * Call sev_es_hcall_finish() after regs->ax is already set. | ||
3084 | + * This allows the hypervisor handler to overwrite it again if | ||
3085 | + * necessary. | ||
3086 | + */ | ||
3087 | + if (x86_platform.hyper.sev_es_hcall_finish && | ||
3088 | + !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs)) | ||
3089 | + return ES_VMM_ERROR; | ||
3090 | + | ||
3091 | + return ES_OK; | ||
3092 | +} | ||
3093 | + | ||
3094 | +static enum es_result vc_handle_trap_ac(struct ghcb *ghcb, | ||
3095 | + struct es_em_ctxt *ctxt) | ||
3096 | +{ | ||
3097 | + /* | ||
3098 | + * Calling exc_alignment_check() directly does not work, because it | ||
3099 | + * enables IRQs and the GHCB is active. Forward the exception and call | ||
3100 | + * it later from vc_forward_exception(). | ||
3101 | + */ | ||
3102 | + ctxt->fi.vector = X86_TRAP_AC; | ||
3103 | + ctxt->fi.error_code = 0; | ||
3104 | + return ES_EXCEPTION; | ||
3105 | +} | ||
3106 | + | ||
3107 | +static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt, | ||
3108 | + struct ghcb *ghcb, | ||
3109 | + unsigned long exit_code) | ||
3110 | +{ | ||
3111 | + enum es_result result = vc_check_opcode_bytes(ctxt, exit_code); | ||
3112 | + | ||
3113 | + if (result != ES_OK) | ||
3114 | + return result; | ||
3115 | + | ||
3116 | + switch (exit_code) { | ||
3117 | + case SVM_EXIT_READ_DR7: | ||
3118 | + result = vc_handle_dr7_read(ghcb, ctxt); | ||
3119 | + break; | ||
3120 | + case SVM_EXIT_WRITE_DR7: | ||
3121 | + result = vc_handle_dr7_write(ghcb, ctxt); | ||
3122 | + break; | ||
3123 | + case SVM_EXIT_EXCP_BASE + X86_TRAP_AC: | ||
3124 | + result = vc_handle_trap_ac(ghcb, ctxt); | ||
3125 | + break; | ||
3126 | + case SVM_EXIT_RDTSC: | ||
3127 | + case SVM_EXIT_RDTSCP: | ||
3128 | + result = vc_handle_rdtsc(ghcb, ctxt, exit_code); | ||
3129 | + break; | ||
3130 | + case SVM_EXIT_RDPMC: | ||
3131 | + result = vc_handle_rdpmc(ghcb, ctxt); | ||
3132 | + break; | ||
3133 | + case SVM_EXIT_INVD: | ||
3134 | + pr_err_ratelimited("#VC exception for INVD??? Seriously???\n"); | ||
3135 | + result = ES_UNSUPPORTED; | ||
3136 | + break; | ||
3137 | + case SVM_EXIT_CPUID: | ||
3138 | + result = vc_handle_cpuid(ghcb, ctxt); | ||
3139 | + break; | ||
3140 | + case SVM_EXIT_IOIO: | ||
3141 | + result = vc_handle_ioio(ghcb, ctxt); | ||
3142 | + break; | ||
3143 | + case SVM_EXIT_MSR: | ||
3144 | + result = vc_handle_msr(ghcb, ctxt); | ||
3145 | + break; | ||
3146 | + case SVM_EXIT_VMMCALL: | ||
3147 | + result = vc_handle_vmmcall(ghcb, ctxt); | ||
3148 | + break; | ||
3149 | + case SVM_EXIT_WBINVD: | ||
3150 | + result = vc_handle_wbinvd(ghcb, ctxt); | ||
3151 | + break; | ||
3152 | + case SVM_EXIT_MONITOR: | ||
3153 | + result = vc_handle_monitor(ghcb, ctxt); | ||
3154 | + break; | ||
3155 | + case SVM_EXIT_MWAIT: | ||
3156 | + result = vc_handle_mwait(ghcb, ctxt); | ||
3157 | + break; | ||
3158 | + case SVM_EXIT_NPF: | ||
3159 | + result = vc_handle_mmio(ghcb, ctxt); | ||
3160 | + break; | ||
3161 | + default: | ||
3162 | + /* | ||
3163 | + * Unexpected #VC exception | ||
3164 | + */ | ||
3165 | + result = ES_UNSUPPORTED; | ||
3166 | + } | ||
3167 | + | ||
3168 | + return result; | ||
3169 | +} | ||
3170 | + | ||
3171 | +static __always_inline bool is_vc2_stack(unsigned long sp) | ||
3172 | +{ | ||
3173 | + return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2)); | ||
3174 | +} | ||
3175 | + | ||
3176 | +static __always_inline bool vc_from_invalid_context(struct pt_regs *regs) | ||
3177 | +{ | ||
3178 | + unsigned long sp, prev_sp; | ||
3179 | + | ||
3180 | + sp = (unsigned long)regs; | ||
3181 | + prev_sp = regs->sp; | ||
3182 | + | ||
3183 | + /* | ||
3184 | + * If the code was already executing on the VC2 stack when the #VC | ||
3185 | + * happened, let it proceed to the normal handling routine. This way the | ||
3186 | + * code executing on the VC2 stack can cause #VC exceptions to get handled. | ||
3187 | + */ | ||
3188 | + return is_vc2_stack(sp) && !is_vc2_stack(prev_sp); | ||
3189 | +} | ||
3190 | + | ||
3191 | +static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code) | ||
3192 | +{ | ||
3193 | + struct ghcb_state state; | ||
3194 | + struct es_em_ctxt ctxt; | ||
3195 | + enum es_result result; | ||
3196 | + struct ghcb *ghcb; | ||
3197 | + bool ret = true; | ||
3198 | + | ||
3199 | + ghcb = __sev_get_ghcb(&state); | ||
3200 | + | ||
3201 | + vc_ghcb_invalidate(ghcb); | ||
3202 | + result = vc_init_em_ctxt(&ctxt, regs, error_code); | ||
3203 | + | ||
3204 | + if (result == ES_OK) | ||
3205 | + result = vc_handle_exitcode(&ctxt, ghcb, error_code); | ||
3206 | + | ||
3207 | + __sev_put_ghcb(&state); | ||
3208 | + | ||
3209 | + /* Done - now check the result */ | ||
3210 | + switch (result) { | ||
3211 | + case ES_OK: | ||
3212 | + vc_finish_insn(&ctxt); | ||
3213 | + break; | ||
3214 | + case ES_UNSUPPORTED: | ||
3215 | + pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n", | ||
3216 | + error_code, regs->ip); | ||
3217 | + ret = false; | ||
3218 | + break; | ||
3219 | + case ES_VMM_ERROR: | ||
3220 | + pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", | ||
3221 | + error_code, regs->ip); | ||
3222 | + ret = false; | ||
3223 | + break; | ||
3224 | + case ES_DECODE_FAILED: | ||
3225 | + pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", | ||
3226 | + error_code, regs->ip); | ||
3227 | + ret = false; | ||
3228 | + break; | ||
3229 | + case ES_EXCEPTION: | ||
3230 | + vc_forward_exception(&ctxt); | ||
3231 | + break; | ||
3232 | + case ES_RETRY: | ||
3233 | + /* Nothing to do */ | ||
3234 | + break; | ||
3235 | + default: | ||
3236 | + pr_emerg("Unknown result in %s():%d\n", __func__, result); | ||
3237 | + /* | ||
3238 | + * Emulating the instruction which caused the #VC exception | ||
3239 | + * failed - can't continue so print debug information | ||
3240 | + */ | ||
3241 | + BUG(); | ||
3242 | + } | ||
3243 | + | ||
3244 | + return ret; | ||
3245 | +} | ||
3246 | + | ||
3247 | +static __always_inline bool vc_is_db(unsigned long error_code) | ||
3248 | +{ | ||
3249 | + return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB; | ||
3250 | +} | ||
3251 | + | ||
3252 | +/* | ||
3253 | + * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode | ||
3254 | + * and will panic when an error happens. | ||
3255 | + */ | ||
3256 | +DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication) | ||
3257 | +{ | ||
3258 | + irqentry_state_t irq_state; | ||
3259 | + | ||
3260 | + /* | ||
3261 | + * With the current implementation it is always possible to switch to a | ||
3262 | + * safe stack because #VC exceptions only happen at known places, like | ||
3263 | + * intercepted instructions or accesses to MMIO areas/IO ports. They can | ||
3264 | + * also happen with code instrumentation when the hypervisor intercepts | ||
3265 | + * #DB, but the critical paths are forbidden to be instrumented, so #DB | ||
3266 | + * exceptions currently also only happen in safe places. | ||
3267 | + * | ||
3268 | + * But keep this here in case the noinstr annotations are violated due | ||
3269 | + * to a bug elsewhere. | ||
3270 | + */ | ||
3271 | + if (unlikely(vc_from_invalid_context(regs))) { | ||
3272 | + instrumentation_begin(); | ||
3273 | + panic("Can't handle #VC exception from unsupported context\n"); | ||
3274 | + instrumentation_end(); | ||
3275 | + } | ||
3276 | + | ||
3277 | + /* | ||
3278 | + * Handle #DB before calling into !noinstr code to avoid recursive #DB. | ||
3279 | + */ | ||
3280 | + if (vc_is_db(error_code)) { | ||
3281 | + exc_debug(regs); | ||
3282 | + return; | ||
3283 | + } | ||
3284 | + | ||
3285 | + irq_state = irqentry_nmi_enter(regs); | ||
3286 | + | ||
3287 | + instrumentation_begin(); | ||
3288 | + | ||
3289 | + if (!vc_raw_handle_exception(regs, error_code)) { | ||
3290 | + /* Show some debug info */ | ||
3291 | + show_regs(regs); | ||
3292 | + | ||
3293 | + /* Ask hypervisor to sev_es_terminate */ | ||
3294 | + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); | ||
3295 | + | ||
3296 | + /* If that fails and we get here - just panic */ | ||
3297 | + panic("Returned from Terminate-Request to Hypervisor\n"); | ||
3298 | + } | ||
3299 | + | ||
3300 | + instrumentation_end(); | ||
3301 | + irqentry_nmi_exit(regs, irq_state); | ||
3302 | +} | ||
3303 | + | ||
3304 | +/* | ||
3305 | + * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode | ||
3306 | + * and will kill the current task with SIGBUS when an error happens. | ||
3307 | + */ | ||
3308 | +DEFINE_IDTENTRY_VC_USER(exc_vmm_communication) | ||
3309 | +{ | ||
3310 | + /* | ||
3311 | + * Handle #DB before calling into !noinstr code to avoid recursive #DB. | ||
3312 | + */ | ||
3313 | + if (vc_is_db(error_code)) { | ||
3314 | + noist_exc_debug(regs); | ||
3315 | + return; | ||
3316 | + } | ||
3317 | + | ||
3318 | + irqentry_enter_from_user_mode(regs); | ||
3319 | + instrumentation_begin(); | ||
3320 | + | ||
3321 | + if (!vc_raw_handle_exception(regs, error_code)) { | ||
3322 | + /* | ||
3323 | + * Do not kill the machine if user-space triggered the | ||
3324 | + * exception. Send SIGBUS instead and let user-space deal with | ||
3325 | + * it. | ||
3326 | + */ | ||
3327 | + force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0); | ||
3328 | + } | ||
3329 | + | ||
3330 | + instrumentation_end(); | ||
3331 | + irqentry_exit_to_user_mode(regs); | ||
3332 | +} | ||
3333 | + | ||
3334 | +bool __init handle_vc_boot_ghcb(struct pt_regs *regs) | ||
3335 | +{ | ||
3336 | + unsigned long exit_code = regs->orig_ax; | ||
3337 | + struct es_em_ctxt ctxt; | ||
3338 | + enum es_result result; | ||
3339 | + | ||
3340 | + vc_ghcb_invalidate(boot_ghcb); | ||
3341 | + | ||
3342 | + result = vc_init_em_ctxt(&ctxt, regs, exit_code); | ||
3343 | + if (result == ES_OK) | ||
3344 | + result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code); | ||
3345 | + | ||
3346 | + /* Done - now check the result */ | ||
3347 | + switch (result) { | ||
3348 | + case ES_OK: | ||
3349 | + vc_finish_insn(&ctxt); | ||
3350 | + break; | ||
3351 | + case ES_UNSUPPORTED: | ||
3352 | + early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n", | ||
3353 | + exit_code, regs->ip); | ||
3354 | + goto fail; | ||
3355 | + case ES_VMM_ERROR: | ||
3356 | + early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", | ||
3357 | + exit_code, regs->ip); | ||
3358 | + goto fail; | ||
3359 | + case ES_DECODE_FAILED: | ||
3360 | + early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", | ||
3361 | + exit_code, regs->ip); | ||
3362 | + goto fail; | ||
3363 | + case ES_EXCEPTION: | ||
3364 | + vc_early_forward_exception(&ctxt); | ||
3365 | + break; | ||
3366 | + case ES_RETRY: | ||
3367 | + /* Nothing to do */ | ||
3368 | + break; | ||
3369 | + default: | ||
3370 | + BUG(); | ||
3371 | + } | ||
3372 | + | ||
3373 | + return true; | ||
3374 | + | ||
3375 | +fail: | ||
3376 | + show_regs(regs); | ||
3377 | + | ||
3378 | + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); | ||
3379 | +} | ||
3380 | + | ||
3381 | +/* | ||
3382 | + * Initial set up of SNP relies on information provided by the | ||
3383 | + * Confidential Computing blob, which can be passed to the kernel | ||
3384 | + * in the following ways, depending on how it is booted: | ||
3385 | + * | ||
3386 | + * - when booted via the boot/decompress kernel: | ||
3387 | + * - via boot_params | ||
3388 | + * | ||
3389 | + * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH): | ||
3390 | + * - via a setup_data entry, as defined by the Linux Boot Protocol | ||
3391 | + * | ||
3392 | + * Scan for the blob in that order. | ||
3393 | + */ | ||
3394 | +static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) | ||
3395 | +{ | ||
3396 | + struct cc_blob_sev_info *cc_info; | ||
3397 | + | ||
3398 | + /* Boot kernel would have passed the CC blob via boot_params. */ | ||
3399 | + if (bp->cc_blob_address) { | ||
3400 | + cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address; | ||
3401 | + goto found_cc_info; | ||
3402 | + } | ||
3403 | + | ||
3404 | + /* | ||
3405 | + * If the kernel was booted directly, without the use of the | ||
3406 | + * boot/decompression kernel, the CC blob may have been passed via | ||
3407 | + * setup_data instead. | ||
3408 | + */ | ||
3409 | + cc_info = find_cc_blob_setup_data(bp); | ||
3410 | + if (!cc_info) | ||
3411 | + return NULL; | ||
3412 | + | ||
3413 | +found_cc_info: | ||
3414 | + if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) | ||
3415 | + snp_abort(); | ||
3416 | + | ||
3417 | + return cc_info; | ||
3418 | +} | ||
3419 | + | ||
3420 | +static __head void svsm_setup(struct cc_blob_sev_info *cc_info) | ||
3421 | +{ | ||
3422 | + struct svsm_call call = {}; | ||
3423 | + int ret; | ||
3424 | + u64 pa; | ||
3425 | + | ||
3426 | + /* | ||
3427 | + * Record the SVSM Calling Area address (CAA) if the guest is not | ||
3428 | + * running at VMPL0. The CA will be used to communicate with the | ||
3429 | + * SVSM to perform the SVSM services. | ||
3430 | + */ | ||
3431 | + if (!svsm_setup_ca(cc_info)) | ||
3432 | + return; | ||
3433 | + | ||
3434 | + /* | ||
3435 | + * It is very early in the boot and the kernel is running identity | ||
3436 | + * mapped but without having adjusted the pagetables to where the | ||
3437 | + * kernel was loaded (physbase), so get the CA address using | ||
3438 | + * RIP-relative addressing. | ||
3439 | + */ | ||
3440 | + pa = (u64)rip_rel_ptr(&boot_svsm_ca_page); | ||
3441 | + | ||
3442 | + /* | ||
3443 | + * Switch over to the boot SVSM CA while the current CA is still | ||
3444 | + * addressable. There is no GHCB at this point so use the MSR protocol. | ||
3445 | + * | ||
3446 | + * SVSM_CORE_REMAP_CA call: | ||
3447 | + * RAX = 0 (Protocol=0, CallID=0) | ||
3448 | + * RCX = New CA GPA | ||
3449 | + */ | ||
3450 | + call.caa = svsm_get_caa(); | ||
3451 | + call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA); | ||
3452 | + call.rcx = pa; | ||
3453 | + ret = svsm_perform_call_protocol(&call); | ||
3454 | + if (ret) | ||
3455 | + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL); | ||
3456 | + | ||
3457 | + RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa; | ||
3458 | + RIP_REL_REF(boot_svsm_caa_pa) = pa; | ||
3459 | +} | ||
3460 | + | ||
3461 | +bool __head snp_init(struct boot_params *bp) | ||
3462 | +{ | ||
3463 | + struct cc_blob_sev_info *cc_info; | ||
3464 | + | ||
3465 | + if (!bp) | ||
3466 | + return false; | ||
3467 | + | ||
3468 | + cc_info = find_cc_blob(bp); | ||
3469 | + if (!cc_info) | ||
3470 | + return false; | ||
3471 | + | ||
3472 | + if (cc_info->secrets_phys && cc_info->secrets_len == PAGE_SIZE) | ||
3473 | + secrets_pa = cc_info->secrets_phys; | ||
3474 | + else | ||
3475 | + return false; | ||
3476 | + | ||
3477 | + setup_cpuid_table(cc_info); | ||
3478 | + | ||
3479 | + svsm_setup(cc_info); | ||
3480 | + | ||
3481 | + /* | ||
3482 | + * The CC blob will be used later to access the secrets page. Cache | ||
3483 | + * it here like the boot kernel does. | ||
3484 | + */ | ||
3485 | + bp->cc_blob_address = (u32)(unsigned long)cc_info; | ||
3486 | + | ||
3487 | + return true; | ||
3488 | +} | ||
3489 | + | ||
3490 | +void __head __noreturn snp_abort(void) | ||
3491 | +{ | ||
3492 | + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); | ||
3493 | +} | ||
66 | -- | 3494 | -- |
67 | 2.49.0.472.ge94155a9ec-goog | 3495 | 2.49.0.504.g3bcea36a83-goog | diff view generated by jsdifflib |
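
A note for readers on the RIP_REL_REF() accesses that appear throughout the
code above (e.g. in svsm_perform_call_protocol() and the early page state
helpers): this code runs from the 1:1 mapping of RAM, at an address that
differs from the kernel's link-time virtual address. Under the kernel code
model the compiler is free to emit an absolute reference to a global, which
resolves to the link-time virtual address - and that address is not mapped
yet when this code runs - while a RIP-relative access works at whatever
address the code executes. A minimal sketch of the difference, using the
rip_rel_ptr() helper referenced above; this is an illustration only, not
code from any of the patches:

    extern struct ghcb *boot_ghcb;

    static struct ghcb *get_boot_ghcb_absolute(void)
    {
            /* May be emitted as an absolute load of the link-time
               address - invalid when executing from the 1:1 map */
            return boot_ghcb;
    }

    static struct ghcb *get_boot_ghcb_pic(void)
    {
            /* Forced RIP-relative load, equivalent to the
               RIP_REL_REF(boot_ghcb) form used above */
            return *(struct ghcb **)rip_rel_ptr(&boot_ghcb);
    }

Once sev_cfg.ghcbs_initialized is set, the kernel executes from its virtual
mapping and the plain access is safe again, which is why the wrappers are
only needed on the early boot path.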
1 | From: Ard Biesheuvel <ardb@kernel.org> | 1 | From: Ard Biesheuvel <ardb@kernel.org> |
---|---|---|---|
2 | 2 | ||
3 | Linus expressed a strong preference for arch-specific asm code (i.e., | 3 | Move the SEV startup code into arch/x86/boot/startup/, where it will |
4 | virtually all of it) to reside under arch/ rather than anywhere else. | 4 | reside along with other code that executes extremely early, and |
5 | 5 | therefore needs to be built in a special manner. | |
6 | So move the EFI mixed mode startup code back, and put it under | ||
7 | arch/x86/boot/startup/ where all shared x86 startup code is going to | ||
8 | live. | ||
9 | 6 | ||
10 | Signed-off-by: Ard Biesheuvel <ardb@kernel.org> | 7 | Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
11 | --- | 8 | --- |
12 | arch/x86/boot/startup/Makefile | 3 +++ | 9 | arch/x86/boot/compressed/sev.c | 2 +- |
13 | drivers/firmware/efi/libstub/x86-mixed.S => arch/x86/boot/startup/efi-mixed.S | 0 | 10 | arch/x86/boot/startup/Makefile | 2 +- |
14 | drivers/firmware/efi/libstub/Makefile | 1 - | 11 | arch/x86/{coco/sev/shared.c => boot/startup/sev-shared.c} | 0 |
15 | 3 files changed, 3 insertions(+), 1 deletion(-) | 12 | arch/x86/{coco/sev/startup.c => boot/startup/sev-startup.c} | 2 +- |
13 | arch/x86/coco/sev/Makefile | 21 +------------------- | ||
14 | 5 files changed, 4 insertions(+), 23 deletions(-) | ||
16 | 15 | ||
16 | diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/arch/x86/boot/compressed/sev.c | ||
19 | +++ b/arch/x86/boot/compressed/sev.c | ||
20 | @@ -XXX,XX +XXX,XX @@ int svsm_perform_call_protocol(struct svsm_call *call); | ||
21 | u8 snp_vmpl; | ||
22 | |||
23 | /* Include code for early handlers */ | ||
24 | -#include "../../coco/sev/shared.c" | ||
25 | +#include "../../boot/startup/sev-shared.c" | ||
26 | |||
27 | int svsm_perform_call_protocol(struct svsm_call *call) | ||
28 | { | ||
17 | diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile | 29 | diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile |
18 | index XXXXXXX..XXXXXXX 100644 | 30 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/arch/x86/boot/startup/Makefile | 31 | --- a/arch/x86/boot/startup/Makefile |
20 | +++ b/arch/x86/boot/startup/Makefile | 32 | +++ b/arch/x86/boot/startup/Makefile |
33 | @@ -XXX,XX +XXX,XX @@ UBSAN_SANITIZE := n | ||
34 | KCOV_INSTRUMENT := n | ||
35 | |||
36 | obj-$(CONFIG_X86_64) += gdt_idt.o map_kernel.o | ||
37 | -obj-$(CONFIG_AMD_MEM_ENCRYPT) += sme.o | ||
38 | +obj-$(CONFIG_AMD_MEM_ENCRYPT) += sme.o sev-startup.o | ||
39 | |||
40 | lib-$(CONFIG_X86_64) += la57toggle.o | ||
41 | lib-$(CONFIG_EFI_MIXED) += efi-mixed.o | ||
42 | diff --git a/arch/x86/coco/sev/shared.c b/arch/x86/boot/startup/sev-shared.c | ||
43 | similarity index 100% | ||
44 | rename from arch/x86/coco/sev/shared.c | ||
45 | rename to arch/x86/boot/startup/sev-shared.c | ||
46 | diff --git a/arch/x86/coco/sev/startup.c b/arch/x86/boot/startup/sev-startup.c | ||
47 | similarity index 99% | ||
48 | rename from arch/x86/coco/sev/startup.c | ||
49 | rename to arch/x86/boot/startup/sev-startup.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/arch/x86/coco/sev/startup.c | ||
52 | +++ b/arch/x86/boot/startup/sev-startup.c | ||
53 | @@ -XXX,XX +XXX,XX @@ static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) | ||
54 | } | ||
55 | |||
56 | /* Include code shared with pre-decompression boot stage */ | ||
57 | -#include "shared.c" | ||
58 | +#include "sev-shared.c" | ||
59 | |||
60 | noinstr void __sev_put_ghcb(struct ghcb_state *state) | ||
61 | { | ||
62 | diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile | ||
63 | index XXXXXXX..XXXXXXX 100644 | ||
64 | --- a/arch/x86/coco/sev/Makefile | ||
65 | +++ b/arch/x86/coco/sev/Makefile | ||
21 | @@ -XXX,XX +XXX,XX @@ | 66 | @@ -XXX,XX +XXX,XX @@ |
22 | # SPDX-License-Identifier: GPL-2.0 | 67 | # SPDX-License-Identifier: GPL-2.0 |
23 | 68 | ||
24 | +KBUILD_AFLAGS += -D__DISABLE_EXPORTS | 69 | -obj-y += core.o startup.o |
25 | + | 70 | - |
26 | lib-$(CONFIG_X86_64) += la57toggle.o | 71 | -# jump tables are emitted using absolute references in non-PIC code |
27 | +lib-$(CONFIG_EFI_MIXED) += efi-mixed.o | 72 | -# so they cannot be used in the early SEV startup code |
28 | diff --git a/drivers/firmware/efi/libstub/x86-mixed.S b/arch/x86/boot/startup/efi-mixed.S | 73 | -CFLAGS_startup.o += -fno-jump-tables |
29 | similarity index 100% | 74 | - |
30 | rename from drivers/firmware/efi/libstub/x86-mixed.S | 75 | -ifdef CONFIG_FUNCTION_TRACER |
31 | rename to arch/x86/boot/startup/efi-mixed.S | 76 | -CFLAGS_REMOVE_startup.o = -pg |
32 | diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile | 77 | -endif |
33 | index XXXXXXX..XXXXXXX 100644 | 78 | - |
34 | --- a/drivers/firmware/efi/libstub/Makefile | 79 | -KASAN_SANITIZE_startup.o := n |
35 | +++ b/drivers/firmware/efi/libstub/Makefile | 80 | -KMSAN_SANITIZE_startup.o := n |
36 | @@ -XXX,XX +XXX,XX @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o string.o intrinsics.o systable.o \ | 81 | -KCOV_INSTRUMENT_startup.o := n |
37 | lib-$(CONFIG_ARM) += arm32-stub.o | 82 | - |
38 | lib-$(CONFIG_ARM64) += kaslr.o arm64.o arm64-stub.o smbios.o | 83 | -# With some compiler versions the generated code results in boot hangs, caused |
39 | lib-$(CONFIG_X86) += x86-stub.o smbios.o | 84 | -# by several compilation units. To be safe, disable all instrumentation. |
40 | -lib-$(CONFIG_EFI_MIXED) += x86-mixed.o | 85 | -KCSAN_SANITIZE := n |
41 | lib-$(CONFIG_X86_64) += x86-5lvl.o | 86 | - |
42 | lib-$(CONFIG_RISCV) += kaslr.o riscv.o riscv-stub.o | 87 | -# Clang 14 and older may fail to respect __no_sanitize_undefined when inlining |
43 | lib-$(CONFIG_LOONGARCH) += loongarch.o loongarch-stub.o | 88 | -UBSAN_SANITIZE := n |
89 | +obj-y += core.o | ||
44 | -- | 90 | -- |
45 | 2.49.0.472.ge94155a9ec-goog | 91 | 2.49.0.504.g3bcea36a83-goog |
New patch | |||
---|---|---|---|
1 | From: Ard Biesheuvel <ardb@kernel.org> | ||
1 | 2 | ||
3 | Now that the early SEV code is built with -fPIC, RIP_REL_REF() has no | ||
4 | effect and can be dropped. | ||
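To see why it becomes a no-op: RIP_REL_REF() only has an effect in non-PIC objects, where the compiler may emit absolute references that are not yet valid this early in boot. A minimal self-contained sketch of the mechanism (userspace C, x86-64; it mirrors the asm/asm.h definitions but is not the kernel code):

#include <stdio.h>

static int some_var = 42;

/*
 * Compute the variable's address with a RIP-relative LEA, bypassing
 * any absolute reference the compiler might otherwise emit.
 */
static inline __attribute__((always_inline)) void *rip_rel_ptr(void *p)
{
        asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p));
        return p;
}

#ifndef __pic__
/* Non-PIC build: force the access through a RIP-relative address. */
#define RIP_REL_REF(var)        (*(typeof(&(var)))rip_rel_ptr(&(var)))
#else
/* PIC build: every access is RIP-relative already; plain access. */
#define RIP_REL_REF(var)        (var)
#endif

int main(void)
{
        printf("%d\n", RIP_REL_REF(some_var));
        return 0;
}

Built as a default PIE, __pic__ is defined and the macro collapses to a plain access; built with gcc -O2 -fno-pie -no-pie it exercises the LEA path. Since startup/ objects are now compiled with -fPIC, every wrapped access was already the plain-access branch, which is what makes these removals safe.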
5 | |||
6 | Signed-off-by: Ard Biesheuvel <ardb@kernel.org> | ||
7 | --- | ||
8 | arch/x86/boot/startup/sev-shared.c | 26 +++++++++----------- | ||
9 | arch/x86/boot/startup/sev-startup.c | 16 ++++++------ | ||
10 | arch/x86/include/asm/sev-internal.h | 18 +++----------- | ||
11 | 3 files changed, 23 insertions(+), 37 deletions(-) | ||
12 | |||
13 | diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/arch/x86/boot/startup/sev-shared.c | ||
16 | +++ b/arch/x86/boot/startup/sev-shared.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call) | ||
18 | * Fill in protocol and format specifiers. This can be called very early | ||
19 | * in the boot, so use rip-relative references as needed. | ||
20 | */ | ||
21 | - ghcb->protocol_version = RIP_REL_REF(ghcb_version); | ||
22 | + ghcb->protocol_version = ghcb_version; | ||
23 | ghcb->ghcb_usage = GHCB_DEFAULT_USAGE; | ||
24 | |||
25 | ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL); | ||
26 | @@ -XXX,XX +XXX,XX @@ snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf) | ||
27 | leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0; | ||
28 | |||
29 | /* Skip post-processing for out-of-range zero leafs. */ | ||
30 | - if (!(leaf->fn <= RIP_REL_REF(cpuid_std_range_max) || | ||
31 | - (leaf->fn >= 0x40000000 && leaf->fn <= RIP_REL_REF(cpuid_hyp_range_max)) || | ||
32 | - (leaf->fn >= 0x80000000 && leaf->fn <= RIP_REL_REF(cpuid_ext_range_max)))) | ||
33 | + if (!(leaf->fn <= cpuid_std_range_max || | ||
34 | + (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) || | ||
35 | + (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max))) | ||
36 | return 0; | ||
37 | } | ||
38 | |||
39 | @@ -XXX,XX +XXX,XX @@ static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info) | ||
40 | const struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; | ||
41 | |||
42 | if (fn->eax_in == 0x0) | ||
43 | - RIP_REL_REF(cpuid_std_range_max) = fn->eax; | ||
44 | + cpuid_std_range_max = fn->eax; | ||
45 | else if (fn->eax_in == 0x40000000) | ||
46 | - RIP_REL_REF(cpuid_hyp_range_max) = fn->eax; | ||
47 | + cpuid_hyp_range_max = fn->eax; | ||
48 | else if (fn->eax_in == 0x80000000) | ||
49 | - RIP_REL_REF(cpuid_ext_range_max) = fn->eax; | ||
50 | + cpuid_ext_range_max = fn->eax; | ||
51 | } | ||
52 | } | ||
53 | |||
54 | @@ -XXX,XX +XXX,XX @@ static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr, | ||
55 | { | ||
56 | int ret; | ||
57 | |||
58 | - /* | ||
59 | - * This can be called very early during boot, so use rIP-relative | ||
60 | - * references as needed. | ||
61 | - */ | ||
62 | - if (RIP_REL_REF(snp_vmpl)) { | ||
63 | + if (snp_vmpl) { | ||
64 | svsm_pval_4k_page(paddr, validate); | ||
65 | } else { | ||
66 | ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate); | ||
67 | @@ -XXX,XX +XXX,XX @@ static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info) | ||
68 | if (!secrets_page->svsm_guest_vmpl) | ||
69 | sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_VMPL0); | ||
70 | |||
71 | - RIP_REL_REF(snp_vmpl) = secrets_page->svsm_guest_vmpl; | ||
72 | + snp_vmpl = secrets_page->svsm_guest_vmpl; | ||
73 | |||
74 | caa = secrets_page->svsm_caa; | ||
75 | |||
76 | @@ -XXX,XX +XXX,XX @@ static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info) | ||
77 | * The CA is identity mapped when this routine is called, both by the | ||
78 | * decompressor code and the early kernel code. | ||
79 | */ | ||
80 | - RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)caa; | ||
81 | - RIP_REL_REF(boot_svsm_caa_pa) = caa; | ||
82 | + boot_svsm_caa = (struct svsm_ca *)caa; | ||
83 | + boot_svsm_caa_pa = caa; | ||
84 | |||
85 | /* Advertise the SVSM presence via CPUID. */ | ||
86 | cpuid_table = (struct snp_cpuid_table *)snp_cpuid_get_table(); | ||
87 | diff --git a/arch/x86/boot/startup/sev-startup.c b/arch/x86/boot/startup/sev-startup.c | ||
88 | index XXXXXXX..XXXXXXX 100644 | ||
89 | --- a/arch/x86/boot/startup/sev-startup.c | ||
90 | +++ b/arch/x86/boot/startup/sev-startup.c | ||
91 | @@ -XXX,XX +XXX,XX @@ int svsm_perform_call_protocol(struct svsm_call *call) | ||
92 | * ghcbs_initialized is set, then it is late in the boot and no need | ||
93 | * to worry about rip-relative references in called functions. | ||
94 | */ | ||
95 | - if (RIP_REL_REF(sev_cfg).ghcbs_initialized) | ||
96 | + if (sev_cfg.ghcbs_initialized) | ||
97 | ghcb = __sev_get_ghcb(&state); | ||
98 | - else if (RIP_REL_REF(boot_ghcb)) | ||
99 | - ghcb = RIP_REL_REF(boot_ghcb); | ||
100 | + else if (boot_ghcb) | ||
101 | + ghcb = boot_ghcb; | ||
102 | else | ||
103 | ghcb = NULL; | ||
104 | |||
105 | @@ -XXX,XX +XXX,XX @@ int svsm_perform_call_protocol(struct svsm_call *call) | ||
106 | : svsm_perform_msr_protocol(call); | ||
107 | } while (ret == -EAGAIN); | ||
108 | |||
109 | - if (RIP_REL_REF(sev_cfg).ghcbs_initialized) | ||
110 | + if (sev_cfg.ghcbs_initialized) | ||
111 | __sev_put_ghcb(&state); | ||
112 | |||
113 | native_local_irq_restore(flags); | ||
114 | @@ -XXX,XX +XXX,XX @@ void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long padd | ||
115 | * This eliminates worries about jump tables or checking boot_cpu_data | ||
116 | * in the cc_platform_has() function. | ||
117 | */ | ||
118 | - if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) | ||
119 | + if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) | ||
120 | return; | ||
121 | |||
122 | /* | ||
123 | @@ -XXX,XX +XXX,XX @@ void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr | ||
124 | * This eliminates worries about jump tables or checking boot_cpu_data | ||
125 | * in the cc_platform_has() function. | ||
126 | */ | ||
127 | - if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED)) | ||
128 | + if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) | ||
129 | return; | ||
130 | |||
131 | /* Ask hypervisor to mark the memory pages shared in the RMP table. */ | ||
132 | @@ -XXX,XX +XXX,XX @@ static __head void svsm_setup(struct cc_blob_sev_info *cc_info) | ||
133 | if (ret) | ||
134 | sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL); | ||
135 | |||
136 | - RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa; | ||
137 | - RIP_REL_REF(boot_svsm_caa_pa) = pa; | ||
138 | + boot_svsm_caa = (struct svsm_ca *)pa; | ||
139 | + boot_svsm_caa_pa = pa; | ||
140 | } | ||
141 | |||
142 | bool __head snp_init(struct boot_params *bp) | ||
143 | diff --git a/arch/x86/include/asm/sev-internal.h b/arch/x86/include/asm/sev-internal.h | ||
144 | index XXXXXXX..XXXXXXX 100644 | ||
145 | --- a/arch/x86/include/asm/sev-internal.h | ||
146 | +++ b/arch/x86/include/asm/sev-internal.h | ||
147 | @@ -XXX,XX +XXX,XX @@ extern u64 boot_svsm_caa_pa; | ||
148 | |||
149 | static __always_inline struct svsm_ca *svsm_get_caa(void) | ||
150 | { | ||
151 | - /* | ||
152 | - * Use rIP-relative references when called early in the boot. If | ||
153 | - * ->use_cas is set, then it is late in the boot and no need | ||
154 | - * to worry about rIP-relative references. | ||
155 | - */ | ||
156 | - if (RIP_REL_REF(sev_cfg).use_cas) | ||
157 | + if (sev_cfg.use_cas) | ||
158 | return this_cpu_read(svsm_caa); | ||
159 | else | ||
160 | - return RIP_REL_REF(boot_svsm_caa); | ||
161 | + return boot_svsm_caa; | ||
162 | } | ||
163 | |||
164 | static __always_inline u64 svsm_get_caa_pa(void) | ||
165 | { | ||
166 | - /* | ||
167 | - * Use rIP-relative references when called early in the boot. If | ||
168 | - * ->use_cas is set, then it is late in the boot and no need | ||
169 | - * to worry about rIP-relative references. | ||
170 | - */ | ||
171 | - if (RIP_REL_REF(sev_cfg).use_cas) | ||
172 | + if (sev_cfg.use_cas) | ||
173 | return this_cpu_read(svsm_caa_pa); | ||
174 | else | ||
175 | - return RIP_REL_REF(boot_svsm_caa_pa); | ||
176 | + return boot_svsm_caa_pa; | ||
177 | } | ||
178 | |||
179 | int svsm_perform_call_protocol(struct svsm_call *call); | ||
180 | -- | ||
181 | 2.49.0.504.g3bcea36a83-goog |
1 | From: Ard Biesheuvel <ardb@kernel.org> | 1 | From: Ard Biesheuvel <ardb@kernel.org> |
---|---|---|---|
2 | 2 | ||
3 | Merge the local include "pgtable.h" (which declares the API of the | 3 | Now that all users have been moved into startup/ where PIC codegen is |
4 | 5-level paging trampoline) into <asm/boot.h> so that its implementation | 4 | used, RIP_REL_REF() is no longer needed. Remove it. (See the codegen | sketch below.)
5 | in la57toggle.S as well as the calling code can be decoupled from the | ||
6 | traditional decompressor. | ||
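The underlying codegen difference, as a rough illustration (typical gcc output for the x86-64 small code model; exact assembly varies by compiler version and flags):

static int some_var;

int *some_var_addr(void)
{
        return &some_var;
}

/*
 * non-PIC: movl $some_var, %eax       - absolute address baked into
 *                                       the code; only correct after
 *                                       relocations have been applied
 * -fPIC:   leaq some_var(%rip), %rax  - correct wherever the code runs
 */

With everything under startup/ built with -fPIC, the second form is what the compiler generates on its own, so the wrapper has nothing left to do.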
7 | 5 | ||
8 | Signed-off-by: Ard Biesheuvel <ardb@kernel.org> | 6 | Signed-off-by: Ard Biesheuvel <ardb@kernel.org> |
9 | --- | 7 | --- |
10 | arch/x86/boot/compressed/head_64.S | 1 - | 8 | arch/x86/include/asm/asm.h | 5 ----- |
11 | arch/x86/boot/compressed/la57toggle.S | 1 - | 9 | 1 file changed, 5 deletions(-) |
12 | arch/x86/boot/compressed/misc.c | 1 - | ||
13 | arch/x86/boot/compressed/pgtable.h | 18 ------------------ | ||
14 | arch/x86/boot/compressed/pgtable_64.c | 1 - | ||
15 | arch/x86/include/asm/boot.h | 10 ++++++++++ | ||
16 | 6 files changed, 10 insertions(+), 22 deletions(-) | ||
17 | 10 | ||
18 | diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S | 11 | diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h |
19 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/arch/x86/boot/compressed/head_64.S | 13 | --- a/arch/x86/include/asm/asm.h |
21 | +++ b/arch/x86/boot/compressed/head_64.S | 14 | +++ b/arch/x86/include/asm/asm.h |
22 | @@ -XXX,XX +XXX,XX @@ | 15 | @@ -XXX,XX +XXX,XX @@ static __always_inline __pure void *rip_rel_ptr(void *p) |
23 | #include <asm/bootparam.h> | 16 | |
24 | #include <asm/desc_defs.h> | 17 | return p; |
25 | #include <asm/trapnr.h> | 18 | } |
26 | -#include "pgtable.h" | 19 | -#ifndef __pic__ |
20 | -#define RIP_REL_REF(var) (*(typeof(&(var)))rip_rel_ptr(&(var))) | ||
21 | -#else | ||
22 | -#define RIP_REL_REF(var) (var) | ||
23 | -#endif | ||
24 | #endif | ||
27 | 25 | ||
28 | /* | 26 | /* |
29 | * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result | ||
30 | diff --git a/arch/x86/boot/compressed/la57toggle.S b/arch/x86/boot/compressed/la57toggle.S | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/arch/x86/boot/compressed/la57toggle.S | ||
33 | +++ b/arch/x86/boot/compressed/la57toggle.S | ||
34 | @@ -XXX,XX +XXX,XX @@ | ||
35 | #include <asm/boot.h> | ||
36 | #include <asm/msr.h> | ||
37 | #include <asm/processor-flags.h> | ||
38 | -#include "pgtable.h" | ||
39 | |||
40 | /* | ||
41 | * This is the 32-bit trampoline that will be copied over to low memory. It | ||
42 | diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | ||
44 | --- a/arch/x86/boot/compressed/misc.c | ||
45 | +++ b/arch/x86/boot/compressed/misc.c | ||
46 | @@ -XXX,XX +XXX,XX @@ | ||
47 | |||
48 | #include "misc.h" | ||
49 | #include "error.h" | ||
50 | -#include "pgtable.h" | ||
51 | #include "../string.h" | ||
52 | #include "../voffset.h" | ||
53 | #include <asm/bootparam_utils.h> | ||
54 | diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h | ||
55 | deleted file mode 100644 | ||
56 | index XXXXXXX..XXXXXXX | ||
57 | --- a/arch/x86/boot/compressed/pgtable.h | ||
58 | +++ /dev/null | ||
59 | @@ -XXX,XX +XXX,XX @@ | ||
60 | -#ifndef BOOT_COMPRESSED_PAGETABLE_H | ||
61 | -#define BOOT_COMPRESSED_PAGETABLE_H | ||
62 | - | ||
63 | -#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE) | ||
64 | - | ||
65 | -#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE | ||
66 | -#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 | ||
67 | - | ||
68 | -#ifndef __ASSEMBLER__ | ||
69 | - | ||
70 | -extern unsigned long *trampoline_32bit; | ||
71 | - | ||
72 | -extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl); | ||
73 | - | ||
74 | -extern const u16 trampoline_ljmp_imm_offset; | ||
75 | - | ||
76 | -#endif /* __ASSEMBLER__ */ | ||
77 | -#endif /* BOOT_COMPRESSED_PAGETABLE_H */ | ||
78 | diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c | ||
79 | index XXXXXXX..XXXXXXX 100644 | ||
80 | --- a/arch/x86/boot/compressed/pgtable_64.c | ||
81 | +++ b/arch/x86/boot/compressed/pgtable_64.c | ||
82 | @@ -XXX,XX +XXX,XX @@ | ||
83 | #include <asm/bootparam_utils.h> | ||
84 | #include <asm/e820/types.h> | ||
85 | #include <asm/processor.h> | ||
86 | -#include "pgtable.h" | ||
87 | #include "../string.h" | ||
88 | #include "efi.h" | ||
89 | |||
90 | diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h | ||
91 | index XXXXXXX..XXXXXXX 100644 | ||
92 | --- a/arch/x86/include/asm/boot.h | ||
93 | +++ b/arch/x86/include/asm/boot.h | ||
94 | @@ -XXX,XX +XXX,XX @@ | ||
95 | # define BOOT_STACK_SIZE 0x1000 | ||
96 | #endif | ||
97 | |||
98 | +#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE) | ||
99 | + | ||
100 | +#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE | ||
101 | +#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 | ||
102 | + | ||
103 | #ifndef __ASSEMBLER__ | ||
104 | extern unsigned int output_len; | ||
105 | extern const unsigned long kernel_text_size; | ||
106 | @@ -XXX,XX +XXX,XX @@ unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, | ||
107 | void (*error)(char *x)); | ||
108 | |||
109 | extern struct boot_params *boot_params_ptr; | ||
110 | +extern unsigned long *trampoline_32bit; | ||
111 | +extern const u16 trampoline_ljmp_imm_offset; | ||
112 | + | ||
113 | +void trampoline_32bit_src(void *trampoline, bool enable_5lvl); | ||
114 | + | ||
115 | #endif | ||
116 | |||
117 | #endif /* _ASM_X86_BOOT_H */ | ||
118 | -- | 27 | -- |
119 | 2.49.0.472.ge94155a9ec-goog | 28 | 2.49.0.504.g3bcea36a83-goog |