From: Ard Biesheuvel <ardb@kernel.org>

!! NOTE: patches #7 - #10 depend on [0] !!

Reorganize C code that is used during early boot, either in the
decompressor/EFI stub or the kernel proper, but before the kernel
virtual mapping is up.

v4:
- drop patches that were queued up
- fix address space error in patch #1
- add patches for SEV-SNP boot code - these cannot be applied yet, but
  are included for completeness

v3:
- keep rip_rel_ptr() around in PIC code - sadly, it is still needed in
  some cases
- remove RIP_REL_REF() uses in separate patches
- keep __head annotations for now, they will all be removed later
- disable objtool validation for library objects (i.e., pieces that are
  not linked into vmlinux)

I will follow up with a series that gets rid of .head.text altogether,
as it will no longer be needed at all once the startup code is checked
for absolute relocations.

Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Dionna Amalie Glaze <dionnaglaze@google.com>
Cc: Kevin Loughlin <kevinloughlin@google.com>

[0] https://lore.kernel.org/all/20250410132850.3708703-2-ardb+git@google.com/T/#u

Ard Biesheuvel (11):
  x86/asm: Make rip_rel_ptr() usable from fPIC code
  x86/boot: Move the early GDT/IDT setup code into startup/
  x86/boot: Move early kernel mapping code into startup/
  x86/boot: Drop RIP_REL_REF() uses from early mapping code
  x86/boot: Move early SME init code into startup/
  x86/boot: Drop RIP_REL_REF() uses from SME startup code
  x86/sev: Prepare for splitting off early SEV code
  x86/sev: Split off startup code from core code
  x86/boot: Move SEV startup code into startup/
  x86/boot: Drop RIP_REL_REF() uses from early SEV code
  x86/asm: Retire RIP_REL_REF()

 arch/x86/boot/compressed/Makefile                          |    2 +-
 arch/x86/boot/compressed/sev.c                             |   17 +-
 arch/x86/boot/startup/Makefile                             |   16 +
 arch/x86/boot/startup/gdt_idt.c                            |   84 +
 arch/x86/boot/startup/map_kernel.c                         |  225 +++
 arch/x86/{coco/sev/shared.c => boot/startup/sev-shared.c}  |  375 +----
 arch/x86/boot/startup/sev-startup.c                        | 1395 ++++++++++++++++
 arch/x86/{mm/mem_encrypt_identity.c => boot/startup/sme.c} |   19 +-
 arch/x86/coco/sev/Makefile                                 |   19 -
 arch/x86/coco/sev/core.c                                   | 1726 ++++----------------
 arch/x86/include/asm/asm.h                                 |    5 -
 arch/x86/include/asm/coco.h                                |    2 +-
 arch/x86/include/asm/mem_encrypt.h                         |    2 +-
 arch/x86/include/asm/sev-internal.h                        |  112 ++
 arch/x86/include/asm/sev.h                                 |   37 +
 arch/x86/kernel/head64.c                                   |  285 +---
 arch/x86/mm/Makefile                                       |    6 -
 17 files changed, 2208 insertions(+), 2119 deletions(-)
 create mode 100644 arch/x86/boot/startup/gdt_idt.c
 create mode 100644 arch/x86/boot/startup/map_kernel.c
 rename arch/x86/{coco/sev/shared.c => boot/startup/sev-shared.c} (78%)
 create mode 100644 arch/x86/boot/startup/sev-startup.c
 rename arch/x86/{mm/mem_encrypt_identity.c => boot/startup/sme.c} (97%)
 create mode 100644 arch/x86/include/asm/sev-internal.h

-- 
2.49.0.504.g3bcea36a83-goog
From: Ard Biesheuvel <ardb@kernel.org>

RIP_REL_REF() is used in non-PIC C code that is called very early,
before the kernel virtual mapping is up, which is the mapping that the
linker expects. It is currently used in two different ways:
- to refer to the value of a global variable, including as an lvalue in
  assignments;
- to take the address of a global variable via the mapping that the code
  currently executes at.

The former case is only needed in non-PIC code, as PIC code will never
use absolute symbol references when the address of the symbol is not
being used. But taking the address of a variable in PIC code may still
require extra care, as a stack allocated struct assignment may be
emitted as a memcpy() from a statically allocated copy in .rodata.

For instance, this

  void startup_64_setup_gdt_idt(void)
  {
          struct desc_ptr startup_gdt_descr = {
                  .address = (__force unsigned long)gdt_page.gdt,
                  .size = GDT_SIZE - 1,
          };

may result in an absolute symbol reference in PIC code, even though the
struct is allocated on the stack and populated at runtime.

To address this case, make rip_rel_ptr() accessible in PIC code, and
update any existing uses where the address of a global variable is
taken using RIP_REL_REF.

Once all code of this nature has been moved into arch/x86/boot/startup
and built with -fPIC, RIP_REL_REF() can be retired, and only
rip_rel_ptr() will remain.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/coco/sev/core.c           |  2 +-
 arch/x86/coco/sev/shared.c         |  4 ++--
 arch/x86/include/asm/asm.h         |  2 +-
 arch/x86/kernel/head64.c           | 24 ++++++++++----------
 arch/x86/mm/mem_encrypt_identity.c |  6 ++---
 5 files changed, 19 insertions(+), 19 deletions(-)
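Before the diff itself, a minimal, hypothetical sketch of the two usage
patterns the commit message distinguishes; the variable and function
names below are invented for illustration and are not part of the patch:

  /*
   * Illustration only. In non-PIC code running from the 1:1 mapping, a
   * plain access to a global may be emitted as an absolute reference to
   * its link-time (virtual) address, which is not mapped yet.
   */
  extern unsigned long some_global;

  static void early_probe(void)
  {
          /* case 1: access the *value* through the current mapping */
          unsigned long v = RIP_REL_REF(some_global);

          /* case 2: take the *address* as seen through the 1:1 mapping */
          unsigned long *p = rip_rel_ptr(&some_global);

          *p = v + 1;
  }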
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -XXX,XX +XXX,XX @@ static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
     * kernel was loaded (physbase), so the get the CA address using
     * RIP-relative addressing.
     */
-    pa = (u64)&RIP_REL_REF(boot_svsm_ca_page);
+    pa = (u64)rip_rel_ptr(&boot_svsm_ca_page);

    /*
     * Switch over to the boot SVSM CA while the current CA is still
diff --git a/arch/x86/coco/sev/shared.c b/arch/x86/coco/sev/shared.c
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/coco/sev/shared.c
+++ b/arch/x86/coco/sev/shared.c
@@ -XXX,XX +XXX,XX @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid
 */
 static const struct snp_cpuid_table *snp_cpuid_get_table(void)
 {
-    return &RIP_REL_REF(cpuid_table_copy);
+    return rip_rel_ptr(&cpuid_table_copy);
 }

 /*
@@ -XXX,XX +XXX,XX @@ static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
     * routine is running identity mapped when called, both by the decompressor
     * code and the early kernel code.
     */
-    if (!rmpadjust((unsigned long)&RIP_REL_REF(boot_ghcb_page), RMP_PG_SIZE_4K, 1))
+    if (!rmpadjust((unsigned long)rip_rel_ptr(&boot_ghcb_page), RMP_PG_SIZE_4K, 1))
        return false;

    /*
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -XXX,XX +XXX,XX @@
 #endif

 #ifndef __ASSEMBLER__
-#ifndef __pic__
 static __always_inline __pure void *rip_rel_ptr(void *p)
 {
    asm("leaq %c1(%%rip), %0" : "=r"(p) : "i"(p));

    return p;
 }
+#ifndef __pic__
 #define RIP_REL_REF(var)    (*(typeof(&(var)))rip_rel_ptr(&(var)))
 #else
 #define RIP_REL_REF(var)    (var)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -XXX,XX +XXX,XX @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
     * attribute.
     */
    if (sme_get_me_mask()) {
-        paddr = (unsigned long)&RIP_REL_REF(__start_bss_decrypted);
-        paddr_end = (unsigned long)&RIP_REL_REF(__end_bss_decrypted);
+        paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
+        paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);

        for (; paddr < paddr_end; paddr += PMD_SIZE) {
            /*
@@ -XXX,XX +XXX,XX @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
 unsigned long __head __startup_64(unsigned long p2v_offset,
                 struct boot_params *bp)
 {
-    pmd_t (*early_pgts)[PTRS_PER_PMD] = RIP_REL_REF(early_dynamic_pgts);
-    unsigned long physaddr = (unsigned long)&RIP_REL_REF(_text);
+    pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
+    unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
    unsigned long va_text, va_end;
    unsigned long pgtable_flags;
    unsigned long load_delta;
@@ -XXX,XX +XXX,XX @@ unsigned long __head __startup_64(unsigned long p2v_offset,
        for (;;);

    va_text = physaddr - p2v_offset;
-    va_end = (unsigned long)&RIP_REL_REF(_end) - p2v_offset;
+    va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;

    /* Include the SME encryption mask in the fixup value */
    load_delta += sme_get_me_mask();

    /* Fixup the physical addresses in the page table */

-    pgd = &RIP_REL_REF(early_top_pgt)->pgd;
+    pgd = rip_rel_ptr(early_top_pgt);
    pgd[pgd_index(__START_KERNEL_map)] += load_delta;

    if (IS_ENABLED(CONFIG_X86_5LEVEL) && la57) {
-        p4d = (p4dval_t *)&RIP_REL_REF(level4_kernel_pgt);
+        p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
        p4d[MAX_PTRS_PER_P4D - 1] += load_delta;

        pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
@@ -XXX,XX +XXX,XX @@ unsigned long __head __startup_64(unsigned long p2v_offset,
     * error, causing the BIOS to halt the system.
     */

-    pmd = &RIP_REL_REF(level2_kernel_pgt)->pmd;
+    pmd = rip_rel_ptr(level2_kernel_pgt);

    /* invalidate pages before the kernel image */
    for (i = 0; i < pmd_index(va_text); i++)
@@ -XXX,XX +XXX,XX @@ static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data;
 static void __head startup_64_load_idt(void *vc_handler)
 {
    struct desc_ptr desc = {
-        .address = (unsigned long)&RIP_REL_REF(bringup_idt_table),
+        .address = (unsigned long)rip_rel_ptr(bringup_idt_table),
        .size = sizeof(bringup_idt_table) - 1,
    };
    struct idt_data data;
@@ -XXX,XX +XXX,XX @@ void early_setup_idt(void)
 */
 void __head startup_64_setup_gdt_idt(void)
 {
-    struct desc_struct *gdt = (void *)(__force unsigned long)gdt_page.gdt;
+    struct gdt_page *gp = rip_rel_ptr((void *)(__force unsigned long)&gdt_page);
    void *handler = NULL;

    struct desc_ptr startup_gdt_descr = {
-        .address = (unsigned long)&RIP_REL_REF(*gdt),
+        .address = (unsigned long)gp->gdt,
        .size = GDT_SIZE - 1,
    };

@@ -XXX,XX +XXX,XX @@ void __head startup_64_setup_gdt_idt(void)
         "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory");

    if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
-        handler = &RIP_REL_REF(vc_no_ghcb);
+        handler = rip_rel_ptr(vc_no_ghcb);

    startup_64_load_idt(handler);
 }
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -XXX,XX +XXX,XX @@ void __head sme_encrypt_kernel(struct boot_params *bp)
     * memory from being cached.
     */

-    kernel_start = (unsigned long)RIP_REL_REF(_text);
-    kernel_end = ALIGN((unsigned long)RIP_REL_REF(_end), PMD_SIZE);
+    kernel_start = (unsigned long)rip_rel_ptr(_text);
+    kernel_end = ALIGN((unsigned long)rip_rel_ptr(_end), PMD_SIZE);
    kernel_len = kernel_end - kernel_start;

    initrd_start = 0;
@@ -XXX,XX +XXX,XX @@ void __head sme_encrypt_kernel(struct boot_params *bp)
     * pagetable structures for the encryption of the kernel
     * pagetable structures for workarea (in case not currently mapped)
     */
-    execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea);
+    execute_start = workarea_start = (unsigned long)rip_rel_ptr(sme_workarea);
    execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE;
    execute_len = execute_end - execute_start;

-- 
2.49.0.504.g3bcea36a83-goog
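For readers unfamiliar with the idiom, the rip_rel_ptr() helper touched
in the asm.h hunk above lets the CPU, rather than the linker, resolve
the symbol address: the "i" constraint hands the symbol to a leaq
instruction, which computes the address relative to the current
instruction pointer, i.e. through whatever mapping the code happens to
be running from. A hedged usage sketch (the array name is hypothetical,
not taken from the series):

  extern char boot_stack[4096];          /* hypothetical symbol */

  static inline void *boot_stack_top(void)
  {
          /* address as seen via the mapping the code currently runs from */
          return rip_rel_ptr(boot_stack) + sizeof(boot_stack);
  }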
From: Ard Biesheuvel <ardb@kernel.org>

Move the early GDT/IDT setup code that runs long before the kernel
virtual mapping is up into arch/x86/boot/startup/, and build it in a way
that ensures that the code tolerates being called from the 1:1 mapping
of memory. The code itself is left unchanged by this patch.

Also tweak the sed symbol matching pattern in the decompressor to match
on lower case 't' or 'b', as these will be emitted by Clang for symbols
with hidden linkage.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/boot/compressed/Makefile |  2 +-
 arch/x86/boot/startup/Makefile    | 15 ++++
 arch/x86/boot/startup/gdt_idt.c   | 84 ++++++++++++++++++++
 arch/x86/kernel/head64.c          | 74 -----------------
 4 files changed, 100 insertions(+), 75 deletions(-)

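As a condensed illustration of what the relocated gdt_idt.c does, a
sketch distilled from the hunks below (not the literal file contents;
error handling and the IDT entry population are omitted):

  static void __head startup_64_load_idt(void *vc_handler)
  {
          struct desc_ptr desc = {
                  .address = (unsigned long)rip_rel_ptr(bringup_idt_table),
                  .size    = sizeof(bringup_idt_table) - 1,
          };

          /* optionally install vc_handler for #VC, then load the IDT */
          native_load_idt(&desc);
  }

  void __head startup_64_setup_gdt_idt(void)
  {
          struct gdt_page *gp = rip_rel_ptr((void *)(__force unsigned long)&gdt_page);
          struct desc_ptr startup_gdt_descr = {
                  .address = (unsigned long)gp->gdt,
                  .size    = GDT_SIZE - 1,
          };

          native_load_gdt(&startup_gdt_descr);
          /* reload the data segment selectors, then load the bringup IDT */
  }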
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
20
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
24
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
25
--- a/arch/x86/boot/compressed/Makefile
22
--- a/arch/x86/boot/compressed/Makefile
26
+++ b/arch/x86/boot/compressed/Makefile
23
+++ b/arch/x86/boot/compressed/Makefile
...
...
69
+
66
+
70
+#include <linux/linkage.h>
67
+#include <linux/linkage.h>
71
+#include <linux/types.h>
68
+#include <linux/types.h>
72
+
69
+
73
+#include <asm/desc.h>
70
+#include <asm/desc.h>
71
+#include <asm/init.h>
74
+#include <asm/setup.h>
72
+#include <asm/setup.h>
75
+#include <asm/sev.h>
73
+#include <asm/sev.h>
76
+#include <asm/trapnr.h>
74
+#include <asm/trapnr.h>
77
+
75
+
78
+/*
76
+/*
...
...
88
+ * which also hasn't happened yet in early CPU bringup.
86
+ * which also hasn't happened yet in early CPU bringup.
89
+ */
87
+ */
90
+static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data;
88
+static gate_desc bringup_idt_table[NUM_EXCEPTION_VECTORS] __page_aligned_data;
91
+
89
+
92
+/* This may run while still in the direct mapping */
90
+/* This may run while still in the direct mapping */
93
+static void startup_64_load_idt(void *vc_handler)
91
+static void __head startup_64_load_idt(void *vc_handler)
94
+{
92
+{
95
+    struct desc_ptr desc = {
93
+    struct desc_ptr desc = {
96
+        .address = (unsigned long)bringup_idt_table,
94
+        .address = (unsigned long)rip_rel_ptr(bringup_idt_table),
97
+        .size = sizeof(bringup_idt_table) - 1,
95
+        .size = sizeof(bringup_idt_table) - 1,
98
+    };
96
+    };
99
+    struct idt_data data;
97
+    struct idt_data data;
100
+    gate_desc idt_desc;
98
+    gate_desc idt_desc;
101
+
99
+
...
...
123
+}
121
+}
124
+
122
+
125
+/*
123
+/*
126
+ * Setup boot CPU state needed before kernel switches to virtual addresses.
124
+ * Setup boot CPU state needed before kernel switches to virtual addresses.
127
+ */
125
+ */
128
+void __init startup_64_setup_gdt_idt(void)
126
+void __head startup_64_setup_gdt_idt(void)
129
+{
127
+{
128
+    struct gdt_page *gp = rip_rel_ptr((void *)(__force unsigned long)&gdt_page);
130
+    void *handler = NULL;
129
+    void *handler = NULL;
131
+
130
+
132
+    struct desc_ptr startup_gdt_descr = {
131
+    struct desc_ptr startup_gdt_descr = {
133
+        .address = (__force unsigned long)gdt_page.gdt,
132
+        .address = (unsigned long)gp->gdt,
134
+        .size = GDT_SIZE - 1,
133
+        .size = GDT_SIZE - 1,
135
+    };
134
+    };
136
+
135
+
137
+    /* Load GDT */
136
+    /* Load GDT */
138
+    native_load_gdt(&startup_gdt_descr);
137
+    native_load_gdt(&startup_gdt_descr);
...
...
141
+    asm volatile("movl %%eax, %%ds\n"
140
+    asm volatile("movl %%eax, %%ds\n"
142
+         "movl %%eax, %%ss\n"
141
+         "movl %%eax, %%ss\n"
143
+         "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory");
142
+         "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory");
144
+
143
+
145
+    if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
144
+    if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
146
+        handler = vc_no_ghcb;
145
+        handler = rip_rel_ptr(vc_no_ghcb);
147
+
146
+
148
+    startup_64_load_idt(handler);
147
+    startup_64_load_idt(handler);
149
+}
148
+}
150
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
149
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
151
index XXXXXXX..XXXXXXX 100644
150
index XXXXXXX..XXXXXXX 100644
...
...
172
-
171
-
173
-/* This may run while still in the direct mapping */
172
-/* This may run while still in the direct mapping */
174
-static void __head startup_64_load_idt(void *vc_handler)
173
-static void __head startup_64_load_idt(void *vc_handler)
175
-{
174
-{
176
-    struct desc_ptr desc = {
175
-    struct desc_ptr desc = {
177
-        .address = (unsigned long)&RIP_REL_REF(bringup_idt_table),
176
-        .address = (unsigned long)rip_rel_ptr(bringup_idt_table),
178
-        .size = sizeof(bringup_idt_table) - 1,
177
-        .size = sizeof(bringup_idt_table) - 1,
179
-    };
178
-    };
180
-    struct idt_data data;
179
-    struct idt_data data;
181
-    gate_desc idt_desc;
180
-    gate_desc idt_desc;
182
-
181
-
...
...
206
-/*
205
-/*
207
- * Setup boot CPU state needed before kernel switches to virtual addresses.
206
- * Setup boot CPU state needed before kernel switches to virtual addresses.
208
- */
207
- */
209
-void __head startup_64_setup_gdt_idt(void)
208
-void __head startup_64_setup_gdt_idt(void)
210
-{
209
-{
211
-    struct desc_struct *gdt = (void *)(__force unsigned long)gdt_page.gdt;
210
-    struct gdt_page *gp = rip_rel_ptr((void *)(__force unsigned long)&gdt_page);
212
-    void *handler = NULL;
211
-    void *handler = NULL;
213
-
212
-
214
-    struct desc_ptr startup_gdt_descr = {
213
-    struct desc_ptr startup_gdt_descr = {
215
-        .address = (unsigned long)&RIP_REL_REF(*gdt),
214
-        .address = (unsigned long)gp->gdt,
216
-        .size = GDT_SIZE - 1,
215
-        .size = GDT_SIZE - 1,
217
-    };
216
-    };
218
-
217
-
219
-    /* Load GDT */
218
-    /* Load GDT */
220
-    native_load_gdt(&startup_gdt_descr);
219
-    native_load_gdt(&startup_gdt_descr);
...
...
223
-    asm volatile("movl %%eax, %%ds\n"
222
-    asm volatile("movl %%eax, %%ds\n"
224
-         "movl %%eax, %%ss\n"
223
-         "movl %%eax, %%ss\n"
225
-         "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory");
224
-         "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory");
226
-
225
-
227
-    if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
226
-    if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
228
-        handler = &RIP_REL_REF(vc_no_ghcb);
227
-        handler = rip_rel_ptr(vc_no_ghcb);
229
-
228
-
230
-    startup_64_load_idt(handler);
229
-    startup_64_load_idt(handler);
231
-}
230
-}
232
--
231
--
233
2.49.0.472.ge94155a9ec-goog
232
2.49.0.504.g3bcea36a83-goog
From: Ard Biesheuvel <ardb@kernel.org>

The startup code that constructs the kernel virtual mapping runs from
the 1:1 mapping of memory itself, and therefore, cannot use absolute
symbol references. Before making changes in subsequent patches, move
this code into a separate source file under arch/x86/boot/startup/ where
all such code will be kept from now on.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/boot/startup/Makefile     |   2 +-
 arch/x86/boot/startup/map_kernel.c | 224 ++++++++++++++
 arch/x86/kernel/head64.c           | 211 +-----------------
 3 files changed, 226 insertions(+), 211 deletions(-)

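The code being moved revolves around one address-translation identity:
rip_rel_ptr() yields a symbol's 1:1-mapped (physical) address while this
code executes from the identity mapping, and the corresponding kernel
virtual address follows by subtracting p2v_offset. A minimal sketch of
that arithmetic (illustrative helper only, not part of the patch):

  /*
   * p2v_offset is handed to __startup_64() by the asm startup code:
   *
   *   kernel virtual address = 1:1 mapped (physical) address - p2v_offset
   */
  static inline unsigned long phys_to_kernel_va(unsigned long paddr,
                                                unsigned long p2v_offset)
  {
          return paddr - p2v_offset;
  }

  /* e.g. how __startup_64() derives the virtual end of the image:     */
  /*   va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;         */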
diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile
16
diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile
21
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
22
--- a/arch/x86/boot/startup/Makefile
18
--- a/arch/x86/boot/startup/Makefile
23
+++ b/arch/x86/boot/startup/Makefile
19
+++ b/arch/x86/boot/startup/Makefile
...
...
42
+#include <linux/linkage.h>
38
+#include <linux/linkage.h>
43
+#include <linux/types.h>
39
+#include <linux/types.h>
44
+#include <linux/kernel.h>
40
+#include <linux/kernel.h>
45
+#include <linux/pgtable.h>
41
+#include <linux/pgtable.h>
46
+
42
+
43
+#include <asm/init.h>
47
+#include <asm/sections.h>
44
+#include <asm/sections.h>
48
+#include <asm/setup.h>
45
+#include <asm/setup.h>
49
+#include <asm/sev.h>
46
+#include <asm/sev.h>
50
+
47
+
51
+extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
48
+extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
52
+extern unsigned int next_early_pgt;
49
+extern unsigned int next_early_pgt;
53
+
54
+#ifdef CONFIG_X86_5LEVEL
55
+unsigned int __pgtable_l5_enabled __ro_after_init;
56
+unsigned int pgdir_shift __ro_after_init = 39;
57
+EXPORT_SYMBOL(pgdir_shift);
58
+unsigned int ptrs_per_p4d __ro_after_init = 1;
59
+EXPORT_SYMBOL(ptrs_per_p4d);
60
+#endif
61
+
62
+#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
63
+unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
64
+EXPORT_SYMBOL(page_offset_base);
65
+unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4;
66
+EXPORT_SYMBOL(vmalloc_base);
67
+unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
68
+EXPORT_SYMBOL(vmemmap_base);
69
+#endif
70
+
50
+
71
+static inline bool check_la57_support(void)
51
+static inline bool check_la57_support(void)
72
+{
52
+{
73
+    if (!IS_ENABLED(CONFIG_X86_5LEVEL))
53
+    if (!IS_ENABLED(CONFIG_X86_5LEVEL))
74
+        return false;
54
+        return false;
...
...
78
+     * stage. Only check if it has been enabled there.
58
+     * stage. Only check if it has been enabled there.
79
+     */
59
+     */
80
+    if (!(native_read_cr4() & X86_CR4_LA57))
60
+    if (!(native_read_cr4() & X86_CR4_LA57))
81
+        return false;
61
+        return false;
82
+
62
+
83
+    __pgtable_l5_enabled    = 1;
63
+    RIP_REL_REF(__pgtable_l5_enabled)    = 1;
84
+    pgdir_shift        = 48;
64
+    RIP_REL_REF(pgdir_shift)        = 48;
85
+    ptrs_per_p4d        = 512;
65
+    RIP_REL_REF(ptrs_per_p4d)        = 512;
86
+    page_offset_base    = __PAGE_OFFSET_BASE_L5;
66
+    RIP_REL_REF(page_offset_base)        = __PAGE_OFFSET_BASE_L5;
87
+    vmalloc_base        = __VMALLOC_BASE_L5;
67
+    RIP_REL_REF(vmalloc_base)        = __VMALLOC_BASE_L5;
88
+    vmemmap_base        = __VMEMMAP_BASE_L5;
68
+    RIP_REL_REF(vmemmap_base)        = __VMEMMAP_BASE_L5;
89
+
69
+
90
+    return true;
70
+    return true;
91
+}
71
+}
92
+
72
+
93
+static unsigned long sme_postprocess_startup(struct boot_params *bp,
73
+static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
94
+                     pmdval_t *pmd,
74
+                         pmdval_t *pmd,
95
+                     unsigned long p2v_offset)
75
+                         unsigned long p2v_offset)
96
+{
76
+{
97
+    unsigned long paddr, paddr_end;
77
+    unsigned long paddr, paddr_end;
98
+    int i;
78
+    int i;
99
+
79
+
100
+    /* Encrypt the kernel and related (if SME is active) */
80
+    /* Encrypt the kernel and related (if SME is active) */
...
...
105
+     * The bss section will be memset to zero later in the initialization so
85
+     * The bss section will be memset to zero later in the initialization so
106
+     * there is no need to zero it after changing the memory encryption
86
+     * there is no need to zero it after changing the memory encryption
107
+     * attribute.
87
+     * attribute.
108
+     */
88
+     */
109
+    if (sme_get_me_mask()) {
89
+    if (sme_get_me_mask()) {
110
+        paddr = (unsigned long)__start_bss_decrypted;
90
+        paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
111
+        paddr_end = (unsigned long)__end_bss_decrypted;
91
+        paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);
112
+
92
+
113
+        for (; paddr < paddr_end; paddr += PMD_SIZE) {
93
+        for (; paddr < paddr_end; paddr += PMD_SIZE) {
114
+            /*
94
+            /*
115
+             * On SNP, transition the page to shared in the RMP table so that
95
+             * On SNP, transition the page to shared in the RMP table so that
116
+             * it is consistent with the page table attribute change.
96
+             * it is consistent with the page table attribute change.
...
...
133
+     * modifier for the initial pgdir entry programmed into CR3.
113
+     * modifier for the initial pgdir entry programmed into CR3.
134
+     */
114
+     */
135
+    return sme_get_me_mask();
115
+    return sme_get_me_mask();
136
+}
116
+}
137
+
117
+
138
+unsigned long __init __startup_64(unsigned long p2v_offset,
118
+/* Code in __startup_64() can be relocated during execution, but the compiler
119
+ * doesn't have to generate PC-relative relocations when accessing globals from
120
+ * that function. Clang actually does not generate them, which leads to
121
+ * boot-time crashes. To work around this problem, every global pointer must
122
+ * be accessed using RIP_REL_REF(). Kernel virtual addresses can be determined
123
+ * by subtracting p2v_offset from the RIP-relative address.
124
+ */
125
+unsigned long __head __startup_64(unsigned long p2v_offset,
139
+                 struct boot_params *bp)
126
+                 struct boot_params *bp)
140
+{
127
+{
141
+    pmd_t (*early_pgts)[PTRS_PER_PMD] = early_dynamic_pgts;
128
+    pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
142
+    unsigned long physaddr = (unsigned long)_text;
129
+    unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
143
+    unsigned long va_text, va_end;
130
+    unsigned long va_text, va_end;
144
+    unsigned long pgtable_flags;
131
+    unsigned long pgtable_flags;
145
+    unsigned long load_delta;
132
+    unsigned long load_delta;
146
+    pgdval_t *pgd;
133
+    pgdval_t *pgd;
147
+    p4dval_t *p4d;
134
+    p4dval_t *p4d;
...
...
158
+
145
+
159
+    /*
146
+    /*
160
+     * Compute the delta between the address I am compiled to run at
147
+     * Compute the delta between the address I am compiled to run at
161
+     * and the address I am actually running at.
148
+     * and the address I am actually running at.
162
+     */
149
+     */
163
+    phys_base = load_delta = __START_KERNEL_map + p2v_offset;
150
+    load_delta = __START_KERNEL_map + p2v_offset;
151
+    RIP_REL_REF(phys_base) = load_delta;
164
+
152
+
165
+    /* Is the address not 2M aligned? */
153
+    /* Is the address not 2M aligned? */
166
+    if (load_delta & ~PMD_MASK)
154
+    if (load_delta & ~PMD_MASK)
167
+        for (;;);
155
+        for (;;);
168
+
156
+
169
+    va_text = physaddr - p2v_offset;
157
+    va_text = physaddr - p2v_offset;
170
+    va_end = (unsigned long)_end - p2v_offset;
158
+    va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;
171
+
159
+
172
+    /* Include the SME encryption mask in the fixup value */
160
+    /* Include the SME encryption mask in the fixup value */
173
+    load_delta += sme_get_me_mask();
161
+    load_delta += sme_get_me_mask();
174
+
162
+
175
+    /* Fixup the physical addresses in the page table */
163
+    /* Fixup the physical addresses in the page table */
176
+
164
+
177
+    pgd = &early_top_pgt[0].pgd;
165
+    pgd = rip_rel_ptr(early_top_pgt);
178
+    pgd[pgd_index(__START_KERNEL_map)] += load_delta;
166
+    pgd[pgd_index(__START_KERNEL_map)] += load_delta;
179
+
167
+
180
+    if (IS_ENABLED(CONFIG_X86_5LEVEL) && la57) {
168
+    if (IS_ENABLED(CONFIG_X86_5LEVEL) && la57) {
181
+        p4d = (p4dval_t *)level4_kernel_pgt;
169
+        p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
182
+        p4d[MAX_PTRS_PER_P4D - 1] += load_delta;
170
+        p4d[MAX_PTRS_PER_P4D - 1] += load_delta;
183
+
171
+
184
+        pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
172
+        pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
185
+    }
173
+    }
186
+
174
+
187
+    level3_kernel_pgt[PTRS_PER_PUD - 2].pud += load_delta;
175
+    RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta;
188
+    level3_kernel_pgt[PTRS_PER_PUD - 1].pud += load_delta;
176
+    RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 1].pud += load_delta;
189
+
177
+
190
+    for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
178
+    for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
191
+        level2_fixmap_pgt[i].pmd += load_delta;
179
+        RIP_REL_REF(level2_fixmap_pgt)[i].pmd += load_delta;
192
+
180
+
193
+    /*
181
+    /*
194
+     * Set up the identity mapping for the switchover. These
182
+     * Set up the identity mapping for the switchover. These
195
+     * entries should *NOT* have the global bit set! This also
183
+     * entries should *NOT* have the global bit set! This also
196
+     * creates a bunch of nonsense entries but that is fine --
184
+     * creates a bunch of nonsense entries but that is fine --
197
+     * it avoids problems around wraparound.
185
+     * it avoids problems around wraparound.
198
+     */
186
+     */
199
+
187
+
200
+    pud = &early_pgts[0]->pmd;
188
+    pud = &early_pgts[0]->pmd;
201
+    pmd = &early_pgts[1]->pmd;
189
+    pmd = &early_pgts[1]->pmd;
202
+    next_early_pgt = 2;
190
+    RIP_REL_REF(next_early_pgt) = 2;
203
+
191
+
204
+    pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
192
+    pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();
205
+
193
+
206
+    if (la57) {
194
+    if (la57) {
207
+        p4d = &early_pgts[next_early_pgt++]->pmd;
195
+        p4d = &early_pgts[RIP_REL_REF(next_early_pgt)++]->pmd;
208
+
196
+
209
+        i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
197
+        i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
210
+        pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
198
+        pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
211
+        pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
199
+        pgd[i + 1] = (pgdval_t)p4d + pgtable_flags;
212
+
200
+
...
...
223
+    pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
211
+    pud[(i + 0) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
224
+    pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
212
+    pud[(i + 1) % PTRS_PER_PUD] = (pudval_t)pmd + pgtable_flags;
225
+
213
+
226
+    pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
214
+    pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
227
+    /* Filter out unsupported __PAGE_KERNEL_* bits: */
215
+    /* Filter out unsupported __PAGE_KERNEL_* bits: */
228
+    pmd_entry &= __supported_pte_mask;
216
+    pmd_entry &= RIP_REL_REF(__supported_pte_mask);
229
+    pmd_entry += sme_get_me_mask();
217
+    pmd_entry += sme_get_me_mask();
230
+    pmd_entry += physaddr;
218
+    pmd_entry += physaddr;
231
+
219
+
232
+    for (i = 0; i < DIV_ROUND_UP(va_end - va_text, PMD_SIZE); i++) {
220
+    for (i = 0; i < DIV_ROUND_UP(va_end - va_text, PMD_SIZE); i++) {
233
+        int idx = i + (physaddr >> PMD_SHIFT);
221
+        int idx = i + (physaddr >> PMD_SHIFT);
...
...
249
+     * and on some hardware (particularly the UV platform) even
237
+     * and on some hardware (particularly the UV platform) even
250
+     * speculative access to some reserved areas is caught as an
238
+     * speculative access to some reserved areas is caught as an
251
+     * error, causing the BIOS to halt the system.
239
+     * error, causing the BIOS to halt the system.
252
+     */
240
+     */
253
+
241
+
254
+    pmd = &level2_kernel_pgt[0].pmd;
242
+    pmd = rip_rel_ptr(level2_kernel_pgt);
255
+
243
+
256
+    /* invalidate pages before the kernel image */
244
+    /* invalidate pages before the kernel image */
257
+    for (i = 0; i < pmd_index(va_text); i++)
245
+    for (i = 0; i < pmd_index(va_text); i++)
258
+        pmd[i] &= ~_PAGE_PRESENT;
246
+        pmd[i] &= ~_PAGE_PRESENT;
259
+
247
+
...
...
278
extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
266
extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
279
-static unsigned int __initdata next_early_pgt;
267
-static unsigned int __initdata next_early_pgt;
280
+unsigned int __initdata next_early_pgt;
268
+unsigned int __initdata next_early_pgt;
281
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
269
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
282
270
283
-#ifdef CONFIG_X86_5LEVEL
271
#ifdef CONFIG_X86_5LEVEL
284
-unsigned int __pgtable_l5_enabled __ro_after_init;
272
@@ -XXX,XX +XXX,XX @@ unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
285
-unsigned int pgdir_shift __ro_after_init = 39;
273
EXPORT_SYMBOL(vmemmap_base);
286
-EXPORT_SYMBOL(pgdir_shift);
274
#endif
287
-unsigned int ptrs_per_p4d __ro_after_init = 1;
275
288
-EXPORT_SYMBOL(ptrs_per_p4d);
289
-#endif
290
-
291
-#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
292
-unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
293
-EXPORT_SYMBOL(page_offset_base);
294
-unsigned long vmalloc_base __ro_after_init = __VMALLOC_BASE_L4;
295
-EXPORT_SYMBOL(vmalloc_base);
296
-unsigned long vmemmap_base __ro_after_init = __VMEMMAP_BASE_L4;
297
-EXPORT_SYMBOL(vmemmap_base);
298
-#endif
299
-
300
-static inline bool check_la57_support(void)
276
-static inline bool check_la57_support(void)
301
-{
277
-{
302
-    if (!IS_ENABLED(CONFIG_X86_5LEVEL))
278
-    if (!IS_ENABLED(CONFIG_X86_5LEVEL))
303
-        return false;
279
-        return false;
304
-
280
-
...
...
334
-     * The bss section will be memset to zero later in the initialization so
310
-     * The bss section will be memset to zero later in the initialization so
335
-     * there is no need to zero it after changing the memory encryption
311
-     * there is no need to zero it after changing the memory encryption
336
-     * attribute.
312
-     * attribute.
337
-     */
313
-     */
338
-    if (sme_get_me_mask()) {
314
-    if (sme_get_me_mask()) {
339
-        paddr = (unsigned long)&RIP_REL_REF(__start_bss_decrypted);
315
-        paddr = (unsigned long)rip_rel_ptr(__start_bss_decrypted);
340
-        paddr_end = (unsigned long)&RIP_REL_REF(__end_bss_decrypted);
316
-        paddr_end = (unsigned long)rip_rel_ptr(__end_bss_decrypted);
341
-
317
-
342
-        for (; paddr < paddr_end; paddr += PMD_SIZE) {
318
-        for (; paddr < paddr_end; paddr += PMD_SIZE) {
343
-            /*
319
-            /*
344
-             * On SNP, transition the page to shared in the RMP table so that
320
-             * On SNP, transition the page to shared in the RMP table so that
345
-             * it is consistent with the page table attribute change.
321
-             * it is consistent with the page table attribute change.
...
...
372
- * by subtracting p2v_offset from the RIP-relative address.
348
- * by subtracting p2v_offset from the RIP-relative address.
373
- */
349
- */
374
-unsigned long __head __startup_64(unsigned long p2v_offset,
350
-unsigned long __head __startup_64(unsigned long p2v_offset,
375
-                 struct boot_params *bp)
351
-                 struct boot_params *bp)
376
-{
352
-{
377
-    pmd_t (*early_pgts)[PTRS_PER_PMD] = RIP_REL_REF(early_dynamic_pgts);
353
-    pmd_t (*early_pgts)[PTRS_PER_PMD] = rip_rel_ptr(early_dynamic_pgts);
378
-    unsigned long physaddr = (unsigned long)&RIP_REL_REF(_text);
354
-    unsigned long physaddr = (unsigned long)rip_rel_ptr(_text);
379
-    unsigned long va_text, va_end;
355
-    unsigned long va_text, va_end;
380
-    unsigned long pgtable_flags;
356
-    unsigned long pgtable_flags;
381
-    unsigned long load_delta;
357
-    unsigned long load_delta;
382
-    pgdval_t *pgd;
358
-    pgdval_t *pgd;
383
-    p4dval_t *p4d;
359
-    p4dval_t *p4d;
...
...
402
-    /* Is the address not 2M aligned? */
378
-    /* Is the address not 2M aligned? */
403
-    if (load_delta & ~PMD_MASK)
379
-    if (load_delta & ~PMD_MASK)
404
-        for (;;);
380
-        for (;;);
405
-
381
-
406
-    va_text = physaddr - p2v_offset;
382
-    va_text = physaddr - p2v_offset;
407
-    va_end = (unsigned long)&RIP_REL_REF(_end) - p2v_offset;
383
-    va_end = (unsigned long)rip_rel_ptr(_end) - p2v_offset;
408
-
384
-
409
-    /* Include the SME encryption mask in the fixup value */
385
-    /* Include the SME encryption mask in the fixup value */
410
-    load_delta += sme_get_me_mask();
386
-    load_delta += sme_get_me_mask();
411
-
387
-
412
-    /* Fixup the physical addresses in the page table */
388
-    /* Fixup the physical addresses in the page table */
413
-
389
-
414
-    pgd = &RIP_REL_REF(early_top_pgt)->pgd;
390
-    pgd = rip_rel_ptr(early_top_pgt);
415
-    pgd[pgd_index(__START_KERNEL_map)] += load_delta;
391
-    pgd[pgd_index(__START_KERNEL_map)] += load_delta;
416
-
392
-
417
-    if (IS_ENABLED(CONFIG_X86_5LEVEL) && la57) {
393
-    if (IS_ENABLED(CONFIG_X86_5LEVEL) && la57) {
418
-        p4d = (p4dval_t *)&RIP_REL_REF(level4_kernel_pgt);
394
-        p4d = (p4dval_t *)rip_rel_ptr(level4_kernel_pgt);
419
-        p4d[MAX_PTRS_PER_P4D - 1] += load_delta;
395
-        p4d[MAX_PTRS_PER_P4D - 1] += load_delta;
420
-
396
-
421
-        pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
397
-        pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
422
-    }
398
-    }
423
-
399
-
...
...
486
-     * and on some hardware (particularly the UV platform) even
462
-     * and on some hardware (particularly the UV platform) even
487
-     * speculative access to some reserved areas is caught as an
463
-     * speculative access to some reserved areas is caught as an
488
-     * error, causing the BIOS to halt the system.
464
-     * error, causing the BIOS to halt the system.
489
-     */
465
-     */
490
-
466
-
491
-    pmd = &RIP_REL_REF(level2_kernel_pgt)->pmd;
467
-    pmd = rip_rel_ptr(level2_kernel_pgt);
492
-
468
-
493
-    /* invalidate pages before the kernel image */
469
-    /* invalidate pages before the kernel image */
494
-    for (i = 0; i < pmd_index(va_text); i++)
470
-    for (i = 0; i < pmd_index(va_text); i++)
495
-        pmd[i] &= ~_PAGE_PRESENT;
471
-        pmd[i] &= ~_PAGE_PRESENT;
496
-
472
-
...
...
508
-
484
-
509
/* Wipe all early page tables except for the kernel symbol map */
485
/* Wipe all early page tables except for the kernel symbol map */
510
static void __init reset_early_page_tables(void)
486
static void __init reset_early_page_tables(void)
511
{
487
{
512
--
488
--
513
2.49.0.472.ge94155a9ec-goog
489
2.49.0.504.g3bcea36a83-goog
From: Ard Biesheuvel <ardb@kernel.org>

Now that __startup_64() is built using -fPIC, RIP_REL_REF() has become a
NOP and can be removed. Only some occurrences of rip_rel_ptr() will
remain, to explicitly take the address of certain global structures in
the 1:1 mapping of memory.

While at it, update the code comment to describe why this is needed.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/boot/startup/map_kernel.c | 41 ++++++++++----------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/arch/x86/boot/startup/map_kernel.c b/arch/x86/boot/startup/map_kernel.c
index XXXXXXX..XXXXXXX 100644
--- a/arch/x86/boot/startup/map_kernel.c
+++ b/arch/x86/boot/startup/map_kernel.c
@@ -XXX,XX +XXX,XX @@ static inline bool check_la57_support(void)
    if (!(native_read_cr4() & X86_CR4_LA57))
        return false;

-    RIP_REL_REF(__pgtable_l5_enabled)    = 1;
-    RIP_REL_REF(pgdir_shift)        = 48;
-    RIP_REL_REF(ptrs_per_p4d)        = 512;
-    RIP_REL_REF(page_offset_base)        = __PAGE_OFFSET_BASE_L5;
-    RIP_REL_REF(vmalloc_base)        = __VMALLOC_BASE_L5;
-    RIP_REL_REF(vmemmap_base)        = __VMEMMAP_BASE_L5;
+    __pgtable_l5_enabled    = 1;
+    pgdir_shift        = 48;
+    ptrs_per_p4d        = 512;
+    page_offset_base    = __PAGE_OFFSET_BASE_L5;
+    vmalloc_base        = __VMALLOC_BASE_L5;
+    vmemmap_base        = __VMEMMAP_BASE_L5;

    return true;
 }
@@ -XXX,XX +XXX,XX @@ static unsigned long __head sme_postprocess_startup(struct boot_params *bp,
    return sme_get_me_mask();
 }

-/* Code in __startup_64() can be relocated during execution, but the compiler
- * doesn't have to generate PC-relative relocations when accessing globals from
- * that function. Clang actually does not generate them, which leads to
- * boot-time crashes. To work around this problem, every global pointer must
- * be accessed using RIP_REL_REF(). Kernel virtual addresses can be determined
- * by subtracting p2v_offset from the RIP-relative address.
+/*
+ * This code is compiled using PIC codegen because it will execute from the
+ * early 1:1 mapping of memory, which deviates from the mapping expected by the
+ * linker. Due to this deviation, taking the address of a global variable will
+ * produce an ambiguous result when using the plain & operator. Instead,
+ * rip_rel_ptr() must be used, which will return the RIP-relative address in
+ * the 1:1 mapping of memory. Kernel virtual addresses can be determined by
+ * subtracting p2v_offset from the RIP-relative address.
 */
 unsigned long __head __startup_64(unsigned long p2v_offset,
                 struct boot_params *bp)
@@ -XXX,XX +XXX,XX @@ unsigned long __head __startup_64(unsigned long p2v_offset,
     * Compute the delta between the address I am compiled to run at
     * and the address I am actually running at.
     */
-    load_delta = __START_KERNEL_map + p2v_offset;
-    RIP_REL_REF(phys_base) = load_delta;
+    phys_base = load_delta = __START_KERNEL_map + p2v_offset;

    /* Is the address not 2M aligned? */
    if (load_delta & ~PMD_MASK)
@@ -XXX,XX +XXX,XX @@ unsigned long __head __startup_64(unsigned long p2v_offset,
        pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE;
    }

-    RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta;
-    RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 1].pud += load_delta;
+    level3_kernel_pgt[PTRS_PER_PUD - 2].pud += load_delta;
+    level3_kernel_pgt[PTRS_PER_PUD - 1].pud += load_delta;

    for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
-        RIP_REL_REF(level2_fixmap_pgt)[i].pmd += load_delta;
+        level2_fixmap_pgt[i].pmd += load_delta;

    /*
     * Set up the identity mapping for the switchover. These
@@ -XXX,XX +XXX,XX @@ unsigned long __head __startup_64(unsigned long p2v_offset,

    pud = &early_pgts[0]->pmd;
    pmd = &early_pgts[1]->pmd;
-    RIP_REL_REF(next_early_pgt) = 2;
+    next_early_pgt = 2;

    pgtable_flags = _KERNPG_TABLE_NOENC + sme_get_me_mask();

    if (la57) {
-        p4d = &early_pgts[RIP_REL_REF(next_early_pgt)++]->pmd;
+        p4d = &early_pgts[next_early_pgt++]->pmd;

        i = (physaddr >> PGDIR_SHIFT) % PTRS_PER_PGD;
        pgd[i + 0] = (pgdval_t)p4d + pgtable_flags;
@@ -XXX,XX +XXX,XX @@ unsigned long __head __startup_64(unsigned long p2v_offset,

    pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL;
    /* Filter out unsupported __PAGE_KERNEL_* bits: */
-    pmd_entry &= RIP_REL_REF(__supported_pte_mask);
+    pmd_entry &= __supported_pte_mask;
    pmd_entry += sme_get_me_mask();
    pmd_entry += physaddr;

-- 
2.49.0.504.g3bcea36a83-goog
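The net effect of the patch above is easiest to see on one of the
variables it touches; a reduced before/after (the comments are
explanatory and not taken from the patch):

  /* v3: explicit wrapper needed to force a RIP-relative access */
  RIP_REL_REF(__pgtable_l5_enabled) = 1;

  /* v4: with -fPIC codegen the plain store is already RIP-relative */
  __pgtable_l5_enabled = 1;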
...

Move the SME initialization code, which runs from the 1:1 mapping of
memory as it operates on the kernel virtual mapping, into the new
sub-directory arch/x86/boot/startup/ where all startup code will reside
that needs to tolerate executing from the 1:1 mapping.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/boot/startup/Makefile                             | 1 +
 arch/x86/{mm/mem_encrypt_identity.c => boot/startup/sme.c} | 2 --
 arch/x86/mm/Makefile                                       | 6 ---
 3 files changed, 1 insertion(+), 8 deletions(-)

19
diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile
15
diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile
20
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
21
--- a/arch/x86/boot/startup/Makefile
17
--- a/arch/x86/boot/startup/Makefile
22
+++ b/arch/x86/boot/startup/Makefile
18
+++ b/arch/x86/boot/startup/Makefile
...
...
27
+obj-$(CONFIG_AMD_MEM_ENCRYPT)    += sme.o
23
+obj-$(CONFIG_AMD_MEM_ENCRYPT)    += sme.o
28
24
29
lib-$(CONFIG_X86_64)        += la57toggle.o
25
lib-$(CONFIG_X86_64)        += la57toggle.o
30
lib-$(CONFIG_EFI_MIXED)        += efi-mixed.o
26
lib-$(CONFIG_EFI_MIXED)        += efi-mixed.o
31
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/boot/startup/sme.c
27
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/boot/startup/sme.c
32
similarity index 92%
28
similarity index 99%
33
rename from arch/x86/mm/mem_encrypt_identity.c
29
rename from arch/x86/mm/mem_encrypt_identity.c
34
rename to arch/x86/boot/startup/sme.c
30
rename to arch/x86/boot/startup/sme.c
35
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
36
--- a/arch/x86/mm/mem_encrypt_identity.c
32
--- a/arch/x86/mm/mem_encrypt_identity.c
37
+++ b/arch/x86/boot/startup/sme.c
33
+++ b/arch/x86/boot/startup/sme.c
...
...
42
-#include "mm_internal.h"
38
-#include "mm_internal.h"
43
-
39
-
44
#define PGD_FLAGS        _KERNPG_TABLE_NOENC
40
#define PGD_FLAGS        _KERNPG_TABLE_NOENC
45
#define P4D_FLAGS        _KERNPG_TABLE_NOENC
41
#define P4D_FLAGS        _KERNPG_TABLE_NOENC
46
#define PUD_FLAGS        _KERNPG_TABLE_NOENC
42
#define PUD_FLAGS        _KERNPG_TABLE_NOENC
47
@@ -XXX,XX +XXX,XX @@ struct sme_populate_pgd_data {
48
*/
49
static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch");
50
51
-static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd)
52
+static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
53
{
54
    unsigned long pgd_start, pgd_end, pgd_size;
55
    pgd_t *pgd_p;
56
@@ -XXX,XX +XXX,XX @@ static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd)
57
    memset(pgd_p, 0, pgd_size);
58
}
59
60
-static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
61
+static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
62
{
63
    pgd_t *pgd;
64
    p4d_t *p4d;
65
@@ -XXX,XX +XXX,XX @@ static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
66
    return pud;
67
}
68
69
-static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
70
+static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
71
{
72
    pud_t *pud;
73
    pmd_t *pmd;
74
@@ -XXX,XX +XXX,XX @@ static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
75
    set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags));
76
}
77
78
-static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd)
79
+static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
80
{
81
    pud_t *pud;
82
    pmd_t *pmd;
83
@@ -XXX,XX +XXX,XX @@ static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd)
84
        set_pte(pte, __pte(ppd->paddr | ppd->pte_flags));
85
}
86
87
-static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
88
+static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
89
{
90
    while (ppd->vaddr < ppd->vaddr_end) {
91
        sme_populate_pgd_large(ppd);
92
@@ -XXX,XX +XXX,XX @@ static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
93
    }
94
}
95
96
-static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
97
+static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
98
{
99
    while (ppd->vaddr < ppd->vaddr_end) {
100
        sme_populate_pgd(ppd);
101
@@ -XXX,XX +XXX,XX @@ static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
102
    }
103
}
104
105
-static void __head __sme_map_range(struct sme_populate_pgd_data *ppd,
106
+static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
107
                 pmdval_t pmd_flags, pteval_t pte_flags)
108
{
109
    unsigned long vaddr_end;
110
@@ -XXX,XX +XXX,XX @@ static void __head __sme_map_range(struct sme_populate_pgd_data *ppd,
111
    __sme_map_range_pte(ppd);
112
}
113
114
-static void __head sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
115
+static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
116
{
117
    __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
118
}
119
120
-static void __head sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
121
+static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
122
{
123
    __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
124
}
125
126
-static void __head sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
127
+static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
128
{
129
    __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
130
}
131
132
-static unsigned long __head sme_pgtable_calc(unsigned long len)
133
+static unsigned long __init sme_pgtable_calc(unsigned long len)
134
{
135
    unsigned long entries = 0, tables = 0;
136
137
@@ -XXX,XX +XXX,XX @@ static unsigned long __head sme_pgtable_calc(unsigned long len)
138
    return entries + tables;
139
}
140
141
-void __head sme_encrypt_kernel(struct boot_params *bp)
142
+void __init sme_encrypt_kernel(struct boot_params *bp)
143
{
144
    unsigned long workarea_start, workarea_end, workarea_len;
145
    unsigned long execute_start, execute_end, execute_len;
146
@@ -XXX,XX +XXX,XX @@ void __head sme_encrypt_kernel(struct boot_params *bp)
147
     * instrumentation or checking boot_cpu_data in the cc_platform_has()
148
     * function.
149
     */
150
-    if (!sme_get_me_mask() ||
151
-     RIP_REL_REF(sev_status) & MSR_AMD64_SEV_ENABLED)
152
+    if (!sme_get_me_mask() || sev_status & MSR_AMD64_SEV_ENABLED)
153
        return;
154
155
    /*
156
@@ -XXX,XX +XXX,XX @@ void __head sme_encrypt_kernel(struct boot_params *bp)
157
     * memory from being cached.
158
     */
159
160
-    kernel_start = (unsigned long)RIP_REL_REF(_text);
161
-    kernel_end = ALIGN((unsigned long)RIP_REL_REF(_end), PMD_SIZE);
162
+    kernel_start = (unsigned long)_text;
163
+    kernel_end = ALIGN((unsigned long)_end, PMD_SIZE);
164
    kernel_len = kernel_end - kernel_start;
165
166
    initrd_start = 0;
167
@@ -XXX,XX +XXX,XX @@ void __head sme_encrypt_kernel(struct boot_params *bp)
168
     * pagetable structures for the encryption of the kernel
169
     * pagetable structures for workarea (in case not currently mapped)
170
     */
171
-    execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea);
172
+    execute_start = workarea_start = (unsigned long)sme_workarea;
173
    execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE;
174
    execute_len = execute_end - execute_start;
175
176
@@ -XXX,XX +XXX,XX @@ void __head sme_encrypt_kernel(struct boot_params *bp)
177
    native_write_cr3(__native_read_cr3());
178
}
179
180
-void __head sme_enable(struct boot_params *bp)
181
+void __init sme_enable(struct boot_params *bp)
182
{
183
    unsigned int eax, ebx, ecx, edx;
184
    unsigned long feature_mask;
185
@@ -XXX,XX +XXX,XX @@ void __head sme_enable(struct boot_params *bp)
186
    me_mask = 1UL << (ebx & 0x3f);
187
188
    /* Check the SEV MSR whether SEV or SME is enabled */
189
-    RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV);
190
+    sev_status = msr = __rdmsr(MSR_AMD64_SEV);
191
    feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
192
193
    /*
194
@@ -XXX,XX +XXX,XX @@ void __head sme_enable(struct boot_params *bp)
195
            return;
196
    }
197
198
-    RIP_REL_REF(sme_me_mask) = me_mask;
199
-    RIP_REL_REF(physical_mask) &= ~me_mask;
200
-    RIP_REL_REF(cc_vendor) = CC_VENDOR_AMD;
201
+    sme_me_mask    = me_mask;
202
+    physical_mask    &= ~me_mask;
203
+    cc_vendor    = CC_VENDOR_AMD;
204
    cc_set_mask(me_mask);
205
}
206
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
207
index XXXXXXX..XXXXXXX 100644
208
--- a/arch/x86/include/asm/mem_encrypt.h
209
+++ b/arch/x86/include/asm/mem_encrypt.h
210
@@ -XXX,XX +XXX,XX @@ void __init sev_es_init_vc_handling(void);
211
212
static inline u64 sme_get_me_mask(void)
213
{
214
-    return RIP_REL_REF(sme_me_mask);
215
+    return sme_me_mask;
216
}
217
218
#define __bss_decrypted __section(".bss..decrypted")
219
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
43
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
220
index XXXXXXX..XXXXXXX 100644
44
index XXXXXXX..XXXXXXX 100644
221
--- a/arch/x86/mm/Makefile
45
--- a/arch/x86/mm/Makefile
222
+++ b/arch/x86/mm/Makefile
46
+++ b/arch/x86/mm/Makefile
223
@@ -XXX,XX +XXX,XX @@
47
@@ -XXX,XX +XXX,XX @@
...
...
259
obj-$(CONFIG_AMD_MEM_ENCRYPT)    += mem_encrypt_amd.o
83
obj-$(CONFIG_AMD_MEM_ENCRYPT)    += mem_encrypt_amd.o
260
84
261
-obj-$(CONFIG_AMD_MEM_ENCRYPT)    += mem_encrypt_identity.o
85
-obj-$(CONFIG_AMD_MEM_ENCRYPT)    += mem_encrypt_identity.o
262
obj-$(CONFIG_AMD_MEM_ENCRYPT)    += mem_encrypt_boot.o
86
obj-$(CONFIG_AMD_MEM_ENCRYPT)    += mem_encrypt_boot.o
263
--
87
--
264
2.49.0.472.ge94155a9ec-goog
88
2.49.0.504.g3bcea36a83-goog
New patch
1
From: Ard Biesheuvel <ardb@kernel.org>
1
2
3
RIP_REL_REF() has no effect on code residing in arch/x86/boot/startup,
4
as it is built with -fPIC. So remove any occurrences from the SME
5
startup code.
6
7
Note that the SME code is the only caller of cc_set_mask() that requires this, so
8
drop it from there as well.
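To illustrate why, a minimal sketch (not part of the patch; example_status and
example_set_status are made-up stand-ins for globals such as sev_status):

    extern unsigned long example_status;

    static inline void example_set_status(unsigned long val)
    {
            /*
             * Without -fPIC the compiler may reference example_status through
             * an absolute, link-time address, which is wrong while executing
             * from the 1:1 mapping - hence the explicit RIP_REL_REF() wrappers.
             * Under -fPIC the access is generated position-independently
             * anyway, so the plain assignment is safe and the wrapper adds
             * nothing.
             */
            example_status = val;
    }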
9
10
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
11
---
12
arch/x86/boot/startup/sme.c | 11 +++++------
13
arch/x86/include/asm/coco.h | 2 +-
14
arch/x86/include/asm/mem_encrypt.h | 2 +-
15
3 files changed, 7 insertions(+), 8 deletions(-)
16
17
diff --git a/arch/x86/boot/startup/sme.c b/arch/x86/boot/startup/sme.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/arch/x86/boot/startup/sme.c
20
+++ b/arch/x86/boot/startup/sme.c
21
@@ -XXX,XX +XXX,XX @@ void __head sme_encrypt_kernel(struct boot_params *bp)
22
     * instrumentation or checking boot_cpu_data in the cc_platform_has()
23
     * function.
24
     */
25
-    if (!sme_get_me_mask() ||
26
-     RIP_REL_REF(sev_status) & MSR_AMD64_SEV_ENABLED)
27
+    if (!sme_get_me_mask() || sev_status & MSR_AMD64_SEV_ENABLED)
28
        return;
29
30
    /*
31
@@ -XXX,XX +XXX,XX @@ void __head sme_enable(struct boot_params *bp)
32
    me_mask = 1UL << (ebx & 0x3f);
33
34
    /* Check the SEV MSR whether SEV or SME is enabled */
35
-    RIP_REL_REF(sev_status) = msr = __rdmsr(MSR_AMD64_SEV);
36
+    sev_status = msr = __rdmsr(MSR_AMD64_SEV);
37
    feature_mask = (msr & MSR_AMD64_SEV_ENABLED) ? AMD_SEV_BIT : AMD_SME_BIT;
38
39
    /*
40
@@ -XXX,XX +XXX,XX @@ void __head sme_enable(struct boot_params *bp)
41
            return;
42
    }
43
44
-    RIP_REL_REF(sme_me_mask) = me_mask;
45
-    RIP_REL_REF(physical_mask) &= ~me_mask;
46
-    RIP_REL_REF(cc_vendor) = CC_VENDOR_AMD;
47
+    sme_me_mask    = me_mask;
48
+    physical_mask    &= ~me_mask;
49
+    cc_vendor    = CC_VENDOR_AMD;
50
    cc_set_mask(me_mask);
51
}
52
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
53
index XXXXXXX..XXXXXXX 100644
54
--- a/arch/x86/include/asm/coco.h
55
+++ b/arch/x86/include/asm/coco.h
56
@@ -XXX,XX +XXX,XX @@ static inline u64 cc_get_mask(void)
57
58
static inline void cc_set_mask(u64 mask)
59
{
60
-    RIP_REL_REF(cc_mask) = mask;
61
+    cc_mask = mask;
62
}
63
64
u64 cc_mkenc(u64 val);
65
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
66
index XXXXXXX..XXXXXXX 100644
67
--- a/arch/x86/include/asm/mem_encrypt.h
68
+++ b/arch/x86/include/asm/mem_encrypt.h
69
@@ -XXX,XX +XXX,XX @@ void __init sev_es_init_vc_handling(void);
70
71
static inline u64 sme_get_me_mask(void)
72
{
73
-    return RIP_REL_REF(sme_me_mask);
74
+    return sme_me_mask;
75
}
76
77
#define __bss_decrypted __section(".bss..decrypted")
78
--
79
2.49.0.504.g3bcea36a83-goog
New patch
1
From: Ard Biesheuvel <ardb@kernel.org>
1
2
3
Prepare for splitting off parts of the SEV core.c source file into a
4
file that carries code that must tolerate being called from the early
5
1:1 mapping. This will allow special build-time handling of this code,
6
to ensure that it gets generated in a way that is compatible with the
7
early execution context.
8
9
So create a de-facto internal SEV API and put the definitions into
10
sev-internal.h. No attempt is made to allow this header file to be
11
included in arbitrary other sources - this is explicitly not the intent.
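The pattern, roughly (mirroring the hunks below): the definition stays in
core.c and merely loses its 'static', while sev-internal.h carries the
declaration that both halves share.

    /* arch/x86/coco/sev/core.c - definition stays, 'static' is dropped */
    struct ghcb *boot_ghcb __section(".data");

    /* arch/x86/include/asm/sev-internal.h - shared declarations */
    extern struct ghcb *boot_ghcb;
    int svsm_perform_call_protocol(struct svsm_call *call);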
12
13
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
14
---
15
arch/x86/boot/compressed/sev.c | 15 ++-
16
arch/x86/coco/sev/core.c | 108 +++--------------
17
arch/x86/coco/sev/shared.c | 64 ++--------
18
arch/x86/include/asm/sev-internal.h | 122 ++++++++++++++++++++
19
arch/x86/include/asm/sev.h | 37 ++++++
20
5 files changed, 194 insertions(+), 152 deletions(-)
21
22
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/arch/x86/boot/compressed/sev.c
25
+++ b/arch/x86/boot/compressed/sev.c
26
@@ -XXX,XX +XXX,XX @@ static bool fault_in_kernel_space(unsigned long address)
27
#include "../../lib/inat.c"
28
#include "../../lib/insn.c"
29
30
-/* Include code for early handlers */
31
-#include "../../coco/sev/shared.c"
32
+extern struct svsm_ca *boot_svsm_caa;
33
+extern u64 boot_svsm_caa_pa;
34
35
-static struct svsm_ca *svsm_get_caa(void)
36
+struct svsm_ca *svsm_get_caa(void)
37
{
38
    return boot_svsm_caa;
39
}
40
41
-static u64 svsm_get_caa_pa(void)
42
+u64 svsm_get_caa_pa(void)
43
{
44
    return boot_svsm_caa_pa;
45
}
46
47
-static int svsm_perform_call_protocol(struct svsm_call *call)
48
+int svsm_perform_call_protocol(struct svsm_call *call);
49
+
50
+/* Include code for early handlers */
51
+#include "../../coco/sev/shared.c"
52
+
53
+int svsm_perform_call_protocol(struct svsm_call *call)
54
{
55
    struct ghcb *ghcb;
56
    int ret;
57
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/arch/x86/coco/sev/core.c
60
+++ b/arch/x86/coco/sev/core.c
61
@@ -XXX,XX +XXX,XX @@
62
#include <asm/cpu_entry_area.h>
63
#include <asm/stacktrace.h>
64
#include <asm/sev.h>
65
+#include <asm/sev-internal.h>
66
#include <asm/insn-eval.h>
67
#include <asm/fpu/xcr.h>
68
#include <asm/processor.h>
69
@@ -XXX,XX +XXX,XX @@
70
#include <asm/cpuid.h>
71
#include <asm/cmdline.h>
72
73
-#define DR7_RESET_VALUE 0x400
74
-
75
/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
76
#define AP_INIT_CS_LIMIT        0xffff
77
#define AP_INIT_DS_LIMIT        0xffff
78
@@ -XXX,XX +XXX,XX @@ static const char * const sev_status_feat_names[] = {
79
};
80
81
/* For early boot hypervisor communication in SEV-ES enabled guests */
82
-static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
83
+struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
84
85
/*
86
* Needs to be in the .data section because we need it NULL before bss is
87
* cleared
88
*/
89
-static struct ghcb *boot_ghcb __section(".data");
90
+struct ghcb *boot_ghcb __section(".data");
91
92
/* Bitmap of SEV features supported by the hypervisor */
93
-static u64 sev_hv_features __ro_after_init;
94
+u64 sev_hv_features __ro_after_init;
95
96
/* Secrets page physical address from the CC blob */
97
static u64 secrets_pa __ro_after_init;
98
@@ -XXX,XX +XXX,XX @@ static u64 snp_tsc_scale __ro_after_init;
99
static u64 snp_tsc_offset __ro_after_init;
100
static u64 snp_tsc_freq_khz __ro_after_init;
101
102
-/* #VC handler runtime per-CPU data */
103
-struct sev_es_runtime_data {
104
-    struct ghcb ghcb_page;
105
-
106
-    /*
107
-     * Reserve one page per CPU as backup storage for the unencrypted GHCB.
108
-     * It is needed when an NMI happens while the #VC handler uses the real
109
-     * GHCB, and the NMI handler itself is causing another #VC exception. In
110
-     * that case the GHCB content of the first handler needs to be backed up
111
-     * and restored.
112
-     */
113
-    struct ghcb backup_ghcb;
114
-
115
-    /*
116
-     * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
117
-     * There is no need for it to be atomic, because nothing is written to
118
-     * the GHCB between the read and the write of ghcb_active. So it is safe
119
-     * to use it when a nested #VC exception happens before the write.
120
-     *
121
-     * This is necessary for example in the #VC->NMI->#VC case when the NMI
122
-     * happens while the first #VC handler uses the GHCB. When the NMI code
123
-     * raises a second #VC handler it might overwrite the contents of the
124
-     * GHCB written by the first handler. To avoid this the content of the
125
-     * GHCB is saved and restored when the GHCB is detected to be in use
126
-     * already.
127
-     */
128
-    bool ghcb_active;
129
-    bool backup_ghcb_active;
130
-
131
-    /*
132
-     * Cached DR7 value - write it on DR7 writes and return it on reads.
133
-     * That value will never make it to the real hardware DR7 as debugging
134
-     * is currently unsupported in SEV-ES guests.
135
-     */
136
-    unsigned long dr7;
137
-};
138
-
139
-struct ghcb_state {
140
-    struct ghcb *ghcb;
141
-};
142
143
/* For early boot SVSM communication */
144
-static struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
145
+struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
146
147
-static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
148
-static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
149
-static DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
150
-static DEFINE_PER_CPU(u64, svsm_caa_pa);
151
+DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
152
+DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
153
+DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
154
+DEFINE_PER_CPU(u64, svsm_caa_pa);
155
156
static __always_inline bool on_vc_stack(struct pt_regs *regs)
157
{
158
@@ -XXX,XX +XXX,XX @@ void noinstr __sev_es_ist_exit(void)
159
*
160
* Callers must disable local interrupts around it.
161
*/
162
-static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
163
+noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
164
{
165
    struct sev_es_runtime_data *data;
166
    struct ghcb *ghcb;
167
@@ -XXX,XX +XXX,XX @@ static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
168
    return ghcb;
169
}
170
171
-static inline u64 sev_es_rd_ghcb_msr(void)
172
-{
173
-    return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
174
-}
175
-
176
-static __always_inline void sev_es_wr_ghcb_msr(u64 val)
177
-{
178
-    u32 low, high;
179
-
180
-    low = (u32)(val);
181
-    high = (u32)(val >> 32);
182
-
183
-    native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
184
-}
185
-
186
static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
187
                unsigned char *buffer)
188
{
189
@@ -XXX,XX +XXX,XX @@ static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
190
/* Include code shared with pre-decompression boot stage */
191
#include "shared.c"
192
193
-static inline struct svsm_ca *svsm_get_caa(void)
194
-{
195
-    /*
196
-     * Use rIP-relative references when called early in the boot. If
197
-     * ->use_cas is set, then it is late in the boot and no need
198
-     * to worry about rIP-relative references.
199
-     */
200
-    if (RIP_REL_REF(sev_cfg).use_cas)
201
-        return this_cpu_read(svsm_caa);
202
-    else
203
-        return RIP_REL_REF(boot_svsm_caa);
204
-}
205
-
206
-static u64 svsm_get_caa_pa(void)
207
-{
208
-    /*
209
-     * Use rIP-relative references when called early in the boot. If
210
-     * ->use_cas is set, then it is late in the boot and no need
211
-     * to worry about rIP-relative references.
212
-     */
213
-    if (RIP_REL_REF(sev_cfg).use_cas)
214
-        return this_cpu_read(svsm_caa_pa);
215
-    else
216
-        return RIP_REL_REF(boot_svsm_caa_pa);
217
-}
218
-
219
-static noinstr void __sev_put_ghcb(struct ghcb_state *state)
220
+noinstr void __sev_put_ghcb(struct ghcb_state *state)
221
{
222
    struct sev_es_runtime_data *data;
223
    struct ghcb *ghcb;
224
@@ -XXX,XX +XXX,XX @@ static noinstr void __sev_put_ghcb(struct ghcb_state *state)
225
    }
226
}
227
228
-static int svsm_perform_call_protocol(struct svsm_call *call)
229
+int svsm_perform_call_protocol(struct svsm_call *call)
230
{
231
    struct ghcb_state state;
232
    unsigned long flags;
233
@@ -XXX,XX +XXX,XX @@ static u64 __init get_jump_table_addr(void)
234
    return ret;
235
}
236
237
-static void __head
238
+void __head
239
early_set_pages_state(unsigned long vaddr, unsigned long paddr,
240
         unsigned long npages, enum psc_op op)
241
{
242
diff --git a/arch/x86/coco/sev/shared.c b/arch/x86/coco/sev/shared.c
243
index XXXXXXX..XXXXXXX 100644
244
--- a/arch/x86/coco/sev/shared.c
245
+++ b/arch/x86/coco/sev/shared.c
246
@@ -XXX,XX +XXX,XX @@
247
*/
248
u8 snp_vmpl __ro_after_init;
249
EXPORT_SYMBOL_GPL(snp_vmpl);
250
-static struct svsm_ca *boot_svsm_caa __ro_after_init;
251
-static u64 boot_svsm_caa_pa __ro_after_init;
252
-
253
-static struct svsm_ca *svsm_get_caa(void);
254
-static u64 svsm_get_caa_pa(void);
255
-static int svsm_perform_call_protocol(struct svsm_call *call);
256
+struct svsm_ca *boot_svsm_caa __ro_after_init;
257
+u64 boot_svsm_caa_pa __ro_after_init;
258
259
/* I/O parameters for CPUID-related helpers */
260
struct cpuid_leaf {
261
@@ -XXX,XX +XXX,XX @@ struct cpuid_leaf {
262
    u32 edx;
263
};
264
265
-/*
266
- * Individual entries of the SNP CPUID table, as defined by the SNP
267
- * Firmware ABI, Revision 0.9, Section 7.1, Table 14.
268
- */
269
-struct snp_cpuid_fn {
270
-    u32 eax_in;
271
-    u32 ecx_in;
272
-    u64 xcr0_in;
273
-    u64 xss_in;
274
-    u32 eax;
275
-    u32 ebx;
276
-    u32 ecx;
277
-    u32 edx;
278
-    u64 __reserved;
279
-} __packed;
280
-
281
-/*
282
- * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9,
283
- * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit
284
- * of 64 entries per CPUID table.
285
- */
286
-#define SNP_CPUID_COUNT_MAX 64
287
-
288
-struct snp_cpuid_table {
289
-    u32 count;
290
-    u32 __reserved1;
291
-    u64 __reserved2;
292
-    struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX];
293
-} __packed;
294
-
295
/*
296
* Since feature negotiation related variables are set early in the boot
297
* process they must reside in the .data section so as not to be zeroed
298
@@ -XXX,XX +XXX,XX @@ static u32 cpuid_std_range_max __ro_after_init;
299
static u32 cpuid_hyp_range_max __ro_after_init;
300
static u32 cpuid_ext_range_max __ro_after_init;
301
302
-static bool __init sev_es_check_cpu_features(void)
303
+bool __init sev_es_check_cpu_features(void)
304
{
305
    if (!has_cpuflag(X86_FEATURE_RDRAND)) {
306
        error("RDRAND instruction not supported - no trusted source of randomness available\n");
307
@@ -XXX,XX +XXX,XX @@ static bool __init sev_es_check_cpu_features(void)
308
    return true;
309
}
310
311
-static void __head __noreturn
312
+void __head __noreturn
313
sev_es_terminate(unsigned int set, unsigned int reason)
314
{
315
    u64 val = GHCB_MSR_TERM_REQ;
316
@@ -XXX,XX +XXX,XX @@ sev_es_terminate(unsigned int set, unsigned int reason)
317
/*
318
* The hypervisor features are available from GHCB version 2 onward.
319
*/
320
-static u64 get_hv_features(void)
321
+u64 get_hv_features(void)
322
{
323
    u64 val;
324
325
@@ -XXX,XX +XXX,XX @@ static u64 get_hv_features(void)
326
    return GHCB_MSR_HV_FT_RESP_VAL(val);
327
}
328
329
-static void snp_register_ghcb_early(unsigned long paddr)
330
+void snp_register_ghcb_early(unsigned long paddr)
331
{
332
    unsigned long pfn = paddr >> PAGE_SHIFT;
333
    u64 val;
334
@@ -XXX,XX +XXX,XX @@ static void snp_register_ghcb_early(unsigned long paddr)
335
        sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_REGISTER);
336
}
337
338
-static bool sev_es_negotiate_protocol(void)
339
+bool sev_es_negotiate_protocol(void)
340
{
341
    u64 val;
342
343
@@ -XXX,XX +XXX,XX @@ static bool sev_es_negotiate_protocol(void)
344
    return true;
345
}
346
347
-static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
348
-{
349
-    ghcb->save.sw_exit_code = 0;
350
-    __builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
351
-}
352
-
353
static bool vc_decoding_needed(unsigned long exit_code)
354
{
355
    /* Exceptions don't require to decode the instruction */
356
@@ -XXX,XX +XXX,XX @@ static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
357
    return svsm_process_result_codes(call);
358
}
359
360
-static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
361
-                     struct es_em_ctxt *ctxt,
362
-                     u64 exit_code, u64 exit_info_1,
363
-                     u64 exit_info_2)
364
+enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
365
+                 struct es_em_ctxt *ctxt,
366
+                 u64 exit_code, u64 exit_info_1,
367
+                 u64 exit_info_2)
368
{
369
    /* Fill in protocol and format specifiers */
370
    ghcb->protocol_version = ghcb_version;
371
@@ -XXX,XX +XXX,XX @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid
372
* while running with the initial identity mapping as well as the
373
* switch-over to kernel virtual addresses later.
374
*/
375
-static const struct snp_cpuid_table *snp_cpuid_get_table(void)
376
+const struct snp_cpuid_table *snp_cpuid_get_table(void)
377
{
378
    return rip_rel_ptr(&cpuid_table_copy);
379
}
380
diff --git a/arch/x86/include/asm/sev-internal.h b/arch/x86/include/asm/sev-internal.h
381
new file mode 100644
382
index XXXXXXX..XXXXXXX
383
--- /dev/null
384
+++ b/arch/x86/include/asm/sev-internal.h
385
@@ -XXX,XX +XXX,XX @@
386
+/* SPDX-License-Identifier: GPL-2.0 */
387
+
388
+#define DR7_RESET_VALUE 0x400
389
+
390
+extern struct ghcb boot_ghcb_page;
391
+extern struct ghcb *boot_ghcb;
392
+extern u64 sev_hv_features;
393
+
394
+/* #VC handler runtime per-CPU data */
395
+struct sev_es_runtime_data {
396
+    struct ghcb ghcb_page;
397
+
398
+    /*
399
+     * Reserve one page per CPU as backup storage for the unencrypted GHCB.
400
+     * It is needed when an NMI happens while the #VC handler uses the real
401
+     * GHCB, and the NMI handler itself is causing another #VC exception. In
402
+     * that case the GHCB content of the first handler needs to be backed up
403
+     * and restored.
404
+     */
405
+    struct ghcb backup_ghcb;
406
+
407
+    /*
408
+     * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
409
+     * There is no need for it to be atomic, because nothing is written to
410
+     * the GHCB between the read and the write of ghcb_active. So it is safe
411
+     * to use it when a nested #VC exception happens before the write.
412
+     *
413
+     * This is necessary for example in the #VC->NMI->#VC case when the NMI
414
+     * happens while the first #VC handler uses the GHCB. When the NMI code
415
+     * raises a second #VC handler it might overwrite the contents of the
416
+     * GHCB written by the first handler. To avoid this the content of the
417
+     * GHCB is saved and restored when the GHCB is detected to be in use
418
+     * already.
419
+     */
420
+    bool ghcb_active;
421
+    bool backup_ghcb_active;
422
+
423
+    /*
424
+     * Cached DR7 value - write it on DR7 writes and return it on reads.
425
+     * That value will never make it to the real hardware DR7 as debugging
426
+     * is currently unsupported in SEV-ES guests.
427
+     */
428
+    unsigned long dr7;
429
+};
430
+
431
+struct ghcb_state {
432
+    struct ghcb *ghcb;
433
+};
434
+
435
+extern struct svsm_ca boot_svsm_ca_page;
436
+
437
+struct ghcb *__sev_get_ghcb(struct ghcb_state *state);
438
+void __sev_put_ghcb(struct ghcb_state *state);
439
+
440
+DECLARE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
441
+DECLARE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
442
+
443
+void early_set_pages_state(unsigned long vaddr, unsigned long paddr,
444
+             unsigned long npages, enum psc_op op);
445
+
446
+void __noreturn sev_es_terminate(unsigned int set, unsigned int reason);
447
+
448
+DECLARE_PER_CPU(struct svsm_ca *, svsm_caa);
449
+DECLARE_PER_CPU(u64, svsm_caa_pa);
450
+
451
+extern struct svsm_ca *boot_svsm_caa;
452
+extern u64 boot_svsm_caa_pa;
453
+
454
+static __always_inline struct svsm_ca *svsm_get_caa(void)
455
+{
456
+    /*
457
+     * Use rIP-relative references when called early in the boot. If
458
+     * ->use_cas is set, then it is late in the boot and no need
459
+     * to worry about rIP-relative references.
460
+     */
461
+    if (RIP_REL_REF(sev_cfg).use_cas)
462
+        return this_cpu_read(svsm_caa);
463
+    else
464
+        return RIP_REL_REF(boot_svsm_caa);
465
+}
466
+
467
+static __always_inline u64 svsm_get_caa_pa(void)
468
+{
469
+    /*
470
+     * Use rIP-relative references when called early in the boot. If
471
+     * ->use_cas is set, then it is late in the boot and no need
472
+     * to worry about rIP-relative references.
473
+     */
474
+    if (RIP_REL_REF(sev_cfg).use_cas)
475
+        return this_cpu_read(svsm_caa_pa);
476
+    else
477
+        return RIP_REL_REF(boot_svsm_caa_pa);
478
+}
479
+
480
+int svsm_perform_call_protocol(struct svsm_call *call);
481
+
482
+static inline u64 sev_es_rd_ghcb_msr(void)
483
+{
484
+    return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
485
+}
486
+
487
+static __always_inline void sev_es_wr_ghcb_msr(u64 val)
488
+{
489
+    u32 low, high;
490
+
491
+    low = (u32)(val);
492
+    high = (u32)(val >> 32);
493
+
494
+    native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
495
+}
496
+
497
+enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
498
+                 struct es_em_ctxt *ctxt,
499
+                 u64 exit_code, u64 exit_info_1,
500
+                 u64 exit_info_2);
501
+
502
+void snp_register_ghcb_early(unsigned long paddr);
503
+bool sev_es_negotiate_protocol(void);
504
+bool sev_es_check_cpu_features(void);
505
+u64 get_hv_features(void);
506
+
507
+const struct snp_cpuid_table *snp_cpuid_get_table(void);
508
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
509
index XXXXXXX..XXXXXXX 100644
510
--- a/arch/x86/include/asm/sev.h
511
+++ b/arch/x86/include/asm/sev.h
512
@@ -XXX,XX +XXX,XX @@
513
#include <asm/sev-common.h>
514
#include <asm/coco.h>
515
#include <asm/set_memory.h>
516
+#include <asm/svm.h>
517
518
#define GHCB_PROTOCOL_MIN    1ULL
519
#define GHCB_PROTOCOL_MAX    2ULL
520
@@ -XXX,XX +XXX,XX @@ extern void vc_no_ghcb(void);
521
extern void vc_boot_ghcb(void);
522
extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
523
524
+/*
525
+ * Individual entries of the SNP CPUID table, as defined by the SNP
526
+ * Firmware ABI, Revision 0.9, Section 7.1, Table 14.
527
+ */
528
+struct snp_cpuid_fn {
529
+    u32 eax_in;
530
+    u32 ecx_in;
531
+    u64 xcr0_in;
532
+    u64 xss_in;
533
+    u32 eax;
534
+    u32 ebx;
535
+    u32 ecx;
536
+    u32 edx;
537
+    u64 __reserved;
538
+} __packed;
539
+
540
+/*
541
+ * SNP CPUID table, as defined by the SNP Firmware ABI, Revision 0.9,
542
+ * Section 8.14.2.6. Also noted there is the SNP firmware-enforced limit
543
+ * of 64 entries per CPUID table.
544
+ */
545
+#define SNP_CPUID_COUNT_MAX 64
546
+
547
+struct snp_cpuid_table {
548
+    u32 count;
549
+    u32 __reserved1;
550
+    u64 __reserved2;
551
+    struct snp_cpuid_fn fn[SNP_CPUID_COUNT_MAX];
552
+} __packed;
553
+
554
/* PVALIDATE return codes */
555
#define PVALIDATE_FAIL_SIZEMISMATCH    6
556
557
@@ -XXX,XX +XXX,XX @@ int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req
558
void __init snp_secure_tsc_prepare(void);
559
void __init snp_secure_tsc_init(void);
560
561
+static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
562
+{
563
+    ghcb->save.sw_exit_code = 0;
564
+    __builtin_memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
565
+}
566
+
567
#else    /* !CONFIG_AMD_MEM_ENCRYPT */
568
569
#define snp_vmpl 0
570
--
571
2.49.0.504.g3bcea36a83-goog
1
From: Ard Biesheuvel <ardb@kernel.org>
1
From: Ard Biesheuvel <ardb@kernel.org>
2
2
3
The 5-level paging trampoline is used by both the EFI stub and the
3
Disentangle the SEV core code from the SEV code that is called during
4
traditional decompressor. Move it out of the decompressor sources into
4
early boot. The latter piece will be moved into startup/ in a subsequent
5
the newly minted arch/x86/boot/startup/ sub-directory which will hold
5
patch.
6
startup code that may be shared between the decompressor, the EFI stub
7
and the kernel proper, and needs to tolerate being called during early
8
boot, before the kernel virtual mapping has been created.
9
10
This will allow the 5-level paging trampoline to be used by EFI boot
11
images such as zboot that omit the traditional decompressor entirely.
12
6
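To make the "before the kernel virtual mapping has been created" constraint
concrete, a contrived sketch (function name and flag bits are illustrative,
not taken from the series):

    /*
     * Code under arch/x86/boot/startup/ may run while the CPU still uses
     * the 1:1 (identity) mapping, so pointers and physical addresses
     * coincide and no kernel virtual address may be dereferenced.
     */
    static unsigned long example_install_pgd_entry(unsigned long *pgd,
                                                   unsigned long pa)
    {
            pgd[0] = pa | 0x23;        /* P | RW | A - illustrative flags */
            return (unsigned long)pgd; /* identity mapped: VA == PA */
    }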
13
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
7
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
14
---
8
---
15
arch/x86/Makefile | 1 +
9
arch/x86/boot/compressed/sev.c | 2 +
16
arch/x86/boot/compressed/Makefile | 2 +-
10
arch/x86/coco/sev/Makefile | 12 +-
17
arch/x86/boot/startup/Makefile | 3 +++
11
arch/x86/coco/sev/core.c | 1574 ++++----------------
18
arch/x86/boot/{compressed => startup}/la57toggle.S | 0
12
arch/x86/coco/sev/shared.c | 281 ----
19
4 files changed, 5 insertions(+), 1 deletion(-)
13
arch/x86/coco/sev/startup.c | 1395 +++++++++++++++++
14
5 files changed, 1658 insertions(+), 1606 deletions(-)
20
15
21
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
16
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
22
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
23
--- a/arch/x86/Makefile
18
--- a/arch/x86/boot/compressed/sev.c
24
+++ b/arch/x86/Makefile
19
+++ b/arch/x86/boot/compressed/sev.c
25
@@ -XXX,XX +XXX,XX @@ archprepare: $(cpufeaturemasks.hdr)
20
@@ -XXX,XX +XXX,XX @@ u64 svsm_get_caa_pa(void)
26
###
21
27
# Kernel objects
22
int svsm_perform_call_protocol(struct svsm_call *call);
28
23
29
+core-y += arch/x86/boot/startup/
24
+u8 snp_vmpl;
30
libs-y += arch/x86/lib/
25
+
31
26
/* Include code for early handlers */
32
# drivers-y are linked after core-y
27
#include "../../coco/sev/shared.c"
33
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
28
29
diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile
34
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
35
--- a/arch/x86/boot/compressed/Makefile
31
--- a/arch/x86/coco/sev/Makefile
36
+++ b/arch/x86/boot/compressed/Makefile
32
+++ b/arch/x86/coco/sev/Makefile
37
@@ -XXX,XX +XXX,XX @@ ifdef CONFIG_X86_64
33
@@ -XXX,XX +XXX,XX @@
38
    vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/mem_encrypt.o
34
# SPDX-License-Identifier: GPL-2.0
39
    vmlinux-objs-y += $(obj)/pgtable_64.o
35
40
    vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o
36
-obj-y += core.o
41
-    vmlinux-objs-y += $(obj)/la57toggle.o
37
+obj-y += core.o startup.o
38
39
# jump tables are emitted using absolute references in non-PIC code
40
# so they cannot be used in the early SEV startup code
41
-CFLAGS_core.o += -fno-jump-tables
42
+CFLAGS_startup.o += -fno-jump-tables
43
44
ifdef CONFIG_FUNCTION_TRACER
45
-CFLAGS_REMOVE_core.o = -pg
46
+CFLAGS_REMOVE_startup.o = -pg
42
endif
47
endif
43
48
44
vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
49
-KASAN_SANITIZE_core.o    := n
45
@@ -XXX,XX +XXX,XX @@ vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o
50
-KMSAN_SANITIZE_core.o    := n
46
51
-KCOV_INSTRUMENT_core.o    := n
47
vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o
52
+KASAN_SANITIZE_startup.o    := n
48
vmlinux-libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
53
+KMSAN_SANITIZE_startup.o    := n
49
+vmlinux-libs-$(CONFIG_X86_64)    += $(objtree)/arch/x86/boot/startup/lib.a
54
+KCOV_INSTRUMENT_startup.o    := n
50
55
51
$(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE
56
# With some compiler versions the generated code results in boot hangs, caused
52
    $(call if_changed,ld)
57
# by several compilation units. To be safe, disable all instrumentation.
53
diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile
58
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
59
index XXXXXXX..XXXXXXX 100644
60
--- a/arch/x86/coco/sev/core.c
61
+++ b/arch/x86/coco/sev/core.c
62
@@ -XXX,XX +XXX,XX @@ static const char * const sev_status_feat_names[] = {
63
    [MSR_AMD64_SNP_SMT_PROT_BIT]        = "SMTProt",
64
};
65
66
-/* For early boot hypervisor communication in SEV-ES enabled guests */
67
-struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
68
-
69
-/*
70
- * Needs to be in the .data section because we need it NULL before bss is
71
- * cleared
72
- */
73
-struct ghcb *boot_ghcb __section(".data");
74
-
75
-/* Bitmap of SEV features supported by the hypervisor */
76
-u64 sev_hv_features __ro_after_init;
77
-
78
/* Secrets page physical address from the CC blob */
79
static u64 secrets_pa __ro_after_init;
80
81
@@ -XXX,XX +XXX,XX @@ static u64 snp_tsc_scale __ro_after_init;
82
static u64 snp_tsc_offset __ro_after_init;
83
static u64 snp_tsc_freq_khz __ro_after_init;
84
85
-
86
-/* For early boot SVSM communication */
87
-struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
88
-
89
DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
90
DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
91
-DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
92
-DEFINE_PER_CPU(u64, svsm_caa_pa);
93
+
94
+/*
95
+ * SVSM related information:
96
+ * When running under an SVSM, the VMPL that Linux is executing at must be
97
+ * non-zero. The VMPL is therefore used to indicate the presence of an SVSM.
98
+ */
99
+u8 snp_vmpl __ro_after_init;
100
+EXPORT_SYMBOL_GPL(snp_vmpl);
101
102
static __always_inline bool on_vc_stack(struct pt_regs *regs)
103
{
104
@@ -XXX,XX +XXX,XX @@ static __always_inline bool on_vc_stack(struct pt_regs *regs)
105
    return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
106
}
107
108
+
109
/*
110
* This function handles the case when an NMI is raised in the #VC
111
* exception handler entry code, before the #VC handler has switched off
112
@@ -XXX,XX +XXX,XX @@ void noinstr __sev_es_ist_exit(void)
113
    this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
114
}
115
116
-/*
117
- * Nothing shall interrupt this code path while holding the per-CPU
118
- * GHCB. The backup GHCB is only for NMIs interrupting this path.
119
- *
120
- * Callers must disable local interrupts around it.
121
- */
122
-noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
123
+static u64 __init get_snp_jump_table_addr(void)
124
{
125
-    struct sev_es_runtime_data *data;
126
-    struct ghcb *ghcb;
127
-
128
-    WARN_ON(!irqs_disabled());
129
-
130
-    data = this_cpu_read(runtime_data);
131
-    ghcb = &data->ghcb_page;
132
-
133
-    if (unlikely(data->ghcb_active)) {
134
-        /* GHCB is already in use - save its contents */
135
-
136
-        if (unlikely(data->backup_ghcb_active)) {
137
-            /*
138
-             * Backup-GHCB is also already in use. There is no way
139
-             * to continue here so just kill the machine. To make
140
-             * panic() work, mark GHCBs inactive so that messages
141
-             * can be printed out.
142
-             */
143
-            data->ghcb_active = false;
144
-            data->backup_ghcb_active = false;
145
-
146
-            instrumentation_begin();
147
-            panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
148
-            instrumentation_end();
149
-        }
150
-
151
-        /* Mark backup_ghcb active before writing to it */
152
-        data->backup_ghcb_active = true;
153
-
154
-        state->ghcb = &data->backup_ghcb;
155
+    struct snp_secrets_page *secrets;
156
+    void __iomem *mem;
157
+    u64 addr;
158
159
-        /* Backup GHCB content */
160
-        *state->ghcb = *ghcb;
161
-    } else {
162
-        state->ghcb = NULL;
163
-        data->ghcb_active = true;
164
+    mem = ioremap_encrypted(secrets_pa, PAGE_SIZE);
165
+    if (!mem) {
166
+        pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
167
+        return 0;
168
    }
169
170
-    return ghcb;
171
-}
172
+    secrets = (__force struct snp_secrets_page *)mem;
173
174
-static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
175
-                unsigned char *buffer)
176
-{
177
-    return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
178
+    addr = secrets->os_area.ap_jump_table_pa;
179
+    iounmap(mem);
180
+
181
+    return addr;
182
}
183
184
-static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
185
+void noinstr __sev_es_nmi_complete(void)
186
{
187
-    char buffer[MAX_INSN_SIZE];
188
-    int insn_bytes;
189
-
190
-    insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
191
-    if (insn_bytes == 0) {
192
-        /* Nothing could be copied */
193
-        ctxt->fi.vector = X86_TRAP_PF;
194
-        ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
195
-        ctxt->fi.cr2 = ctxt->regs->ip;
196
-        return ES_EXCEPTION;
197
-    } else if (insn_bytes == -EINVAL) {
198
-        /* Effective RIP could not be calculated */
199
-        ctxt->fi.vector = X86_TRAP_GP;
200
-        ctxt->fi.error_code = 0;
201
-        ctxt->fi.cr2 = 0;
202
-        return ES_EXCEPTION;
203
-    }
204
-
205
-    if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
206
-        return ES_DECODE_FAILED;
207
+    struct ghcb_state state;
208
+    struct ghcb *ghcb;
209
210
-    if (ctxt->insn.immediate.got)
211
-        return ES_OK;
212
-    else
213
-        return ES_DECODE_FAILED;
214
-}
215
+    ghcb = __sev_get_ghcb(&state);
216
217
-static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
218
-{
219
-    char buffer[MAX_INSN_SIZE];
220
-    int res, ret;
221
+    vc_ghcb_invalidate(ghcb);
222
+    ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
223
+    ghcb_set_sw_exit_info_1(ghcb, 0);
224
+    ghcb_set_sw_exit_info_2(ghcb, 0);
225
226
-    res = vc_fetch_insn_kernel(ctxt, buffer);
227
-    if (res) {
228
-        ctxt->fi.vector = X86_TRAP_PF;
229
-        ctxt->fi.error_code = X86_PF_INSTR;
230
-        ctxt->fi.cr2 = ctxt->regs->ip;
231
-        return ES_EXCEPTION;
232
-    }
233
+    sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
234
+    VMGEXIT();
235
236
-    ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
237
-    if (ret < 0)
238
-        return ES_DECODE_FAILED;
239
-    else
240
-        return ES_OK;
241
+    __sev_put_ghcb(&state);
242
}
243
244
-static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
245
+static u64 __init get_jump_table_addr(void)
246
{
247
-    if (user_mode(ctxt->regs))
248
-        return __vc_decode_user_insn(ctxt);
249
-    else
250
-        return __vc_decode_kern_insn(ctxt);
251
-}
252
+    struct ghcb_state state;
253
+    unsigned long flags;
254
+    struct ghcb *ghcb;
255
+    u64 ret = 0;
256
257
-static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
258
-                 char *dst, char *buf, size_t size)
259
-{
260
-    unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
261
+    if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
262
+        return get_snp_jump_table_addr();
263
264
-    /*
265
-     * This function uses __put_user() independent of whether kernel or user
266
-     * memory is accessed. This works fine because __put_user() does no
267
-     * sanity checks of the pointer being accessed. All that it does is
268
-     * to report when the access failed.
269
-     *
270
-     * Also, this function runs in atomic context, so __put_user() is not
271
-     * allowed to sleep. The page-fault handler detects that it is running
272
-     * in atomic context and will not try to take mmap_sem and handle the
273
-     * fault, so additional pagefault_enable()/disable() calls are not
274
-     * needed.
275
-     *
276
-     * The access can't be done via copy_to_user() here because
277
-     * vc_write_mem() must not use string instructions to access unsafe
278
-     * memory. The reason is that MOVS is emulated by the #VC handler by
279
-     * splitting the move up into a read and a write and taking a nested #VC
280
-     * exception on whatever of them is the MMIO access. Using string
281
-     * instructions here would cause infinite nesting.
282
-     */
283
-    switch (size) {
284
-    case 1: {
285
-        u8 d1;
286
-        u8 __user *target = (u8 __user *)dst;
287
-
288
-        memcpy(&d1, buf, 1);
289
-        if (__put_user(d1, target))
290
-            goto fault;
291
-        break;
292
-    }
293
-    case 2: {
294
-        u16 d2;
295
-        u16 __user *target = (u16 __user *)dst;
296
+    local_irq_save(flags);
297
298
-        memcpy(&d2, buf, 2);
299
-        if (__put_user(d2, target))
300
-            goto fault;
301
-        break;
302
-    }
303
-    case 4: {
304
-        u32 d4;
305
-        u32 __user *target = (u32 __user *)dst;
306
+    ghcb = __sev_get_ghcb(&state);
307
308
-        memcpy(&d4, buf, 4);
309
-        if (__put_user(d4, target))
310
-            goto fault;
311
-        break;
312
-    }
313
-    case 8: {
314
-        u64 d8;
315
-        u64 __user *target = (u64 __user *)dst;
316
+    vc_ghcb_invalidate(ghcb);
317
+    ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
318
+    ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
319
+    ghcb_set_sw_exit_info_2(ghcb, 0);
320
321
-        memcpy(&d8, buf, 8);
322
-        if (__put_user(d8, target))
323
-            goto fault;
324
-        break;
325
-    }
326
-    default:
327
-        WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
328
-        return ES_UNSUPPORTED;
329
-    }
330
+    sev_es_wr_ghcb_msr(__pa(ghcb));
331
+    VMGEXIT();
332
333
-    return ES_OK;
334
+    if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
335
+     ghcb_sw_exit_info_2_is_valid(ghcb))
336
+        ret = ghcb->save.sw_exit_info_2;
337
338
-fault:
339
-    if (user_mode(ctxt->regs))
340
-        error_code |= X86_PF_USER;
341
+    __sev_put_ghcb(&state);
342
343
-    ctxt->fi.vector = X86_TRAP_PF;
344
-    ctxt->fi.error_code = error_code;
345
-    ctxt->fi.cr2 = (unsigned long)dst;
346
+    local_irq_restore(flags);
347
348
-    return ES_EXCEPTION;
349
+    return ret;
350
}
351
352
-static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
353
-                 char *src, char *buf, size_t size)
354
+static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size,
355
+                 int ret, u64 svsm_ret)
356
{
357
-    unsigned long error_code = X86_PF_PROT;
358
-
359
-    /*
360
-     * This function uses __get_user() independent of whether kernel or user
361
-     * memory is accessed. This works fine because __get_user() does no
362
-     * sanity checks of the pointer being accessed. All that it does is
363
-     * to report when the access failed.
364
-     *
365
-     * Also, this function runs in atomic context, so __get_user() is not
366
-     * allowed to sleep. The page-fault handler detects that it is running
367
-     * in atomic context and will not try to take mmap_sem and handle the
368
-     * fault, so additional pagefault_enable()/disable() calls are not
369
-     * needed.
370
-     *
371
-     * The access can't be done via copy_from_user() here because
372
-     * vc_read_mem() must not use string instructions to access unsafe
373
-     * memory. The reason is that MOVS is emulated by the #VC handler by
374
-     * splitting the move up into a read and a write and taking a nested #VC
375
-     * exception on whatever of them is the MMIO access. Using string
376
-     * instructions here would cause infinite nesting.
377
-     */
378
-    switch (size) {
379
-    case 1: {
380
-        u8 d1;
381
-        u8 __user *s = (u8 __user *)src;
382
-
383
-        if (__get_user(d1, s))
384
-            goto fault;
385
-        memcpy(buf, &d1, 1);
386
-        break;
387
-    }
388
-    case 2: {
389
-        u16 d2;
390
-        u16 __user *s = (u16 __user *)src;
391
-
392
-        if (__get_user(d2, s))
393
-            goto fault;
394
-        memcpy(buf, &d2, 2);
395
-        break;
396
-    }
397
-    case 4: {
398
-        u32 d4;
399
-        u32 __user *s = (u32 __user *)src;
400
-
401
-        if (__get_user(d4, s))
402
-            goto fault;
403
-        memcpy(buf, &d4, 4);
404
-        break;
405
-    }
406
-    case 8: {
407
-        u64 d8;
408
-        u64 __user *s = (u64 __user *)src;
409
-        if (__get_user(d8, s))
410
-            goto fault;
411
-        memcpy(buf, &d8, 8);
412
-        break;
413
-    }
414
-    default:
415
-        WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
416
-        return ES_UNSUPPORTED;
417
-    }
418
+    WARN(1, "PVALIDATE failure: pfn: 0x%llx, action: %u, size: %u, ret: %d, svsm_ret: 0x%llx\n",
419
+     pfn, action, page_size, ret, svsm_ret);
420
421
-    return ES_OK;
422
+    sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
423
+}
424
425
-fault:
426
-    if (user_mode(ctxt->regs))
427
-        error_code |= X86_PF_USER;
428
+static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret)
429
+{
430
+    unsigned int page_size;
431
+    bool action;
432
+    u64 pfn;
433
434
-    ctxt->fi.vector = X86_TRAP_PF;
435
-    ctxt->fi.error_code = error_code;
436
-    ctxt->fi.cr2 = (unsigned long)src;
437
+    pfn = pc->entry[pc->cur_index].pfn;
438
+    action = pc->entry[pc->cur_index].action;
439
+    page_size = pc->entry[pc->cur_index].page_size;
440
441
-    return ES_EXCEPTION;
442
+    __pval_terminate(pfn, action, page_size, ret, svsm_ret);
443
}
444
445
-static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
446
-                     unsigned long vaddr, phys_addr_t *paddr)
447
+static void pval_pages(struct snp_psc_desc *desc)
448
{
449
-    unsigned long va = (unsigned long)vaddr;
450
-    unsigned int level;
451
-    phys_addr_t pa;
452
-    pgd_t *pgd;
453
-    pte_t *pte;
454
-
455
-    pgd = __va(read_cr3_pa());
456
-    pgd = &pgd[pgd_index(va)];
457
-    pte = lookup_address_in_pgd(pgd, va, &level);
458
-    if (!pte) {
459
-        ctxt->fi.vector = X86_TRAP_PF;
460
-        ctxt->fi.cr2 = vaddr;
461
-        ctxt->fi.error_code = 0;
462
-
463
-        if (user_mode(ctxt->regs))
464
-            ctxt->fi.error_code |= X86_PF_USER;
465
+    struct psc_entry *e;
466
+    unsigned long vaddr;
467
+    unsigned int size;
468
+    unsigned int i;
469
+    bool validate;
470
+    u64 pfn;
471
+    int rc;
472
473
-        return ES_EXCEPTION;
474
-    }
475
+    for (i = 0; i <= desc->hdr.end_entry; i++) {
476
+        e = &desc->entries[i];
477
478
-    if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
479
-        /* Emulated MMIO to/from encrypted memory not supported */
480
-        return ES_UNSUPPORTED;
481
+        pfn = e->gfn;
482
+        vaddr = (unsigned long)pfn_to_kaddr(pfn);
483
+        size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
484
+        validate = e->operation == SNP_PAGE_STATE_PRIVATE;
485
486
-    pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
487
-    pa |= va & ~page_level_mask(level);
488
+        rc = pvalidate(vaddr, size, validate);
489
+        if (!rc)
490
+            continue;
491
492
-    *paddr = pa;
493
+        if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
494
+            unsigned long vaddr_end = vaddr + PMD_SIZE;
495
496
-    return ES_OK;
497
+            for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) {
498
+                rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
499
+                if (rc)
500
+                    __pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0);
501
+            }
502
+        } else {
503
+            __pval_terminate(pfn, validate, size, rc, 0);
504
+        }
505
+    }
506
}
507
508
-static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
509
+static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action,
510
+                    struct svsm_pvalidate_call *pc)
511
{
512
-    BUG_ON(size > 4);
513
+    struct svsm_pvalidate_entry *pe;
514
515
-    if (user_mode(ctxt->regs)) {
516
-        struct thread_struct *t = &current->thread;
517
-        struct io_bitmap *iobm = t->io_bitmap;
518
-        size_t idx;
519
+    /* Nothing in the CA yet */
520
+    pc->num_entries = 0;
521
+    pc->cur_index = 0;
522
523
-        if (!iobm)
524
-            goto fault;
525
+    pe = &pc->entry[0];
526
527
-        for (idx = port; idx < port + size; ++idx) {
528
-            if (test_bit(idx, iobm->bitmap))
529
-                goto fault;
530
-        }
531
-    }
532
+    while (pfn < pfn_end) {
533
+        pe->page_size = RMP_PG_SIZE_4K;
534
+        pe->action = action;
535
+        pe->ignore_cf = 0;
536
+        pe->pfn = pfn;
537
538
-    return ES_OK;
539
+        pe++;
540
+        pfn++;
541
542
-fault:
543
-    ctxt->fi.vector = X86_TRAP_GP;
544
-    ctxt->fi.error_code = 0;
545
+        pc->num_entries++;
546
+        if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
547
+            break;
548
+    }
549
550
-    return ES_EXCEPTION;
551
+    return pfn;
552
}
553
554
-static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
555
+static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int desc_entry,
556
+                 struct svsm_pvalidate_call *pc)
557
{
558
-    long error_code = ctxt->fi.error_code;
559
-    int trapnr = ctxt->fi.vector;
560
-
561
-    ctxt->regs->orig_ax = ctxt->fi.error_code;
562
-
563
-    switch (trapnr) {
564
-    case X86_TRAP_GP:
565
-        exc_general_protection(ctxt->regs, error_code);
566
-        break;
567
-    case X86_TRAP_UD:
568
-        exc_invalid_op(ctxt->regs);
569
-        break;
570
-    case X86_TRAP_PF:
571
-        write_cr2(ctxt->fi.cr2);
572
-        exc_page_fault(ctxt->regs, error_code);
573
-        break;
574
-    case X86_TRAP_AC:
575
-        exc_alignment_check(ctxt->regs, error_code);
576
-        break;
577
-    default:
578
-        pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
579
-        BUG();
580
-    }
581
-}
582
+    struct svsm_pvalidate_entry *pe;
583
+    struct psc_entry *e;
584
585
-/* Include code shared with pre-decompression boot stage */
586
-#include "shared.c"
587
+    /* Nothing in the CA yet */
588
+    pc->num_entries = 0;
589
+    pc->cur_index = 0;
590
591
-noinstr void __sev_put_ghcb(struct ghcb_state *state)
592
-{
593
-    struct sev_es_runtime_data *data;
594
-    struct ghcb *ghcb;
595
+    pe = &pc->entry[0];
596
+    e = &desc->entries[desc_entry];
597
598
-    WARN_ON(!irqs_disabled());
599
+    while (desc_entry <= desc->hdr.end_entry) {
600
+        pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
601
+        pe->action = e->operation == SNP_PAGE_STATE_PRIVATE;
602
+        pe->ignore_cf = 0;
603
+        pe->pfn = e->gfn;
604
605
-    data = this_cpu_read(runtime_data);
606
-    ghcb = &data->ghcb_page;
607
+        pe++;
608
+        e++;
609
610
-    if (state->ghcb) {
611
-        /* Restore GHCB from Backup */
612
-        *ghcb = *state->ghcb;
613
-        data->backup_ghcb_active = false;
614
-        state->ghcb = NULL;
615
-    } else {
616
-        /*
617
-         * Invalidate the GHCB so a VMGEXIT instruction issued
618
-         * from userspace won't appear to be valid.
619
-         */
620
-        vc_ghcb_invalidate(ghcb);
621
-        data->ghcb_active = false;
622
+        desc_entry++;
623
+        pc->num_entries++;
624
+        if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
625
+            break;
626
    }
627
+
628
+    return desc_entry;
629
}
630
631
-int svsm_perform_call_protocol(struct svsm_call *call)
632
+static void svsm_pval_pages(struct snp_psc_desc *desc)
633
{
634
-    struct ghcb_state state;
635
+    struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY];
636
+    unsigned int i, pv_4k_count = 0;
637
+    struct svsm_pvalidate_call *pc;
638
+    struct svsm_call call = {};
639
    unsigned long flags;
640
-    struct ghcb *ghcb;
641
+    bool action;
642
+    u64 pc_pa;
643
    int ret;
644
645
    /*
646
@@ -XXX,XX +XXX,XX @@ int svsm_perform_call_protocol(struct svsm_call *call)
647
    flags = native_local_irq_save();
648
649
    /*
650
-     * Use rip-relative references when called early in the boot. If
651
-     * ghcbs_initialized is set, then it is late in the boot and no need
652
-     * to worry about rip-relative references in called functions.
653
+     * The SVSM calling area (CA) can support processing 510 entries at a
654
+     * time. Loop through the Page State Change descriptor until the CA is
655
+     * full or the last entry in the descriptor is reached, at which time
656
+     * the SVSM is invoked. This repeats until all entries in the descriptor
657
+     * are processed.
658
     */
659
-    if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
660
-        ghcb = __sev_get_ghcb(&state);
661
-    else if (RIP_REL_REF(boot_ghcb))
662
-        ghcb = RIP_REL_REF(boot_ghcb);
663
-    else
664
-        ghcb = NULL;
665
+    call.caa = svsm_get_caa();
666
667
-    do {
668
-        ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
669
-             : svsm_perform_msr_protocol(call);
670
-    } while (ret == -EAGAIN);
671
+    pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
672
+    pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
673
674
-    if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
675
-        __sev_put_ghcb(&state);
676
+    /* Protocol 0, Call ID 1 */
677
+    call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
678
+    call.rcx = pc_pa;
679
680
-    native_local_irq_restore(flags);
681
+    for (i = 0; i <= desc->hdr.end_entry;) {
682
+        i = svsm_build_ca_from_psc_desc(desc, i, pc);
683
684
-    return ret;
685
-}
686
+        do {
687
+            ret = svsm_perform_call_protocol(&call);
688
+            if (!ret)
689
+                continue;
690
691
-void noinstr __sev_es_nmi_complete(void)
692
-{
693
-    struct ghcb_state state;
694
-    struct ghcb *ghcb;
695
+            /*
696
+             * Check if the entry failed because of an RMP mismatch (a
697
+             * PVALIDATE at 2M was requested, but the page is mapped in
698
+             * the RMP as 4K).
699
+             */
700
701
-    ghcb = __sev_get_ghcb(&state);
702
+            if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH &&
703
+             pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) {
704
+                /* Save this entry for post-processing at 4K */
705
+                pv_4k[pv_4k_count++] = pc->entry[pc->cur_index];
706
+
707
+                /* Skip to the next one unless at the end of the list */
708
+                pc->cur_index++;
709
+                if (pc->cur_index < pc->num_entries)
710
+                    ret = -EAGAIN;
711
+                else
712
+                    ret = 0;
713
+            }
714
+        } while (ret == -EAGAIN);
715
716
-    vc_ghcb_invalidate(ghcb);
717
-    ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
718
-    ghcb_set_sw_exit_info_1(ghcb, 0);
719
-    ghcb_set_sw_exit_info_2(ghcb, 0);
720
+        if (ret)
721
+            svsm_pval_terminate(pc, ret, call.rax_out);
722
+    }
723
724
-    sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
725
-    VMGEXIT();
726
+    /* Process any entries that failed to be validated at 2M and validate them at 4K */
727
+    for (i = 0; i < pv_4k_count; i++) {
728
+        u64 pfn, pfn_end;
729
730
-    __sev_put_ghcb(&state);
731
-}
732
+        action = pv_4k[i].action;
733
+        pfn = pv_4k[i].pfn;
734
+        pfn_end = pfn + 512;
735
736
-static u64 __init get_snp_jump_table_addr(void)
737
-{
738
-    struct snp_secrets_page *secrets;
739
-    void __iomem *mem;
740
-    u64 addr;
741
+        while (pfn < pfn_end) {
742
+            pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc);

-    mem = ioremap_encrypted(secrets_pa, PAGE_SIZE);
-    if (!mem) {
-        pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
-        return 0;
+            ret = svsm_perform_call_protocol(&call);
+            if (ret)
+                svsm_pval_terminate(pc, ret, call.rax_out);
+        }
    }

-    secrets = (__force struct snp_secrets_page *)mem;
-
-    addr = secrets->os_area.ap_jump_table_pa;
-    iounmap(mem);
-
-    return addr;
+    native_local_irq_restore(flags);
}

-static u64 __init get_jump_table_addr(void)
+static void pvalidate_pages(struct snp_psc_desc *desc)
{
-    struct ghcb_state state;
-    unsigned long flags;
-    struct ghcb *ghcb;
-    u64 ret = 0;
-
-    if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
-        return get_snp_jump_table_addr();
-
-    local_irq_save(flags);
+    if (snp_vmpl)
+        svsm_pval_pages(desc);
+    else
+        pval_pages(desc);
+}

-    ghcb = __sev_get_ghcb(&state);
+static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
+{
+    int cur_entry, end_entry, ret = 0;
+    struct snp_psc_desc *data;
+    struct es_em_ctxt ctxt;

    vc_ghcb_invalidate(ghcb);
-    ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
-    ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
-    ghcb_set_sw_exit_info_2(ghcb, 0);
-
-    sev_es_wr_ghcb_msr(__pa(ghcb));
-    VMGEXIT();
-
-    if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
-     ghcb_sw_exit_info_2_is_valid(ghcb))
-        ret = ghcb->save.sw_exit_info_2;
-
-    __sev_put_ghcb(&state);

-    local_irq_restore(flags);
-
-    return ret;
-}
+    /* Copy the input desc into GHCB shared buffer */
+    data = (struct snp_psc_desc *)ghcb->shared_buffer;
+    memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));

-void __head
-early_set_pages_state(unsigned long vaddr, unsigned long paddr,
-         unsigned long npages, enum psc_op op)
-{
-    unsigned long paddr_end;
-    u64 val;
-
-    vaddr = vaddr & PAGE_MASK;
+    /*
+     * As per the GHCB specification, the hypervisor can resume the guest
+     * before processing all the entries. Check whether all the entries
+     * are processed. If not, then keep retrying. Note, the hypervisor
+     * will update the data memory directly to indicate the status, so
+     * reference the data->hdr everywhere.
+     *
+     * The strategy here is to wait for the hypervisor to change the page
+     * state in the RMP table before guest accesses the memory pages. If the
+     * page state change was not successful, then later memory access will
+     * result in a crash.
+     */
+    cur_entry = data->hdr.cur_entry;
+    end_entry = data->hdr.end_entry;

-    paddr = paddr & PAGE_MASK;
-    paddr_end = paddr + (npages << PAGE_SHIFT);
+    while (data->hdr.cur_entry <= data->hdr.end_entry) {
+        ghcb_set_sw_scratch(ghcb, (u64)__pa(data));

-    while (paddr < paddr_end) {
-        /* Page validation must be rescinded before changing to shared */
-        if (op == SNP_PAGE_STATE_SHARED)
-            pvalidate_4k_page(vaddr, paddr, false);
+        /* This will advance the shared buffer data points to. */
+        ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);

        /*
-         * Use the MSR protocol because this function can be called before
-         * the GHCB is established.
+         * Page State Change VMGEXIT can pass error code through
+         * exit_info_2.
         */
-        sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
-        VMGEXIT();
-
-        val = sev_es_rd_ghcb_msr();
-
-        if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP)
-            goto e_term;
-
-        if (GHCB_MSR_PSC_RESP_VAL(val))
-            goto e_term;
+        if (WARN(ret || ghcb->save.sw_exit_info_2,
+             "SNP: PSC failed ret=%d exit_info_2=%llx\n",
+             ret, ghcb->save.sw_exit_info_2)) {
+            ret = 1;
+            goto out;
+        }

-        /* Page validation must be performed after changing to private */
-        if (op == SNP_PAGE_STATE_PRIVATE)
-            pvalidate_4k_page(vaddr, paddr, true);
+        /* Verify that reserved bit is not set */
+        if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
+            ret = 1;
+            goto out;
+        }

-        vaddr += PAGE_SIZE;
-        paddr += PAGE_SIZE;
+        /*
+         * Sanity check that entry processing is not going backwards.
+         * This will happen only if hypervisor is tricking us.
+         */
+        if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
+"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
+             end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
+            ret = 1;
+            goto out;
+        }
    }

-    return;
-
-e_term:
-    sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
+out:
+    return ret;
}

-void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
-                     unsigned long npages)
-{
-    /*
-     * This can be invoked in early boot while running identity mapped, so
-     * use an open coded check for SNP instead of using cc_platform_has().
-     * This eliminates worries about jump tables or checking boot_cpu_data
-     * in the cc_platform_has() function.
-     */
-    if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
-        return;
-
-     /*
-     * Ask the hypervisor to mark the memory pages as private in the RMP
-     * table.
-     */
-    early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE);
-}
-
-void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
-                    unsigned long npages)
-{
-    /*
-     * This can be invoked in early boot while running identity mapped, so
-     * use an open coded check for SNP instead of using cc_platform_has().
-     * This eliminates worries about jump tables or checking boot_cpu_data
-     * in the cc_platform_has() function.
-     */
-    if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
-        return;
-
-     /* Ask hypervisor to mark the memory pages shared in the RMP table. */
-    early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
-}
-
-static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
-                 unsigned long vaddr_end, int op)
+static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
+                 unsigned long vaddr_end, int op)
{
    struct ghcb_state state;
    bool use_large_entry;
@@ -XXX,XX +XXX,XX @@ int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
    return 0;
}

-/* Writes to the SVSM CAA MSR are ignored */
-static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write)
-{
-    if (write)
-        return ES_OK;
-
-    regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa));
-    regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa));
-
-    return ES_OK;
-}
-
-/*
- * TSC related accesses should not exit to the hypervisor when a guest is
- * executing with Secure TSC enabled, so special handling is required for
- * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ.
- */
-static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write)
-{
-    u64 tsc;
-
-    /*
-     * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled.
-     * Terminate the SNP guest when the interception is enabled.
-     */
-    if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ)
-        return ES_VMM_ERROR;
-
-    /*
-     * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC
-     * to return undefined values, so ignore all writes.
-     *
-     * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use
-     * the value returned by rdtsc_ordered().
-     */
-    if (write) {
-        WARN_ONCE(1, "TSC MSR writes are verboten!\n");
-        return ES_OK;
-    }
-
-    tsc = rdtsc_ordered();
-    regs->ax = lower_32_bits(tsc);
-    regs->dx = upper_32_bits(tsc);
-
-    return ES_OK;
-}
-
-static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
-{
-    struct pt_regs *regs = ctxt->regs;
-    enum es_result ret;
-    bool write;
-
-    /* Is it a WRMSR? */
-    write = ctxt->insn.opcode.bytes[1] == 0x30;
-
-    switch (regs->cx) {
-    case MSR_SVSM_CAA:
-        return __vc_handle_msr_caa(regs, write);
-    case MSR_IA32_TSC:
-    case MSR_AMD64_GUEST_TSC_FREQ:
-        if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
-            return __vc_handle_secure_tsc_msrs(regs, write);
-        break;
-    default:
-        break;
-    }
-
-    ghcb_set_rcx(ghcb, regs->cx);
-    if (write) {
-        ghcb_set_rax(ghcb, regs->ax);
-        ghcb_set_rdx(ghcb, regs->dx);
-    }
-
-    ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, write, 0);
-
-    if ((ret == ES_OK) && !write) {
-        regs->ax = ghcb->save.rax;
-        regs->dx = ghcb->save.rdx;
-    }
-
-    return ret;
-}
-
static void snp_register_per_cpu_ghcb(void)
1030
{
1031
    struct sev_es_runtime_data *data;
1032
@@ -XXX,XX +XXX,XX @@ void __init sev_es_init_vc_handling(void)
1033
    initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
1034
}
1035
1036
-static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
1037
-{
1038
-    int trapnr = ctxt->fi.vector;
1039
-
1040
-    if (trapnr == X86_TRAP_PF)
1041
-        native_write_cr2(ctxt->fi.cr2);
1042
-
1043
-    ctxt->regs->orig_ax = ctxt->fi.error_code;
1044
-    do_early_exception(ctxt->regs, trapnr);
1045
-}
1046
-
1047
-static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
1048
-{
1049
-    long *reg_array;
1050
-    int offset;
1051
-
1052
-    reg_array = (long *)ctxt->regs;
1053
-    offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);
1054
-
1055
-    if (offset < 0)
1056
-        return NULL;
1057
-
1058
-    offset /= sizeof(long);
1059
-
1060
-    return reg_array + offset;
1061
-}
1062
-static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
1063
-                 unsigned int bytes, bool read)
1064
-{
1065
-    u64 exit_code, exit_info_1, exit_info_2;
1066
-    unsigned long ghcb_pa = __pa(ghcb);
1067
-    enum es_result res;
1068
-    phys_addr_t paddr;
1069
-    void __user *ref;
1070
-
1071
-    ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
1072
-    if (ref == (void __user *)-1L)
1073
-        return ES_UNSUPPORTED;
1074
-
1075
-    exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
1076
-
1077
-    res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
1078
-    if (res != ES_OK) {
1079
-        if (res == ES_EXCEPTION && !read)
1080
-            ctxt->fi.error_code |= X86_PF_WRITE;
1081
-
1082
-        return res;
1083
-    }
1084
-
1085
-    exit_info_1 = paddr;
1086
-    /* Can never be greater than 8 */
1087
-    exit_info_2 = bytes;
1088
-
1089
-    ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
1090
-
1091
-    return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
1092
-}
1093
-
1094
-/*
1095
- * The MOVS instruction has two memory operands, which raises the
1096
- * problem that it is not known whether the access to the source or the
1097
- * destination caused the #VC exception (and hence whether an MMIO read
1098
- * or write operation needs to be emulated).
1099
- *
1100
- * Instead of playing games with walking page-tables and trying to guess
1101
- * whether the source or destination is an MMIO range, split the move
1102
- * into two operations, a read and a write with only one memory operand.
1103
- * This will cause a nested #VC exception on the MMIO address which can
1104
- * then be handled.
1105
- *
1106
- * This implementation has the benefit that it also supports MOVS where
1107
- * source _and_ destination are MMIO regions.
1108
- *
1109
- * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
1110
- * rare operation. If it turns out to be a performance problem the split
1111
- * operations can be moved to memcpy_fromio() and memcpy_toio().
1112
- */
1113
-static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
1114
-                     unsigned int bytes)
1115
-{
1116
-    unsigned long ds_base, es_base;
1117
-    unsigned char *src, *dst;
1118
-    unsigned char buffer[8];
1119
-    enum es_result ret;
1120
-    bool rep;
1121
-    int off;
1122
-
1123
-    ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
1124
-    es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
1125
-
1126
-    if (ds_base == -1L || es_base == -1L) {
1127
-        ctxt->fi.vector = X86_TRAP_GP;
1128
-        ctxt->fi.error_code = 0;
1129
-        return ES_EXCEPTION;
1130
-    }
1131
-
1132
-    src = ds_base + (unsigned char *)ctxt->regs->si;
1133
-    dst = es_base + (unsigned char *)ctxt->regs->di;
1134
-
1135
-    ret = vc_read_mem(ctxt, src, buffer, bytes);
1136
-    if (ret != ES_OK)
1137
-        return ret;
1138
-
1139
-    ret = vc_write_mem(ctxt, dst, buffer, bytes);
1140
-    if (ret != ES_OK)
1141
-        return ret;
1142
-
1143
-    if (ctxt->regs->flags & X86_EFLAGS_DF)
1144
-        off = -bytes;
1145
-    else
1146
-        off = bytes;
1147
-
1148
-    ctxt->regs->si += off;
1149
-    ctxt->regs->di += off;
1150
-
1151
-    rep = insn_has_rep_prefix(&ctxt->insn);
1152
-    if (rep)
1153
-        ctxt->regs->cx -= 1;
1154
-
1155
-    if (!rep || ctxt->regs->cx == 0)
1156
-        return ES_OK;
1157
-    else
1158
-        return ES_RETRY;
1159
-}
1160
-
1161
-static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
1162
-{
1163
-    struct insn *insn = &ctxt->insn;
1164
-    enum insn_mmio_type mmio;
1165
-    unsigned int bytes = 0;
1166
-    enum es_result ret;
1167
-    u8 sign_byte;
1168
-    long *reg_data;
1169
-
1170
-    mmio = insn_decode_mmio(insn, &bytes);
1171
-    if (mmio == INSN_MMIO_DECODE_FAILED)
1172
-        return ES_DECODE_FAILED;
1173
-
1174
-    if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) {
1175
-        reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
1176
-        if (!reg_data)
1177
-            return ES_DECODE_FAILED;
1178
-    }
1179
-
1180
-    if (user_mode(ctxt->regs))
1181
-        return ES_UNSUPPORTED;
1182
-
1183
-    switch (mmio) {
1184
-    case INSN_MMIO_WRITE:
1185
-        memcpy(ghcb->shared_buffer, reg_data, bytes);
1186
-        ret = vc_do_mmio(ghcb, ctxt, bytes, false);
1187
-        break;
1188
-    case INSN_MMIO_WRITE_IMM:
1189
-        memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
1190
-        ret = vc_do_mmio(ghcb, ctxt, bytes, false);
1191
-        break;
1192
-    case INSN_MMIO_READ:
1193
-        ret = vc_do_mmio(ghcb, ctxt, bytes, true);
1194
-        if (ret)
1195
-            break;
1196
-
1197
-        /* Zero-extend for 32-bit operation */
1198
-        if (bytes == 4)
1199
-            *reg_data = 0;
1200
-
1201
-        memcpy(reg_data, ghcb->shared_buffer, bytes);
1202
-        break;
1203
-    case INSN_MMIO_READ_ZERO_EXTEND:
1204
-        ret = vc_do_mmio(ghcb, ctxt, bytes, true);
1205
-        if (ret)
1206
-            break;
1207
-
1208
-        /* Zero extend based on operand size */
1209
-        memset(reg_data, 0, insn->opnd_bytes);
1210
-        memcpy(reg_data, ghcb->shared_buffer, bytes);
1211
-        break;
1212
-    case INSN_MMIO_READ_SIGN_EXTEND:
1213
-        ret = vc_do_mmio(ghcb, ctxt, bytes, true);
1214
-        if (ret)
1215
-            break;
1216
-
1217
-        if (bytes == 1) {
1218
-            u8 *val = (u8 *)ghcb->shared_buffer;
1219
-
1220
-            sign_byte = (*val & 0x80) ? 0xff : 0x00;
1221
-        } else {
1222
-            u16 *val = (u16 *)ghcb->shared_buffer;
1223
-
1224
-            sign_byte = (*val & 0x8000) ? 0xff : 0x00;
1225
-        }
1226
-
1227
-        /* Sign extend based on operand size */
1228
-        memset(reg_data, sign_byte, insn->opnd_bytes);
1229
-        memcpy(reg_data, ghcb->shared_buffer, bytes);
1230
-        break;
1231
-    case INSN_MMIO_MOVS:
1232
-        ret = vc_handle_mmio_movs(ctxt, bytes);
1233
-        break;
1234
-    default:
1235
-        ret = ES_UNSUPPORTED;
1236
-        break;
1237
-    }
1238
-
1239
-    return ret;
1240
-}
1241
-
1242
-static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
1243
-                     struct es_em_ctxt *ctxt)
1244
-{
1245
-    struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
1246
-    long val, *reg = vc_insn_get_rm(ctxt);
1247
-    enum es_result ret;
1248
-
1249
-    if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
1250
-        return ES_VMM_ERROR;
1251
-
1252
-    if (!reg)
1253
-        return ES_DECODE_FAILED;
1254
-
1255
-    val = *reg;
1256
-
1257
-    /* Upper 32 bits must be written as zeroes */
1258
-    if (val >> 32) {
1259
-        ctxt->fi.vector = X86_TRAP_GP;
1260
-        ctxt->fi.error_code = 0;
1261
-        return ES_EXCEPTION;
1262
-    }
1263
-
1264
-    /* Clear out other reserved bits and set bit 10 */
1265
-    val = (val & 0xffff23ffL) | BIT(10);
1266
-
1267
-    /* Early non-zero writes to DR7 are not supported */
1268
-    if (!data && (val & ~DR7_RESET_VALUE))
1269
-        return ES_UNSUPPORTED;
1270
-
1271
-    /* Using a value of 0 for ExitInfo1 means RAX holds the value */
1272
-    ghcb_set_rax(ghcb, val);
1273
-    ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
1274
-    if (ret != ES_OK)
1275
-        return ret;
1276
-
1277
-    if (data)
1278
-        data->dr7 = val;
1279
-
1280
-    return ES_OK;
1281
-}
1282
-
1283
-static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
1284
-                     struct es_em_ctxt *ctxt)
1285
-{
1286
-    struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
1287
-    long *reg = vc_insn_get_rm(ctxt);
1288
-
1289
-    if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
1290
-        return ES_VMM_ERROR;
1291
-
1292
-    if (!reg)
1293
-        return ES_DECODE_FAILED;
1294
-
1295
-    if (data)
1296
-        *reg = data->dr7;
1297
-    else
1298
-        *reg = DR7_RESET_VALUE;
1299
-
1300
-    return ES_OK;
1301
-}
1302
-
1303
-static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
1304
-                 struct es_em_ctxt *ctxt)
1305
-{
1306
-    return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
1307
-}
1308
-
1309
-static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
1310
-{
1311
-    enum es_result ret;
1312
-
1313
-    ghcb_set_rcx(ghcb, ctxt->regs->cx);
1314
-
1315
-    ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
1316
-    if (ret != ES_OK)
1317
-        return ret;
1318
-
1319
-    if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
1320
-        return ES_VMM_ERROR;
1321
-
1322
-    ctxt->regs->ax = ghcb->save.rax;
1323
-    ctxt->regs->dx = ghcb->save.rdx;
1324
-
1325
-    return ES_OK;
1326
-}
1327
-
1328
-static enum es_result vc_handle_monitor(struct ghcb *ghcb,
1329
-                    struct es_em_ctxt *ctxt)
1330
-{
1331
-    /*
1332
-     * Treat it as a NOP and do not leak a physical address to the
1333
-     * hypervisor.
1334
-     */
1335
-    return ES_OK;
1336
-}
1337
-
1338
-static enum es_result vc_handle_mwait(struct ghcb *ghcb,
1339
-                 struct es_em_ctxt *ctxt)
1340
-{
1341
-    /* Treat the same as MONITOR/MONITORX */
1342
-    return ES_OK;
1343
-}
1344
-
1345
-static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
1346
-                    struct es_em_ctxt *ctxt)
1347
-{
1348
-    enum es_result ret;
1349
-
1350
-    ghcb_set_rax(ghcb, ctxt->regs->ax);
1351
-    ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);
1352
-
1353
-    if (x86_platform.hyper.sev_es_hcall_prepare)
1354
-        x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
1355
-
1356
-    ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
1357
-    if (ret != ES_OK)
1358
-        return ret;
1359
-
1360
-    if (!ghcb_rax_is_valid(ghcb))
1361
-        return ES_VMM_ERROR;
1362
-
1363
-    ctxt->regs->ax = ghcb->save.rax;
1364
-
1365
-    /*
1366
-     * Call sev_es_hcall_finish() after regs->ax is already set.
1367
-     * This allows the hypervisor handler to overwrite it again if
1368
-     * necessary.
1369
-     */
1370
-    if (x86_platform.hyper.sev_es_hcall_finish &&
1371
-     !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
1372
-        return ES_VMM_ERROR;
1373
-
1374
-    return ES_OK;
1375
-}
1376
-
1377
-static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
1378
-                    struct es_em_ctxt *ctxt)
1379
-{
1380
-    /*
1381
-     * Calling exc_alignment_check() directly does not work, because it
1382
-     * enables IRQs and the GHCB is active. Forward the exception and call
1383
-     * it later from vc_forward_exception().
1384
-     */
1385
-    ctxt->fi.vector = X86_TRAP_AC;
1386
-    ctxt->fi.error_code = 0;
1387
-    return ES_EXCEPTION;
1388
-}
1389
-
1390
-static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
1391
-                     struct ghcb *ghcb,
1392
-                     unsigned long exit_code)
1393
-{
1394
-    enum es_result result = vc_check_opcode_bytes(ctxt, exit_code);
1395
-
1396
-    if (result != ES_OK)
1397
-        return result;
1398
-
1399
-    switch (exit_code) {
1400
-    case SVM_EXIT_READ_DR7:
1401
-        result = vc_handle_dr7_read(ghcb, ctxt);
1402
-        break;
1403
-    case SVM_EXIT_WRITE_DR7:
1404
-        result = vc_handle_dr7_write(ghcb, ctxt);
1405
-        break;
1406
-    case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
1407
-        result = vc_handle_trap_ac(ghcb, ctxt);
1408
-        break;
1409
-    case SVM_EXIT_RDTSC:
1410
-    case SVM_EXIT_RDTSCP:
1411
-        result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
1412
-        break;
1413
-    case SVM_EXIT_RDPMC:
1414
-        result = vc_handle_rdpmc(ghcb, ctxt);
1415
-        break;
1416
-    case SVM_EXIT_INVD:
1417
-        pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
1418
-        result = ES_UNSUPPORTED;
1419
-        break;
1420
-    case SVM_EXIT_CPUID:
1421
-        result = vc_handle_cpuid(ghcb, ctxt);
1422
-        break;
1423
-    case SVM_EXIT_IOIO:
1424
-        result = vc_handle_ioio(ghcb, ctxt);
1425
-        break;
1426
-    case SVM_EXIT_MSR:
1427
-        result = vc_handle_msr(ghcb, ctxt);
1428
-        break;
1429
-    case SVM_EXIT_VMMCALL:
1430
-        result = vc_handle_vmmcall(ghcb, ctxt);
1431
-        break;
1432
-    case SVM_EXIT_WBINVD:
1433
-        result = vc_handle_wbinvd(ghcb, ctxt);
1434
-        break;
1435
-    case SVM_EXIT_MONITOR:
1436
-        result = vc_handle_monitor(ghcb, ctxt);
1437
-        break;
1438
-    case SVM_EXIT_MWAIT:
1439
-        result = vc_handle_mwait(ghcb, ctxt);
1440
-        break;
1441
-    case SVM_EXIT_NPF:
1442
-        result = vc_handle_mmio(ghcb, ctxt);
1443
-        break;
1444
-    default:
1445
-        /*
1446
-         * Unexpected #VC exception
1447
-         */
1448
-        result = ES_UNSUPPORTED;
1449
-    }
1450
-
1451
-    return result;
1452
-}
1453
-
1454
-static __always_inline bool is_vc2_stack(unsigned long sp)
1455
-{
1456
-    return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
1457
-}
1458
-
1459
-static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
1460
-{
1461
-    unsigned long sp, prev_sp;
1462
-
1463
-    sp = (unsigned long)regs;
1464
-    prev_sp = regs->sp;
1465
-
1466
-    /*
1467
-     * If the code was already executing on the VC2 stack when the #VC
1468
-     * happened, let it proceed to the normal handling routine. This way the
1469
-     * code executing on the VC2 stack can cause #VC exceptions to get handled.
1470
-     */
1471
-    return is_vc2_stack(sp) && !is_vc2_stack(prev_sp);
1472
-}
1473
-
1474
-static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
1475
-{
1476
-    struct ghcb_state state;
1477
-    struct es_em_ctxt ctxt;
1478
-    enum es_result result;
1479
-    struct ghcb *ghcb;
1480
-    bool ret = true;
1481
-
1482
-    ghcb = __sev_get_ghcb(&state);
1483
-
1484
-    vc_ghcb_invalidate(ghcb);
1485
-    result = vc_init_em_ctxt(&ctxt, regs, error_code);
1486
-
1487
-    if (result == ES_OK)
1488
-        result = vc_handle_exitcode(&ctxt, ghcb, error_code);
1489
-
1490
-    __sev_put_ghcb(&state);
1491
-
1492
-    /* Done - now check the result */
1493
-    switch (result) {
1494
-    case ES_OK:
1495
-        vc_finish_insn(&ctxt);
1496
-        break;
1497
-    case ES_UNSUPPORTED:
1498
-        pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
1499
-                 error_code, regs->ip);
1500
-        ret = false;
1501
-        break;
1502
-    case ES_VMM_ERROR:
1503
-        pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
1504
-                 error_code, regs->ip);
1505
-        ret = false;
1506
-        break;
1507
-    case ES_DECODE_FAILED:
1508
-        pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
1509
-                 error_code, regs->ip);
1510
-        ret = false;
1511
-        break;
1512
-    case ES_EXCEPTION:
1513
-        vc_forward_exception(&ctxt);
1514
-        break;
1515
-    case ES_RETRY:
1516
-        /* Nothing to do */
1517
-        break;
1518
-    default:
1519
-        pr_emerg("Unknown result in %s():%d\n", __func__, result);
1520
-        /*
1521
-         * Emulating the instruction which caused the #VC exception
1522
-         * failed - can't continue so print debug information
1523
-         */
1524
-        BUG();
1525
-    }
1526
-
1527
-    return ret;
1528
-}
1529
-
1530
-static __always_inline bool vc_is_db(unsigned long error_code)
1531
-{
1532
-    return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
1533
-}
1534
-
1535
-/*
1536
- * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
1537
- * and will panic when an error happens.
1538
- */
1539
-DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
1540
-{
1541
-    irqentry_state_t irq_state;
1542
-
1543
-    /*
1544
-     * With the current implementation it is always possible to switch to a
1545
-     * safe stack because #VC exceptions only happen at known places, like
1546
-     * intercepted instructions or accesses to MMIO areas/IO ports. They can
1547
-     * also happen with code instrumentation when the hypervisor intercepts
1548
-     * #DB, but the critical paths are forbidden to be instrumented, so #DB
1549
-     * exceptions currently also only happen in safe places.
1550
-     *
1551
-     * But keep this here in case the noinstr annotations are violated due
1552
-     * to bug elsewhere.
1553
-     */
1554
-    if (unlikely(vc_from_invalid_context(regs))) {
1555
-        instrumentation_begin();
1556
-        panic("Can't handle #VC exception from unsupported context\n");
1557
-        instrumentation_end();
1558
-    }
1559
-
1560
-    /*
1561
-     * Handle #DB before calling into !noinstr code to avoid recursive #DB.
1562
-     */
1563
-    if (vc_is_db(error_code)) {
1564
-        exc_debug(regs);
1565
-        return;
1566
-    }
1567
-
1568
-    irq_state = irqentry_nmi_enter(regs);
1569
-
1570
-    instrumentation_begin();
1571
-
1572
-    if (!vc_raw_handle_exception(regs, error_code)) {
1573
-        /* Show some debug info */
1574
-        show_regs(regs);
1575
-
1576
-        /* Ask hypervisor to sev_es_terminate */
1577
-        sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
1578
-
1579
-        /* If that fails and we get here - just panic */
1580
-        panic("Returned from Terminate-Request to Hypervisor\n");
1581
-    }
1582
-
1583
-    instrumentation_end();
1584
-    irqentry_nmi_exit(regs, irq_state);
1585
-}
1586
-
1587
-/*
1588
- * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
1589
- * and will kill the current task with SIGBUS when an error happens.
1590
- */
1591
-DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
1592
-{
1593
-    /*
1594
-     * Handle #DB before calling into !noinstr code to avoid recursive #DB.
1595
-     */
1596
-    if (vc_is_db(error_code)) {
1597
-        noist_exc_debug(regs);
1598
-        return;
1599
-    }
1600
-
1601
-    irqentry_enter_from_user_mode(regs);
1602
-    instrumentation_begin();
1603
-
1604
-    if (!vc_raw_handle_exception(regs, error_code)) {
1605
-        /*
1606
-         * Do not kill the machine if user-space triggered the
1607
-         * exception. Send SIGBUS instead and let user-space deal with
1608
-         * it.
1609
-         */
1610
-        force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
1611
-    }
1612
-
1613
-    instrumentation_end();
1614
-    irqentry_exit_to_user_mode(regs);
1615
-}
1616
-
1617
-bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
1618
-{
1619
-    unsigned long exit_code = regs->orig_ax;
1620
-    struct es_em_ctxt ctxt;
1621
-    enum es_result result;
1622
-
1623
-    vc_ghcb_invalidate(boot_ghcb);
1624
-
1625
-    result = vc_init_em_ctxt(&ctxt, regs, exit_code);
1626
-    if (result == ES_OK)
1627
-        result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);
1628
-
1629
-    /* Done - now check the result */
1630
-    switch (result) {
1631
-    case ES_OK:
1632
-        vc_finish_insn(&ctxt);
1633
-        break;
1634
-    case ES_UNSUPPORTED:
1635
-        early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
1636
-                exit_code, regs->ip);
1637
-        goto fail;
1638
-    case ES_VMM_ERROR:
1639
-        early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
1640
-                exit_code, regs->ip);
1641
-        goto fail;
1642
-    case ES_DECODE_FAILED:
1643
-        early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
1644
-                exit_code, regs->ip);
1645
-        goto fail;
1646
-    case ES_EXCEPTION:
1647
-        vc_early_forward_exception(&ctxt);
1648
-        break;
1649
-    case ES_RETRY:
1650
-        /* Nothing to do */
1651
-        break;
1652
-    default:
1653
-        BUG();
1654
-    }
1655
-
1656
-    return true;
1657
-
1658
-fail:
1659
-    show_regs(regs);
1660
-
1661
-    sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
1662
-}
1663
-
1664
-/*
1665
- * Initial set up of SNP relies on information provided by the
1666
- * Confidential Computing blob, which can be passed to the kernel
1667
- * in the following ways, depending on how it is booted:
1668
- *
1669
- * - when booted via the boot/decompress kernel:
1670
- * - via boot_params
1671
- *
1672
- * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH):
1673
- * - via a setup_data entry, as defined by the Linux Boot Protocol
1674
- *
1675
- * Scan for the blob in that order.
1676
- */
1677
-static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
1678
-{
1679
-    struct cc_blob_sev_info *cc_info;
1680
-
1681
-    /* Boot kernel would have passed the CC blob via boot_params. */
1682
-    if (bp->cc_blob_address) {
1683
-        cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address;
1684
-        goto found_cc_info;
1685
-    }
1686
-
1687
-    /*
1688
-     * If kernel was booted directly, without the use of the
1689
-     * boot/decompression kernel, the CC blob may have been passed via
1690
-     * setup_data instead.
1691
-     */
1692
-    cc_info = find_cc_blob_setup_data(bp);
1693
-    if (!cc_info)
1694
-        return NULL;
1695
-
1696
-found_cc_info:
1697
-    if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
1698
-        snp_abort();
1699
-
1700
-    return cc_info;
1701
-}
1702
-
1703
-static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
1704
-{
1705
-    struct svsm_call call = {};
1706
-    int ret;
1707
-    u64 pa;
1708
-
1709
-    /*
1710
-     * Record the SVSM Calling Area address (CAA) if the guest is not
1711
-     * running at VMPL0. The CA will be used to communicate with the
1712
-     * SVSM to perform the SVSM services.
1713
-     */
1714
-    if (!svsm_setup_ca(cc_info))
1715
-        return;
1716
-
1717
-    /*
1718
-     * It is very early in the boot and the kernel is running identity
1719
-     * mapped but without having adjusted the pagetables to where the
1720
-     * kernel was loaded (physbase), so the get the CA address using
1721
-     * RIP-relative addressing.
1722
-     */
1723
-    pa = (u64)rip_rel_ptr(&boot_svsm_ca_page);
1724
-
1725
-    /*
1726
-     * Switch over to the boot SVSM CA while the current CA is still
1727
-     * addressable. There is no GHCB at this point so use the MSR protocol.
1728
-     *
1729
-     * SVSM_CORE_REMAP_CA call:
1730
-     * RAX = 0 (Protocol=0, CallID=0)
1731
-     * RCX = New CA GPA
1732
-     */
1733
-    call.caa = svsm_get_caa();
1734
-    call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
1735
-    call.rcx = pa;
1736
-    ret = svsm_perform_call_protocol(&call);
1737
-    if (ret)
1738
-        sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL);
1739
-
1740
-    RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa;
1741
-    RIP_REL_REF(boot_svsm_caa_pa) = pa;
1742
-}
1743
-
1744
-bool __head snp_init(struct boot_params *bp)
1745
-{
1746
-    struct cc_blob_sev_info *cc_info;
1747
-
1748
-    if (!bp)
1749
-        return false;
1750
-
1751
-    cc_info = find_cc_blob(bp);
1752
-    if (!cc_info)
1753
-        return false;
1754
-
1755
-    if (cc_info->secrets_phys && cc_info->secrets_len == PAGE_SIZE)
1756
-        secrets_pa = cc_info->secrets_phys;
1757
-    else
1758
-        return false;
1759
-
1760
-    setup_cpuid_table(cc_info);
1761
-
1762
-    svsm_setup(cc_info);
1763
-
1764
-    /*
1765
-     * The CC blob will be used later to access the secrets page. Cache
1766
-     * it here like the boot kernel does.
1767
-     */
1768
-    bp->cc_blob_address = (u32)(unsigned long)cc_info;
1769
-
1770
-    return true;
1771
-}
1772
-
1773
-void __head __noreturn snp_abort(void)
1774
-{
1775
-    sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
1776
-}
1777
-
1778
/*
1779
* SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are
1780
* enabled, as the alternative (fallback) logic for DMI probing in the legacy
1781
diff --git a/arch/x86/coco/sev/shared.c b/arch/x86/coco/sev/shared.c
1782
index XXXXXXX..XXXXXXX 100644
1783
--- a/arch/x86/coco/sev/shared.c
1784
+++ b/arch/x86/coco/sev/shared.c
1785
@@ -XXX,XX +XXX,XX @@
1786
1787
/*
1788
* SVSM related information:
1789
- * When running under an SVSM, the VMPL that Linux is executing at must be
1790
- * non-zero. The VMPL is therefore used to indicate the presence of an SVSM.
1791
- *
1792
* During boot, the page tables are set up as identity mapped and later
1793
* changed to use kernel virtual addresses. Maintain separate virtual and
1794
* physical addresses for the CAA to allow SVSM functions to be used during
1795
* early boot, both with identity mapped virtual addresses and proper kernel
1796
* virtual addresses.
1797
*/
1798
-u8 snp_vmpl __ro_after_init;
1799
-EXPORT_SYMBOL_GPL(snp_vmpl);
1800
struct svsm_ca *boot_svsm_caa __ro_after_init;
1801
u64 boot_svsm_caa_pa __ro_after_init;
1802
1803
@@ -XXX,XX +XXX,XX @@ static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
1804
    }
1805
}
1806
1807
-static inline void __pval_terminate(u64 pfn, bool action, unsigned int page_size,
1808
-                 int ret, u64 svsm_ret)
1809
-{
1810
-    WARN(1, "PVALIDATE failure: pfn: 0x%llx, action: %u, size: %u, ret: %d, svsm_ret: 0x%llx\n",
1811
-     pfn, action, page_size, ret, svsm_ret);
1812
-
1813
-    sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PVALIDATE);
1814
-}
1815
-
1816
-static void svsm_pval_terminate(struct svsm_pvalidate_call *pc, int ret, u64 svsm_ret)
1817
-{
1818
-    unsigned int page_size;
1819
-    bool action;
1820
-    u64 pfn;
1821
-
1822
-    pfn = pc->entry[pc->cur_index].pfn;
1823
-    action = pc->entry[pc->cur_index].action;
1824
-    page_size = pc->entry[pc->cur_index].page_size;
1825
-
1826
-    __pval_terminate(pfn, action, page_size, ret, svsm_ret);
1827
-}
1828
-
1829
static void __head svsm_pval_4k_page(unsigned long paddr, bool validate)
1830
{
1831
    struct svsm_pvalidate_call *pc;
1832
@@ -XXX,XX +XXX,XX @@ static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr,
1833
    }
1834
}
1835
1836
-static void pval_pages(struct snp_psc_desc *desc)
1837
-{
1838
-    struct psc_entry *e;
1839
-    unsigned long vaddr;
1840
-    unsigned int size;
1841
-    unsigned int i;
1842
-    bool validate;
1843
-    u64 pfn;
1844
-    int rc;
1845
-
1846
-    for (i = 0; i <= desc->hdr.end_entry; i++) {
1847
-        e = &desc->entries[i];
1848
-
1849
-        pfn = e->gfn;
1850
-        vaddr = (unsigned long)pfn_to_kaddr(pfn);
1851
-        size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
1852
-        validate = e->operation == SNP_PAGE_STATE_PRIVATE;
1853
-
1854
-        rc = pvalidate(vaddr, size, validate);
1855
-        if (!rc)
1856
-            continue;
1857
-
1858
-        if (rc == PVALIDATE_FAIL_SIZEMISMATCH && size == RMP_PG_SIZE_2M) {
1859
-            unsigned long vaddr_end = vaddr + PMD_SIZE;
1860
-
1861
-            for (; vaddr < vaddr_end; vaddr += PAGE_SIZE, pfn++) {
1862
-                rc = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
1863
-                if (rc)
1864
-                    __pval_terminate(pfn, validate, RMP_PG_SIZE_4K, rc, 0);
1865
-            }
1866
-        } else {
1867
-            __pval_terminate(pfn, validate, size, rc, 0);
1868
-        }
1869
-    }
1870
-}
1871
-
1872
-static u64 svsm_build_ca_from_pfn_range(u64 pfn, u64 pfn_end, bool action,
1873
-                    struct svsm_pvalidate_call *pc)
1874
-{
1875
-    struct svsm_pvalidate_entry *pe;
1876
-
1877
-    /* Nothing in the CA yet */
1878
-    pc->num_entries = 0;
1879
-    pc->cur_index = 0;
1880
-
1881
-    pe = &pc->entry[0];
1882
-
1883
-    while (pfn < pfn_end) {
1884
-        pe->page_size = RMP_PG_SIZE_4K;
1885
-        pe->action = action;
1886
-        pe->ignore_cf = 0;
1887
-        pe->pfn = pfn;
1888
-
1889
-        pe++;
1890
-        pfn++;
1891
-
1892
-        pc->num_entries++;
1893
-        if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
1894
-            break;
1895
-    }
1896
-
1897
-    return pfn;
1898
-}
1899
-
1900
-static int svsm_build_ca_from_psc_desc(struct snp_psc_desc *desc, unsigned int desc_entry,
1901
-                 struct svsm_pvalidate_call *pc)
1902
-{
1903
-    struct svsm_pvalidate_entry *pe;
1904
-    struct psc_entry *e;
1905
-
1906
-    /* Nothing in the CA yet */
1907
-    pc->num_entries = 0;
1908
-    pc->cur_index = 0;
1909
-
1910
-    pe = &pc->entry[0];
1911
-    e = &desc->entries[desc_entry];
1912
-
1913
-    while (desc_entry <= desc->hdr.end_entry) {
1914
-        pe->page_size = e->pagesize ? RMP_PG_SIZE_2M : RMP_PG_SIZE_4K;
1915
-        pe->action = e->operation == SNP_PAGE_STATE_PRIVATE;
1916
-        pe->ignore_cf = 0;
1917
-        pe->pfn = e->gfn;
1918
-
1919
-        pe++;
1920
-        e++;
1921
-
1922
-        desc_entry++;
1923
-        pc->num_entries++;
1924
-        if (pc->num_entries == SVSM_PVALIDATE_MAX_COUNT)
1925
-            break;
1926
-    }
1927
-
1928
-    return desc_entry;
1929
-}
1930
-
1931
-static void svsm_pval_pages(struct snp_psc_desc *desc)
1932
-{
1933
-    struct svsm_pvalidate_entry pv_4k[VMGEXIT_PSC_MAX_ENTRY];
1934
-    unsigned int i, pv_4k_count = 0;
1935
-    struct svsm_pvalidate_call *pc;
1936
-    struct svsm_call call = {};
1937
-    unsigned long flags;
1938
-    bool action;
1939
-    u64 pc_pa;
1940
-    int ret;
1941
-
1942
-    /*
1943
-     * This can be called very early in the boot, use native functions in
1944
-     * order to avoid paravirt issues.
1945
-     */
1946
-    flags = native_local_irq_save();
1947
-
1948
-    /*
1949
-     * The SVSM calling area (CA) can support processing 510 entries at a
1950
-     * time. Loop through the Page State Change descriptor until the CA is
1951
-     * full or the last entry in the descriptor is reached, at which time
1952
-     * the SVSM is invoked. This repeats until all entries in the descriptor
1953
-     * are processed.
1954
-     */
1955
-    call.caa = svsm_get_caa();
1956
-
1957
-    pc = (struct svsm_pvalidate_call *)call.caa->svsm_buffer;
1958
-    pc_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer);
1959
-
1960
-    /* Protocol 0, Call ID 1 */
1961
-    call.rax = SVSM_CORE_CALL(SVSM_CORE_PVALIDATE);
1962
-    call.rcx = pc_pa;
1963
-
1964
-    for (i = 0; i <= desc->hdr.end_entry;) {
1965
-        i = svsm_build_ca_from_psc_desc(desc, i, pc);
1966
-
1967
-        do {
1968
-            ret = svsm_perform_call_protocol(&call);
1969
-            if (!ret)
1970
-                continue;
1971
-
1972
-            /*
1973
-             * Check if the entry failed because of an RMP mismatch (a
1974
-             * PVALIDATE at 2M was requested, but the page is mapped in
1975
-             * the RMP as 4K).
1976
-             */
1977
-
1978
-            if (call.rax_out == SVSM_PVALIDATE_FAIL_SIZEMISMATCH &&
1979
-             pc->entry[pc->cur_index].page_size == RMP_PG_SIZE_2M) {
1980
-                /* Save this entry for post-processing at 4K */
1981
-                pv_4k[pv_4k_count++] = pc->entry[pc->cur_index];
1982
-
1983
-                /* Skip to the next one unless at the end of the list */
1984
-                pc->cur_index++;
1985
-                if (pc->cur_index < pc->num_entries)
1986
-                    ret = -EAGAIN;
1987
-                else
1988
-                    ret = 0;
1989
-            }
1990
-        } while (ret == -EAGAIN);
1991
-
1992
-        if (ret)
1993
-            svsm_pval_terminate(pc, ret, call.rax_out);
1994
-    }
1995
-
1996
-    /* Process any entries that failed to be validated at 2M and validate them at 4K */
1997
-    for (i = 0; i < pv_4k_count; i++) {
1998
-        u64 pfn, pfn_end;
1999
-
2000
-        action = pv_4k[i].action;
2001
-        pfn = pv_4k[i].pfn;
2002
-        pfn_end = pfn + 512;
2003
-
2004
-        while (pfn < pfn_end) {
2005
-            pfn = svsm_build_ca_from_pfn_range(pfn, pfn_end, action, pc);
2006
-
2007
-            ret = svsm_perform_call_protocol(&call);
2008
-            if (ret)
2009
-                svsm_pval_terminate(pc, ret, call.rax_out);
2010
-        }
2011
-    }
2012
-
2013
-    native_local_irq_restore(flags);
2014
-}
2015
-
2016
-static void pvalidate_pages(struct snp_psc_desc *desc)
2017
-{
2018
-    if (snp_vmpl)
2019
-        svsm_pval_pages(desc);
2020
-    else
2021
-        pval_pages(desc);
2022
-}
2023
-
2024
-static int vmgexit_psc(struct ghcb *ghcb, struct snp_psc_desc *desc)
2025
-{
2026
-    int cur_entry, end_entry, ret = 0;
2027
-    struct snp_psc_desc *data;
2028
-    struct es_em_ctxt ctxt;
2029
-
2030
-    vc_ghcb_invalidate(ghcb);
2031
-
2032
-    /* Copy the input desc into GHCB shared buffer */
2033
-    data = (struct snp_psc_desc *)ghcb->shared_buffer;
2034
-    memcpy(ghcb->shared_buffer, desc, min_t(int, GHCB_SHARED_BUF_SIZE, sizeof(*desc)));
2035
-
2036
-    /*
2037
-     * As per the GHCB specification, the hypervisor can resume the guest
2038
-     * before processing all the entries. Check whether all the entries
2039
-     * are processed. If not, then keep retrying. Note, the hypervisor
2040
-     * will update the data memory directly to indicate the status, so
2041
-     * reference the data->hdr everywhere.
2042
-     *
2043
-     * The strategy here is to wait for the hypervisor to change the page
2044
-     * state in the RMP table before guest accesses the memory pages. If the
2045
-     * page state change was not successful, then later memory access will
2046
-     * result in a crash.
2047
-     */
2048
-    cur_entry = data->hdr.cur_entry;
2049
-    end_entry = data->hdr.end_entry;
2050
-
2051
-    while (data->hdr.cur_entry <= data->hdr.end_entry) {
2052
-        ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
2053
-
2054
-        /* This will advance the shared buffer data points to. */
2055
-        ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
2056
-
2057
-        /*
2058
-         * Page State Change VMGEXIT can pass error code through
2059
-         * exit_info_2.
2060
-         */
2061
-        if (WARN(ret || ghcb->save.sw_exit_info_2,
2062
-             "SNP: PSC failed ret=%d exit_info_2=%llx\n",
2063
-             ret, ghcb->save.sw_exit_info_2)) {
2064
-            ret = 1;
2065
-            goto out;
2066
-        }
2067
-
2068
-        /* Verify that reserved bit is not set */
2069
-        if (WARN(data->hdr.reserved, "Reserved bit is set in the PSC header\n")) {
2070
-            ret = 1;
2071
-            goto out;
2072
-        }
2073
-
2074
-        /*
2075
-         * Sanity check that entry processing is not going backwards.
2076
-         * This will happen only if hypervisor is tricking us.
2077
-         */
2078
-        if (WARN(data->hdr.end_entry > end_entry || cur_entry > data->hdr.cur_entry,
2079
-"SNP: PSC processing going backward, end_entry %d (got %d) cur_entry %d (got %d)\n",
2080
-             end_entry, data->hdr.end_entry, cur_entry, data->hdr.cur_entry)) {
2081
-            ret = 1;
2082
-            goto out;
2083
-        }
2084
-    }
2085
-
2086
-out:
2087
-    return ret;
2088
-}
2089
-
2090
static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt,
2091
                     unsigned long exit_code)
2092
{
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/arch/x86/boot/startup/Makefile
@@ -XXX,XX +XXX,XX @@
+# SPDX-License-Identifier: GPL-2.0
+
+lib-$(CONFIG_X86_64)        += la57toggle.o
diff --git a/arch/x86/boot/compressed/la57toggle.S b/arch/x86/boot/startup/la57toggle.S
similarity index 100%
rename from arch/x86/boot/compressed/la57toggle.S
rename to arch/x86/boot/startup/la57toggle.S
diff --git a/arch/x86/coco/sev/startup.c b/arch/x86/coco/sev/startup.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/arch/x86/coco/sev/startup.c
@@ -XXX,XX +XXX,XX @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2019 SUSE
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+#define pr_fmt(fmt)    "SEV: " fmt
+
+#include <linux/sched/debug.h>    /* For show_regs() */
+#include <linux/percpu-defs.h>
+#include <linux/cc_platform.h>
+#include <linux/printk.h>
+#include <linux/mm_types.h>
+#include <linux/set_memory.h>
+#include <linux/memblock.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/cpumask.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <linux/psp-sev.h>
+#include <uapi/linux/sev-guest.h>
+
+#include <asm/init.h>
+#include <asm/cpu_entry_area.h>
+#include <asm/stacktrace.h>
+#include <asm/sev.h>
+#include <asm/sev-internal.h>
+#include <asm/insn-eval.h>
+#include <asm/fpu/xcr.h>
+#include <asm/processor.h>
+#include <asm/realmode.h>
+#include <asm/setup.h>
+#include <asm/traps.h>
+#include <asm/svm.h>
+#include <asm/smp.h>
+#include <asm/cpu.h>
+#include <asm/apic.h>
+#include <asm/cpuid.h>
+#include <asm/cmdline.h>
+
+/* For early boot hypervisor communication in SEV-ES enabled guests */
+struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
+
+/*
+ * Needs to be in the .data section because we need it NULL before bss is
+ * cleared
+ */
+struct ghcb *boot_ghcb __section(".data");
+
+/* Bitmap of SEV features supported by the hypervisor */
+u64 sev_hv_features __ro_after_init;
+
+/* Secrets page physical address from the CC blob */
+static u64 secrets_pa __ro_after_init;
+
+/* For early boot SVSM communication */
+struct svsm_ca boot_svsm_ca_page __aligned(PAGE_SIZE);
+
+DEFINE_PER_CPU(struct svsm_ca *, svsm_caa);
+DEFINE_PER_CPU(u64, svsm_caa_pa);
+
2164
+/*
2165
+ * Nothing shall interrupt this code path while holding the per-CPU
2166
+ * GHCB. The backup GHCB is only for NMIs interrupting this path.
2167
+ *
2168
+ * Callers must disable local interrupts around it.
2169
+ */
2170
+noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
2171
+{
2172
+    struct sev_es_runtime_data *data;
2173
+    struct ghcb *ghcb;
2174
+
2175
+    WARN_ON(!irqs_disabled());
2176
+
2177
+    data = this_cpu_read(runtime_data);
2178
+    ghcb = &data->ghcb_page;
2179
+
2180
+    if (unlikely(data->ghcb_active)) {
2181
+        /* GHCB is already in use - save its contents */
2182
+
2183
+        if (unlikely(data->backup_ghcb_active)) {
2184
+            /*
2185
+             * Backup-GHCB is also already in use. There is no way
2186
+             * to continue here so just kill the machine. To make
2187
+             * panic() work, mark GHCBs inactive so that messages
2188
+             * can be printed out.
2189
+             */
2190
+            data->ghcb_active = false;
2191
+            data->backup_ghcb_active = false;
2192
+
2193
+            instrumentation_begin();
2194
+            panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
2195
+            instrumentation_end();
2196
+        }
2197
+
2198
+        /* Mark backup_ghcb active before writing to it */
2199
+        data->backup_ghcb_active = true;
2200
+
2201
+        state->ghcb = &data->backup_ghcb;
2202
+
2203
+        /* Backup GHCB content */
2204
+        *state->ghcb = *ghcb;
2205
+    } else {
2206
+        state->ghcb = NULL;
2207
+        data->ghcb_active = true;
2208
+    }
2209
+
2210
+    return ghcb;
2211
+}
2212
+
2213
+static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
2214
+                unsigned char *buffer)
2215
+{
2216
+    return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
2217
+}
2218
+
2219
+static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
2220
+{
2221
+    char buffer[MAX_INSN_SIZE];
2222
+    int insn_bytes;
2223
+
2224
+    insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
2225
+    if (insn_bytes == 0) {
2226
+        /* Nothing could be copied */
2227
+        ctxt->fi.vector = X86_TRAP_PF;
2228
+        ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
2229
+        ctxt->fi.cr2 = ctxt->regs->ip;
2230
+        return ES_EXCEPTION;
2231
+    } else if (insn_bytes == -EINVAL) {
2232
+        /* Effective RIP could not be calculated */
2233
+        ctxt->fi.vector = X86_TRAP_GP;
2234
+        ctxt->fi.error_code = 0;
2235
+        ctxt->fi.cr2 = 0;
2236
+        return ES_EXCEPTION;
2237
+    }
2238
+
2239
+    if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
2240
+        return ES_DECODE_FAILED;
2241
+
2242
+    if (ctxt->insn.immediate.got)
2243
+        return ES_OK;
2244
+    else
2245
+        return ES_DECODE_FAILED;
2246
+}
2247
+
2248
+static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
2249
+{
2250
+    char buffer[MAX_INSN_SIZE];
2251
+    int res, ret;
2252
+
2253
+    res = vc_fetch_insn_kernel(ctxt, buffer);
2254
+    if (res) {
2255
+        ctxt->fi.vector = X86_TRAP_PF;
2256
+        ctxt->fi.error_code = X86_PF_INSTR;
2257
+        ctxt->fi.cr2 = ctxt->regs->ip;
2258
+        return ES_EXCEPTION;
2259
+    }
2260
+
2261
+    ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
2262
+    if (ret < 0)
2263
+        return ES_DECODE_FAILED;
2264
+    else
2265
+        return ES_OK;
2266
+}
2267
+
2268
+static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
2269
+{
2270
+    if (user_mode(ctxt->regs))
2271
+        return __vc_decode_user_insn(ctxt);
2272
+    else
2273
+        return __vc_decode_kern_insn(ctxt);
2274
+}
2275
+
2276
+static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
2277
+                 char *dst, char *buf, size_t size)
2278
+{
2279
+    unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
2280
+
2281
+    /*
2282
+     * This function uses __put_user() independent of whether kernel or user
2283
+     * memory is accessed. This works fine because __put_user() does no
2284
+     * sanity checks of the pointer being accessed. All that it does is
2285
+     * to report when the access failed.
2286
+     *
2287
+     * Also, this function runs in atomic context, so __put_user() is not
2288
+     * allowed to sleep. The page-fault handler detects that it is running
2289
+     * in atomic context and will not try to take mmap_sem and handle the
2290
+     * fault, so additional pagefault_enable()/disable() calls are not
2291
+     * needed.
2292
+     *
2293
+     * The access can't be done via copy_to_user() here because
2294
+     * vc_write_mem() must not use string instructions to access unsafe
2295
+     * memory. The reason is that MOVS is emulated by the #VC handler by
2296
+     * splitting the move up into a read and a write and taking a nested #VC
2297
+     * exception on whatever of them is the MMIO access. Using string
2298
+     * instructions here would cause infinite nesting.
2299
+     */
2300
+    switch (size) {
2301
+    case 1: {
2302
+        u8 d1;
2303
+        u8 __user *target = (u8 __user *)dst;
2304
+
2305
+        memcpy(&d1, buf, 1);
2306
+        if (__put_user(d1, target))
2307
+            goto fault;
2308
+        break;
2309
+    }
2310
+    case 2: {
2311
+        u16 d2;
2312
+        u16 __user *target = (u16 __user *)dst;
2313
+
2314
+        memcpy(&d2, buf, 2);
2315
+        if (__put_user(d2, target))
2316
+            goto fault;
2317
+        break;
2318
+    }
2319
+    case 4: {
2320
+        u32 d4;
2321
+        u32 __user *target = (u32 __user *)dst;
2322
+
2323
+        memcpy(&d4, buf, 4);
2324
+        if (__put_user(d4, target))
2325
+            goto fault;
2326
+        break;
2327
+    }
2328
+    case 8: {
2329
+        u64 d8;
2330
+        u64 __user *target = (u64 __user *)dst;
2331
+
2332
+        memcpy(&d8, buf, 8);
2333
+        if (__put_user(d8, target))
2334
+            goto fault;
2335
+        break;
2336
+    }
2337
+    default:
2338
+        WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
2339
+        return ES_UNSUPPORTED;
2340
+    }
2341
+
2342
+    return ES_OK;
2343
+
2344
+fault:
2345
+    if (user_mode(ctxt->regs))
2346
+        error_code |= X86_PF_USER;
2347
+
2348
+    ctxt->fi.vector = X86_TRAP_PF;
2349
+    ctxt->fi.error_code = error_code;
2350
+    ctxt->fi.cr2 = (unsigned long)dst;
2351
+
2352
+    return ES_EXCEPTION;
2353
+}
2354
+
2355
+static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
2356
+                 char *src, char *buf, size_t size)
2357
+{
2358
+    unsigned long error_code = X86_PF_PROT;
2359
+
2360
+    /*
2361
+     * This function uses __get_user() independent of whether kernel or user
2362
+     * memory is accessed. This works fine because __get_user() does no
2363
+     * sanity checks of the pointer being accessed. All that it does is
2364
+     * to report when the access failed.
2365
+     *
2366
+     * Also, this function runs in atomic context, so __get_user() is not
2367
+     * allowed to sleep. The page-fault handler detects that it is running
2368
+     * in atomic context and will not try to take mmap_sem and handle the
2369
+     * fault, so additional pagefault_enable()/disable() calls are not
2370
+     * needed.
2371
+     *
2372
+     * The access can't be done via copy_from_user() here because
2373
+     * vc_read_mem() must not use string instructions to access unsafe
2374
+     * memory. The reason is that MOVS is emulated by the #VC handler by
2375
+     * splitting the move up into a read and a write and taking a nested #VC
2376
+     * exception on whatever of them is the MMIO access. Using string
2377
+     * instructions here would cause infinite nesting.
2378
+     */
2379
+    switch (size) {
2380
+    case 1: {
2381
+        u8 d1;
2382
+        u8 __user *s = (u8 __user *)src;
2383
+
2384
+        if (__get_user(d1, s))
2385
+            goto fault;
2386
+        memcpy(buf, &d1, 1);
2387
+        break;
2388
+    }
2389
+    case 2: {
2390
+        u16 d2;
2391
+        u16 __user *s = (u16 __user *)src;
2392
+
2393
+        if (__get_user(d2, s))
2394
+            goto fault;
2395
+        memcpy(buf, &d2, 2);
2396
+        break;
2397
+    }
2398
+    case 4: {
2399
+        u32 d4;
2400
+        u32 __user *s = (u32 __user *)src;
2401
+
2402
+        if (__get_user(d4, s))
2403
+            goto fault;
2404
+        memcpy(buf, &d4, 4);
2405
+        break;
2406
+    }
2407
+    case 8: {
2408
+        u64 d8;
2409
+        u64 __user *s = (u64 __user *)src;
2410
+        if (__get_user(d8, s))
2411
+            goto fault;
2412
+        memcpy(buf, &d8, 8);
2413
+        break;
2414
+    }
2415
+    default:
2416
+        WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
2417
+        return ES_UNSUPPORTED;
2418
+    }
2419
+
2420
+    return ES_OK;
2421
+
2422
+fault:
2423
+    if (user_mode(ctxt->regs))
2424
+        error_code |= X86_PF_USER;
2425
+
2426
+    ctxt->fi.vector = X86_TRAP_PF;
2427
+    ctxt->fi.error_code = error_code;
2428
+    ctxt->fi.cr2 = (unsigned long)src;
2429
+
2430
+    return ES_EXCEPTION;
2431
+}
2432
+
2433
+static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
2434
+                     unsigned long vaddr, phys_addr_t *paddr)
2435
+{
2436
+    unsigned long va = (unsigned long)vaddr;
2437
+    unsigned int level;
2438
+    phys_addr_t pa;
2439
+    pgd_t *pgd;
2440
+    pte_t *pte;
2441
+
2442
+    pgd = __va(read_cr3_pa());
2443
+    pgd = &pgd[pgd_index(va)];
2444
+    pte = lookup_address_in_pgd(pgd, va, &level);
2445
+    if (!pte) {
2446
+        ctxt->fi.vector = X86_TRAP_PF;
2447
+        ctxt->fi.cr2 = vaddr;
2448
+        ctxt->fi.error_code = 0;
2449
+
2450
+        if (user_mode(ctxt->regs))
2451
+            ctxt->fi.error_code |= X86_PF_USER;
2452
+
2453
+        return ES_EXCEPTION;
2454
+    }
2455
+
2456
+    if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
2457
+        /* Emulated MMIO to/from encrypted memory not supported */
2458
+        return ES_UNSUPPORTED;
2459
+
2460
+    pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
2461
+    pa |= va & ~page_level_mask(level);
2462
+
2463
+    *paddr = pa;
2464
+
2465
+    return ES_OK;
2466
+}
2467
+
2468
+static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
2469
+{
2470
+    BUG_ON(size > 4);
2471
+
2472
+    if (user_mode(ctxt->regs)) {
2473
+        struct thread_struct *t = &current->thread;
2474
+        struct io_bitmap *iobm = t->io_bitmap;
2475
+        size_t idx;
2476
+
2477
+        if (!iobm)
2478
+            goto fault;
2479
+
2480
+        for (idx = port; idx < port + size; ++idx) {
2481
+            if (test_bit(idx, iobm->bitmap))
2482
+                goto fault;
2483
+        }
2484
+    }
2485
+
2486
+    return ES_OK;
2487
+
2488
+fault:
2489
+    ctxt->fi.vector = X86_TRAP_GP;
2490
+    ctxt->fi.error_code = 0;
2491
+
2492
+    return ES_EXCEPTION;
2493
+}
2494
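A set bit in the task's io_bitmap means the port is denied, following the hardware TSS I/O-bitmap convention, so the loop above only passes for ports the task was explicitly granted. An illustrative user-space counterpart (the port range is hypothetical):

    /* User-space side (needs <sys/io.h> and CAP_SYS_RAWIO): grant ports
     * 0x3f8-0x3ff, which clears the matching io_bitmap bits so the loop
     * above finds them clear and the IN/OUT is forwarded instead of
     * raising #GP. */
    if (ioperm(0x3f8, 8, 1))
        perror("ioperm");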
+
2495
+static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
2496
+{
2497
+    long error_code = ctxt->fi.error_code;
2498
+    int trapnr = ctxt->fi.vector;
2499
+
2500
+    ctxt->regs->orig_ax = ctxt->fi.error_code;
2501
+
2502
+    switch (trapnr) {
2503
+    case X86_TRAP_GP:
2504
+        exc_general_protection(ctxt->regs, error_code);
2505
+        break;
2506
+    case X86_TRAP_UD:
2507
+        exc_invalid_op(ctxt->regs);
2508
+        break;
2509
+    case X86_TRAP_PF:
2510
+        write_cr2(ctxt->fi.cr2);
2511
+        exc_page_fault(ctxt->regs, error_code);
2512
+        break;
2513
+    case X86_TRAP_AC:
2514
+        exc_alignment_check(ctxt->regs, error_code);
2515
+        break;
2516
+    default:
2517
+        pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
2518
+        BUG();
2519
+    }
2520
+}
2521
+
2522
+/* Include code shared with pre-decompression boot stage */
2523
+#include "shared.c"
2524
+
2525
+noinstr void __sev_put_ghcb(struct ghcb_state *state)
2526
+{
2527
+    struct sev_es_runtime_data *data;
2528
+    struct ghcb *ghcb;
2529
+
2530
+    WARN_ON(!irqs_disabled());
2531
+
2532
+    data = this_cpu_read(runtime_data);
2533
+    ghcb = &data->ghcb_page;
2534
+
2535
+    if (state->ghcb) {
2536
+        /* Restore GHCB from Backup */
2537
+        *ghcb = *state->ghcb;
2538
+        data->backup_ghcb_active = false;
2539
+        state->ghcb = NULL;
2540
+    } else {
2541
+        /*
2542
+         * Invalidate the GHCB so a VMGEXIT instruction issued
2543
+         * from userspace won't appear to be valid.
2544
+         */
2545
+        vc_ghcb_invalidate(ghcb);
2546
+        data->ghcb_active = false;
2547
+    }
2548
+}
2549
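For reference, callers bracket GHCB use in a get/put pair with IRQs disabled; __sev_get_ghcb() hands out the per-CPU GHCB and saves its current contents to the backup page when it is already in use, which is what the restore branch above undoes. The runtime #VC handler further down follows exactly this shape:

    struct ghcb_state state;
    struct ghcb *ghcb;

    ghcb = __sev_get_ghcb(&state);

    vc_ghcb_invalidate(ghcb);
    /* ... fill in GHCB fields and issue sev_es_ghcb_hv_call() ... */

    __sev_put_ghcb(&state);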
+
2550
+int svsm_perform_call_protocol(struct svsm_call *call)
2551
+{
2552
+    struct ghcb_state state;
2553
+    unsigned long flags;
2554
+    struct ghcb *ghcb;
2555
+    int ret;
2556
+
2557
+    /*
2558
+     * This can be called very early in the boot, use native functions in
2559
+     * order to avoid paravirt issues.
2560
+     */
2561
+    flags = native_local_irq_save();
2562
+
2563
+    /*
2564
+     * Use rip-relative references when called early in the boot. If
2565
+     * ghcbs_initialized is set, then it is late in the boot and no need
2566
+     * to worry about rip-relative references in called functions.
2567
+     */
2568
+    if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
2569
+        ghcb = __sev_get_ghcb(&state);
2570
+    else if (RIP_REL_REF(boot_ghcb))
2571
+        ghcb = RIP_REL_REF(boot_ghcb);
2572
+    else
2573
+        ghcb = NULL;
2574
+
2575
+    do {
2576
+        ret = ghcb ? svsm_perform_ghcb_protocol(ghcb, call)
2577
+             : svsm_perform_msr_protocol(call);
2578
+    } while (ret == -EAGAIN);
2579
+
2580
+    if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
2581
+        __sev_put_ghcb(&state);
2582
+
2583
+    native_local_irq_restore(flags);
2584
+
2585
+    return ret;
2586
+}
2587
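A usage sketch, mirroring the SVSM_CORE_REMAP_CA invocation that svsm_setup() makes later in this file (the new CA GPA is a placeholder):

    struct svsm_call call = {};
    int ret;

    call.caa = svsm_get_caa();
    call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
    call.rcx = new_ca_gpa;            /* placeholder GPA */

    ret = svsm_perform_call_protocol(&call);
    if (ret)
        sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL);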
+
2588
+void __head
2589
+early_set_pages_state(unsigned long vaddr, unsigned long paddr,
2590
+         unsigned long npages, enum psc_op op)
2591
+{
2592
+    unsigned long paddr_end;
2593
+    u64 val;
2594
+
2595
+    vaddr = vaddr & PAGE_MASK;
2596
+
2597
+    paddr = paddr & PAGE_MASK;
2598
+    paddr_end = paddr + (npages << PAGE_SHIFT);
2599
+
2600
+    while (paddr < paddr_end) {
2601
+        /* Page validation must be rescinded before changing to shared */
2602
+        if (op == SNP_PAGE_STATE_SHARED)
2603
+            pvalidate_4k_page(vaddr, paddr, false);
2604
+
2605
+        /*
2606
+         * Use the MSR protocol because this function can be called before
2607
+         * the GHCB is established.
2608
+         */
2609
+        sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
2610
+        VMGEXIT();
2611
+
2612
+        val = sev_es_rd_ghcb_msr();
2613
+
2614
+        if (GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP)
2615
+            goto e_term;
2616
+
2617
+        if (GHCB_MSR_PSC_RESP_VAL(val))
2618
+            goto e_term;
2619
+
2620
+        /* Page validation must be performed after changing to private */
2621
+        if (op == SNP_PAGE_STATE_PRIVATE)
2622
+            pvalidate_4k_page(vaddr, paddr, true);
2623
+
2624
+        vaddr += PAGE_SIZE;
2625
+        paddr += PAGE_SIZE;
2626
+    }
2627
+
2628
+    return;
2629
+
2630
+e_term:
2631
+    sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
2632
+}
2633
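As a concrete example of the loop above, a request with npages = 3 starting at paddr 0x200000 issues one MSR-protocol round trip per 4 KiB frame:

    /*
     * Illustration only:
     *   iteration 1: GFN 0x200 (paddr 0x200000)
     *   iteration 2: GFN 0x201 (paddr 0x201000)
     *   iteration 3: GFN 0x202 (paddr 0x202000)
     * each followed by a VMGEXIT and a check of the PSC response code.
     */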
+
2634
+void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
2635
+                     unsigned long npages)
2636
+{
2637
+    /*
2638
+     * This can be invoked in early boot while running identity mapped, so
2639
+     * use an open coded check for SNP instead of using cc_platform_has().
2640
+     * This eliminates worries about jump tables or checking boot_cpu_data
2641
+     * in the cc_platform_has() function.
2642
+     */
2643
+    if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
2644
+        return;
2645
+
2646
+     /*
2647
+     * Ask the hypervisor to mark the memory pages as private in the RMP
2648
+     * table.
2649
+     */
2650
+    early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE);
2651
+}
2652
+
2653
+void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
2654
+                    unsigned long npages)
2655
+{
2656
+    /*
2657
+     * This can be invoked in early boot while running identity mapped, so
2658
+     * use an open coded check for SNP instead of using cc_platform_has().
2659
+     * This eliminates worries about jump tables or checking boot_cpu_data
2660
+     * in the cc_platform_has() function.
2661
+     */
2662
+    if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
2663
+        return;
2664
+
2665
+     /* Ask hypervisor to mark the memory pages shared in the RMP table. */
2666
+    early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
2667
+}
2668
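A hedged usage sketch: flipping a single identity-mapped page to shared before handing it to the hypervisor (the address is made up for illustration):

    unsigned long pa = 0x7f000000;    /* hypothetical page, identity mapped */

    /* One 4 KiB page; identity mapped, so vaddr == paddr here. */
    early_snp_set_memory_shared(pa, pa, 1);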
+
2669
+/* Writes to the SVSM CAA MSR are ignored */
2670
+static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write)
2671
+{
2672
+    if (write)
2673
+        return ES_OK;
2674
+
2675
+    regs->ax = lower_32_bits(this_cpu_read(svsm_caa_pa));
2676
+    regs->dx = upper_32_bits(this_cpu_read(svsm_caa_pa));
2677
+
2678
+    return ES_OK;
2679
+}
2680
+
2681
+/*
2682
+ * TSC related accesses should not exit to the hypervisor when a guest is
2683
+ * executing with Secure TSC enabled, so special handling is required for
2684
+ * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ.
2685
+ */
2686
+static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write)
2687
+{
2688
+    u64 tsc;
2689
+
2690
+    /*
2691
+     * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled.
2692
+     * Terminate the SNP guest when the interception is enabled.
2693
+     */
2694
+    if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ)
2695
+        return ES_VMM_ERROR;
2696
+
2697
+    /*
2698
+     * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC
2699
+     * to return undefined values, so ignore all writes.
2700
+     *
2701
+     * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use
2702
+     * the value returned by rdtsc_ordered().
2703
+     */
2704
+    if (write) {
2705
+        WARN_ONCE(1, "TSC MSR writes are verboten!\n");
2706
+        return ES_OK;
2707
+    }
2708
+
2709
+    tsc = rdtsc_ordered();
2710
+    regs->ax = lower_32_bits(tsc);
2711
+    regs->dx = upper_32_bits(tsc);
2712
+
2713
+    return ES_OK;
2714
+}
2715
+
2716
+static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
2717
+{
2718
+    struct pt_regs *regs = ctxt->regs;
2719
+    enum es_result ret;
2720
+    bool write;
2721
+
2722
+    /* Is it a WRMSR? */
2723
+    write = ctxt->insn.opcode.bytes[1] == 0x30;
2724
+
2725
+    switch (regs->cx) {
2726
+    case MSR_SVSM_CAA:
2727
+        return __vc_handle_msr_caa(regs, write);
2728
+    case MSR_IA32_TSC:
2729
+    case MSR_AMD64_GUEST_TSC_FREQ:
2730
+        if (sev_status & MSR_AMD64_SNP_SECURE_TSC)
2731
+            return __vc_handle_secure_tsc_msrs(regs, write);
2732
+        break;
2733
+    default:
2734
+        break;
2735
+    }
2736
+
2737
+    ghcb_set_rcx(ghcb, regs->cx);
2738
+    if (write) {
2739
+        ghcb_set_rax(ghcb, regs->ax);
2740
+        ghcb_set_rdx(ghcb, regs->dx);
2741
+    }
2742
+
2743
+    ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, write, 0);
2744
+
2745
+    if ((ret == ES_OK) && !write) {
2746
+        regs->ax = ghcb->save.rax;
2747
+        regs->dx = ghcb->save.rdx;
2748
+    }
2749
+
2750
+    return ret;
2751
+}
2752
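For reference, the opcode test above relies on the fixed x86 encodings of the two instructions:

    /*
     *   RDMSR: 0F 32  ->  ctxt->insn.opcode.bytes[1] == 0x32
     *   WRMSR: 0F 30  ->  ctxt->insn.opcode.bytes[1] == 0x30
     */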
+
2753
+static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
2754
+{
2755
+    int trapnr = ctxt->fi.vector;
2756
+
2757
+    if (trapnr == X86_TRAP_PF)
2758
+        native_write_cr2(ctxt->fi.cr2);
2759
+
2760
+    ctxt->regs->orig_ax = ctxt->fi.error_code;
2761
+    do_early_exception(ctxt->regs, trapnr);
2762
+}
2763
+
2764
+static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
2765
+{
2766
+    long *reg_array;
2767
+    int offset;
2768
+
2769
+    reg_array = (long *)ctxt->regs;
2770
+    offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);
2771
+
2772
+    if (offset < 0)
2773
+        return NULL;
2774
+
2775
+    offset /= sizeof(long);
2776
+
2777
+    return reg_array + offset;
2778
+}
2779
+static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
2780
+                 unsigned int bytes, bool read)
2781
+{
2782
+    u64 exit_code, exit_info_1, exit_info_2;
2783
+    unsigned long ghcb_pa = __pa(ghcb);
2784
+    enum es_result res;
2785
+    phys_addr_t paddr;
2786
+    void __user *ref;
2787
+
2788
+    ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
2789
+    if (ref == (void __user *)-1L)
2790
+        return ES_UNSUPPORTED;
2791
+
2792
+    exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
2793
+
2794
+    res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
2795
+    if (res != ES_OK) {
2796
+        if (res == ES_EXCEPTION && !read)
2797
+            ctxt->fi.error_code |= X86_PF_WRITE;
2798
+
2799
+        return res;
2800
+    }
2801
+
2802
+    exit_info_1 = paddr;
2803
+    /* Can never be greater than 8 */
2804
+    exit_info_2 = bytes;
2805
+
2806
+    ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
2807
+
2808
+    return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
2809
+}
2810
+
2811
+/*
2812
+ * The MOVS instruction has two memory operands, which raises the
2813
+ * problem that it is not known whether the access to the source or the
2814
+ * destination caused the #VC exception (and hence whether an MMIO read
2815
+ * or write operation needs to be emulated).
2816
+ *
2817
+ * Instead of playing games with walking page-tables and trying to guess
2818
+ * whether the source or destination is an MMIO range, split the move
2819
+ * into two operations, a read and a write with only one memory operand.
2820
+ * This will cause a nested #VC exception on the MMIO address which can
2821
+ * then be handled.
2822
+ *
2823
+ * This implementation has the benefit that it also supports MOVS where
2824
+ * source _and_ destination are MMIO regions.
2825
+ *
2826
+ * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
2827
+ * rare operation. If it turns out to be a performance problem the split
2828
+ * operations can be moved to memcpy_fromio() and memcpy_toio().
2829
+ */
2830
+static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
2831
+                     unsigned int bytes)
2832
+{
2833
+    unsigned long ds_base, es_base;
2834
+    unsigned char *src, *dst;
2835
+    unsigned char buffer[8];
2836
+    enum es_result ret;
2837
+    bool rep;
2838
+    int off;
2839
+
2840
+    ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
2841
+    es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
2842
+
2843
+    if (ds_base == -1L || es_base == -1L) {
2844
+        ctxt->fi.vector = X86_TRAP_GP;
2845
+        ctxt->fi.error_code = 0;
2846
+        return ES_EXCEPTION;
2847
+    }
2848
+
2849
+    src = ds_base + (unsigned char *)ctxt->regs->si;
2850
+    dst = es_base + (unsigned char *)ctxt->regs->di;
2851
+
2852
+    ret = vc_read_mem(ctxt, src, buffer, bytes);
2853
+    if (ret != ES_OK)
2854
+        return ret;
2855
+
2856
+    ret = vc_write_mem(ctxt, dst, buffer, bytes);
2857
+    if (ret != ES_OK)
2858
+        return ret;
2859
+
2860
+    if (ctxt->regs->flags & X86_EFLAGS_DF)
2861
+        off = -bytes;
2862
+    else
2863
+        off = bytes;
2864
+
2865
+    ctxt->regs->si += off;
2866
+    ctxt->regs->di += off;
2867
+
2868
+    rep = insn_has_rep_prefix(&ctxt->insn);
2869
+    if (rep)
2870
+        ctxt->regs->cx -= 1;
2871
+
2872
+    if (!rep || ctxt->regs->cx == 0)
2873
+        return ES_OK;
2874
+    else
2875
+        return ES_RETRY;
2876
+}
2877
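A worked trace of the retry behaviour for a 4-byte REP MOVS with RCX == 2 and an MMIO destination (direction flag clear):

    /*
     * Illustration only:
     *   pass 1: read 4 bytes from [rsi], write them to [rdi] (the nested
     *           #VC handles the MMIO side), si += 4, di += 4, cx = 1,
     *           return ES_RETRY -> the instruction is restarted
     *   pass 2: same again, cx = 0, return ES_OK -> RIP advances
     */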
+
2878
+static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
2879
+{
2880
+    struct insn *insn = &ctxt->insn;
2881
+    enum insn_mmio_type mmio;
2882
+    unsigned int bytes = 0;
2883
+    enum es_result ret;
2884
+    u8 sign_byte;
2885
+    long *reg_data;
2886
+
2887
+    mmio = insn_decode_mmio(insn, &bytes);
2888
+    if (mmio == INSN_MMIO_DECODE_FAILED)
2889
+        return ES_DECODE_FAILED;
2890
+
2891
+    if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) {
2892
+        reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
2893
+        if (!reg_data)
2894
+            return ES_DECODE_FAILED;
2895
+    }
2896
+
2897
+    if (user_mode(ctxt->regs))
2898
+        return ES_UNSUPPORTED;
2899
+
2900
+    switch (mmio) {
2901
+    case INSN_MMIO_WRITE:
2902
+        memcpy(ghcb->shared_buffer, reg_data, bytes);
2903
+        ret = vc_do_mmio(ghcb, ctxt, bytes, false);
2904
+        break;
2905
+    case INSN_MMIO_WRITE_IMM:
2906
+        memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
2907
+        ret = vc_do_mmio(ghcb, ctxt, bytes, false);
2908
+        break;
2909
+    case INSN_MMIO_READ:
2910
+        ret = vc_do_mmio(ghcb, ctxt, bytes, true);
2911
+        if (ret)
2912
+            break;
2913
+
2914
+        /* Zero-extend for 32-bit operation */
2915
+        if (bytes == 4)
2916
+            *reg_data = 0;
2917
+
2918
+        memcpy(reg_data, ghcb->shared_buffer, bytes);
2919
+        break;
2920
+    case INSN_MMIO_READ_ZERO_EXTEND:
2921
+        ret = vc_do_mmio(ghcb, ctxt, bytes, true);
2922
+        if (ret)
2923
+            break;
2924
+
2925
+        /* Zero extend based on operand size */
2926
+        memset(reg_data, 0, insn->opnd_bytes);
2927
+        memcpy(reg_data, ghcb->shared_buffer, bytes);
2928
+        break;
2929
+    case INSN_MMIO_READ_SIGN_EXTEND:
2930
+        ret = vc_do_mmio(ghcb, ctxt, bytes, true);
2931
+        if (ret)
2932
+            break;
2933
+
2934
+        if (bytes == 1) {
2935
+            u8 *val = (u8 *)ghcb->shared_buffer;
2936
+
2937
+            sign_byte = (*val & 0x80) ? 0xff : 0x00;
2938
+        } else {
2939
+            u16 *val = (u16 *)ghcb->shared_buffer;
2940
+
2941
+            sign_byte = (*val & 0x8000) ? 0xff : 0x00;
2942
+        }
2943
+
2944
+        /* Sign extend based on operand size */
2945
+        memset(reg_data, sign_byte, insn->opnd_bytes);
2946
+        memcpy(reg_data, ghcb->shared_buffer, bytes);
2947
+        break;
2948
+    case INSN_MMIO_MOVS:
2949
+        ret = vc_handle_mmio_movs(ctxt, bytes);
2950
+        break;
2951
+    default:
2952
+        ret = ES_UNSUPPORTED;
2953
+        break;
2954
+    }
2955
+
2956
+    return ret;
2957
+}
2958
+
2959
+static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
2960
+                     struct es_em_ctxt *ctxt)
2961
+{
2962
+    struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
2963
+    long val, *reg = vc_insn_get_rm(ctxt);
2964
+    enum es_result ret;
2965
+
2966
+    if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
2967
+        return ES_VMM_ERROR;
2968
+
2969
+    if (!reg)
2970
+        return ES_DECODE_FAILED;
2971
+
2972
+    val = *reg;
2973
+
2974
+    /* Upper 32 bits must be written as zeroes */
2975
+    if (val >> 32) {
2976
+        ctxt->fi.vector = X86_TRAP_GP;
2977
+        ctxt->fi.error_code = 0;
2978
+        return ES_EXCEPTION;
2979
+    }
2980
+
2981
+    /* Clear out other reserved bits and set bit 10 */
2982
+    val = (val & 0xffff23ffL) | BIT(10);
2983
+
2984
+    /* Early non-zero writes to DR7 are not supported */
2985
+    if (!data && (val & ~DR7_RESET_VALUE))
2986
+        return ES_UNSUPPORTED;
2987
+
2988
+    /* Using a value of 0 for ExitInfo1 means RAX holds the value */
2989
+    ghcb_set_rax(ghcb, val);
2990
+    ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
2991
+    if (ret != ES_OK)
2992
+        return ret;
2993
+
2994
+    if (data)
2995
+        data->dr7 = val;
2996
+
2997
+    return ES_OK;
2998
+}
2999
+
3000
+static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
3001
+                     struct es_em_ctxt *ctxt)
3002
+{
3003
+    struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
3004
+    long *reg = vc_insn_get_rm(ctxt);
3005
+
3006
+    if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
3007
+        return ES_VMM_ERROR;
3008
+
3009
+    if (!reg)
3010
+        return ES_DECODE_FAILED;
3011
+
3012
+    if (data)
3013
+        *reg = data->dr7;
3014
+    else
3015
+        *reg = DR7_RESET_VALUE;
3016
+
3017
+    return ES_OK;
3018
+}
3019
+
3020
+static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
3021
+                 struct es_em_ctxt *ctxt)
3022
+{
3023
+    return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
3024
+}
3025
+
3026
+static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
3027
+{
3028
+    enum es_result ret;
3029
+
3030
+    ghcb_set_rcx(ghcb, ctxt->regs->cx);
3031
+
3032
+    ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
3033
+    if (ret != ES_OK)
3034
+        return ret;
3035
+
3036
+    if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
3037
+        return ES_VMM_ERROR;
3038
+
3039
+    ctxt->regs->ax = ghcb->save.rax;
3040
+    ctxt->regs->dx = ghcb->save.rdx;
3041
+
3042
+    return ES_OK;
3043
+}
3044
+
3045
+static enum es_result vc_handle_monitor(struct ghcb *ghcb,
3046
+                    struct es_em_ctxt *ctxt)
3047
+{
3048
+    /*
3049
+     * Treat it as a NOP and do not leak a physical address to the
3050
+     * hypervisor.
3051
+     */
3052
+    return ES_OK;
3053
+}
3054
+
3055
+static enum es_result vc_handle_mwait(struct ghcb *ghcb,
3056
+                 struct es_em_ctxt *ctxt)
3057
+{
3058
+    /* Treat the same as MONITOR/MONITORX */
3059
+    return ES_OK;
3060
+}
3061
+
3062
+static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
3063
+                    struct es_em_ctxt *ctxt)
3064
+{
3065
+    enum es_result ret;
3066
+
3067
+    ghcb_set_rax(ghcb, ctxt->regs->ax);
3068
+    ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);
3069
+
3070
+    if (x86_platform.hyper.sev_es_hcall_prepare)
3071
+        x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
3072
+
3073
+    ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
3074
+    if (ret != ES_OK)
3075
+        return ret;
3076
+
3077
+    if (!ghcb_rax_is_valid(ghcb))
3078
+        return ES_VMM_ERROR;
3079
+
3080
+    ctxt->regs->ax = ghcb->save.rax;
3081
+
3082
+    /*
3083
+     * Call sev_es_hcall_finish() after regs->ax is already set.
3084
+     * This allows the hypervisor handler to overwrite it again if
3085
+     * necessary.
3086
+     */
3087
+    if (x86_platform.hyper.sev_es_hcall_finish &&
3088
+     !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
3089
+        return ES_VMM_ERROR;
3090
+
3091
+    return ES_OK;
3092
+}
3093
+
3094
+static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
3095
+                    struct es_em_ctxt *ctxt)
3096
+{
3097
+    /*
3098
+     * Calling exc_alignment_check() directly does not work, because it
3099
+     * enables IRQs and the GHCB is active. Forward the exception and call
3100
+     * it later from vc_forward_exception().
3101
+     */
3102
+    ctxt->fi.vector = X86_TRAP_AC;
3103
+    ctxt->fi.error_code = 0;
3104
+    return ES_EXCEPTION;
3105
+}
3106
+
3107
+static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
3108
+                     struct ghcb *ghcb,
3109
+                     unsigned long exit_code)
3110
+{
3111
+    enum es_result result = vc_check_opcode_bytes(ctxt, exit_code);
3112
+
3113
+    if (result != ES_OK)
3114
+        return result;
3115
+
3116
+    switch (exit_code) {
3117
+    case SVM_EXIT_READ_DR7:
3118
+        result = vc_handle_dr7_read(ghcb, ctxt);
3119
+        break;
3120
+    case SVM_EXIT_WRITE_DR7:
3121
+        result = vc_handle_dr7_write(ghcb, ctxt);
3122
+        break;
3123
+    case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
3124
+        result = vc_handle_trap_ac(ghcb, ctxt);
3125
+        break;
3126
+    case SVM_EXIT_RDTSC:
3127
+    case SVM_EXIT_RDTSCP:
3128
+        result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
3129
+        break;
3130
+    case SVM_EXIT_RDPMC:
3131
+        result = vc_handle_rdpmc(ghcb, ctxt);
3132
+        break;
3133
+    case SVM_EXIT_INVD:
3134
+        pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
3135
+        result = ES_UNSUPPORTED;
3136
+        break;
3137
+    case SVM_EXIT_CPUID:
3138
+        result = vc_handle_cpuid(ghcb, ctxt);
3139
+        break;
3140
+    case SVM_EXIT_IOIO:
3141
+        result = vc_handle_ioio(ghcb, ctxt);
3142
+        break;
3143
+    case SVM_EXIT_MSR:
3144
+        result = vc_handle_msr(ghcb, ctxt);
3145
+        break;
3146
+    case SVM_EXIT_VMMCALL:
3147
+        result = vc_handle_vmmcall(ghcb, ctxt);
3148
+        break;
3149
+    case SVM_EXIT_WBINVD:
3150
+        result = vc_handle_wbinvd(ghcb, ctxt);
3151
+        break;
3152
+    case SVM_EXIT_MONITOR:
3153
+        result = vc_handle_monitor(ghcb, ctxt);
3154
+        break;
3155
+    case SVM_EXIT_MWAIT:
3156
+        result = vc_handle_mwait(ghcb, ctxt);
3157
+        break;
3158
+    case SVM_EXIT_NPF:
3159
+        result = vc_handle_mmio(ghcb, ctxt);
3160
+        break;
3161
+    default:
3162
+        /*
3163
+         * Unexpected #VC exception
3164
+         */
3165
+        result = ES_UNSUPPORTED;
3166
+    }
3167
+
3168
+    return result;
3169
+}
3170
+
3171
+static __always_inline bool is_vc2_stack(unsigned long sp)
3172
+{
3173
+    return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
3174
+}
3175
+
3176
+static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
3177
+{
3178
+    unsigned long sp, prev_sp;
3179
+
3180
+    sp = (unsigned long)regs;
3181
+    prev_sp = regs->sp;
3182
+
3183
+    /*
3184
+     * If the code was already executing on the VC2 stack when the #VC
3185
+     * happened, let it proceed to the normal handling routine. This way the
3186
+     * code executing on the VC2 stack can cause #VC exceptions to get handled.
3187
+     */
3188
+    return is_vc2_stack(sp) && !is_vc2_stack(prev_sp);
3189
+}
3190
+
3191
+static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
3192
+{
3193
+    struct ghcb_state state;
3194
+    struct es_em_ctxt ctxt;
3195
+    enum es_result result;
3196
+    struct ghcb *ghcb;
3197
+    bool ret = true;
3198
+
3199
+    ghcb = __sev_get_ghcb(&state);
3200
+
3201
+    vc_ghcb_invalidate(ghcb);
3202
+    result = vc_init_em_ctxt(&ctxt, regs, error_code);
3203
+
3204
+    if (result == ES_OK)
3205
+        result = vc_handle_exitcode(&ctxt, ghcb, error_code);
3206
+
3207
+    __sev_put_ghcb(&state);
3208
+
3209
+    /* Done - now check the result */
3210
+    switch (result) {
3211
+    case ES_OK:
3212
+        vc_finish_insn(&ctxt);
3213
+        break;
3214
+    case ES_UNSUPPORTED:
3215
+        pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
3216
+                 error_code, regs->ip);
3217
+        ret = false;
3218
+        break;
3219
+    case ES_VMM_ERROR:
3220
+        pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
3221
+                 error_code, regs->ip);
3222
+        ret = false;
3223
+        break;
3224
+    case ES_DECODE_FAILED:
3225
+        pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
3226
+                 error_code, regs->ip);
3227
+        ret = false;
3228
+        break;
3229
+    case ES_EXCEPTION:
3230
+        vc_forward_exception(&ctxt);
3231
+        break;
3232
+    case ES_RETRY:
3233
+        /* Nothing to do */
3234
+        break;
3235
+    default:
3236
+        pr_emerg("Unknown result in %s():%d\n", __func__, result);
3237
+        /*
3238
+         * Emulating the instruction which caused the #VC exception
3239
+         * failed - can't continue so print debug information
3240
+         */
3241
+        BUG();
3242
+    }
3243
+
3244
+    return ret;
3245
+}
3246
+
3247
+static __always_inline bool vc_is_db(unsigned long error_code)
3248
+{
3249
+    return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
3250
+}
3251
+
3252
+/*
3253
+ * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
3254
+ * and will panic when an error happens.
3255
+ */
3256
+DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
3257
+{
3258
+    irqentry_state_t irq_state;
3259
+
3260
+    /*
3261
+     * With the current implementation it is always possible to switch to a
3262
+     * safe stack because #VC exceptions only happen at known places, like
3263
+     * intercepted instructions or accesses to MMIO areas/IO ports. They can
3264
+     * also happen with code instrumentation when the hypervisor intercepts
3265
+     * #DB, but the critical paths are forbidden to be instrumented, so #DB
3266
+     * exceptions currently also only happen in safe places.
3267
+     *
3268
+     * But keep this here in case the noinstr annotations are violated due
3269
+     * to bug elsewhere.
3270
+     */
3271
+    if (unlikely(vc_from_invalid_context(regs))) {
3272
+        instrumentation_begin();
3273
+        panic("Can't handle #VC exception from unsupported context\n");
3274
+        instrumentation_end();
3275
+    }
3276
+
3277
+    /*
3278
+     * Handle #DB before calling into !noinstr code to avoid recursive #DB.
3279
+     */
3280
+    if (vc_is_db(error_code)) {
3281
+        exc_debug(regs);
3282
+        return;
3283
+    }
3284
+
3285
+    irq_state = irqentry_nmi_enter(regs);
3286
+
3287
+    instrumentation_begin();
3288
+
3289
+    if (!vc_raw_handle_exception(regs, error_code)) {
3290
+        /* Show some debug info */
3291
+        show_regs(regs);
3292
+
3293
+        /* Ask hypervisor to sev_es_terminate */
3294
+        sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
3295
+
3296
+        /* If that fails and we get here - just panic */
3297
+        panic("Returned from Terminate-Request to Hypervisor\n");
3298
+    }
3299
+
3300
+    instrumentation_end();
3301
+    irqentry_nmi_exit(regs, irq_state);
3302
+}
3303
+
3304
+/*
3305
+ * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
3306
+ * and will kill the current task with SIGBUS when an error happens.
3307
+ */
3308
+DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
3309
+{
3310
+    /*
3311
+     * Handle #DB before calling into !noinstr code to avoid recursive #DB.
3312
+     */
3313
+    if (vc_is_db(error_code)) {
3314
+        noist_exc_debug(regs);
3315
+        return;
3316
+    }
3317
+
3318
+    irqentry_enter_from_user_mode(regs);
3319
+    instrumentation_begin();
3320
+
3321
+    if (!vc_raw_handle_exception(regs, error_code)) {
3322
+        /*
3323
+         * Do not kill the machine if user-space triggered the
3324
+         * exception. Send SIGBUS instead and let user-space deal with
3325
+         * it.
3326
+         */
3327
+        force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
3328
+    }
3329
+
3330
+    instrumentation_end();
3331
+    irqentry_exit_to_user_mode(regs);
3332
+}
3333
+
3334
+bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
3335
+{
3336
+    unsigned long exit_code = regs->orig_ax;
3337
+    struct es_em_ctxt ctxt;
3338
+    enum es_result result;
3339
+
3340
+    vc_ghcb_invalidate(boot_ghcb);
3341
+
3342
+    result = vc_init_em_ctxt(&ctxt, regs, exit_code);
3343
+    if (result == ES_OK)
3344
+        result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);
3345
+
3346
+    /* Done - now check the result */
3347
+    switch (result) {
3348
+    case ES_OK:
3349
+        vc_finish_insn(&ctxt);
3350
+        break;
3351
+    case ES_UNSUPPORTED:
3352
+        early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
3353
+                exit_code, regs->ip);
3354
+        goto fail;
3355
+    case ES_VMM_ERROR:
3356
+        early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
3357
+                exit_code, regs->ip);
3358
+        goto fail;
3359
+    case ES_DECODE_FAILED:
3360
+        early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
3361
+                exit_code, regs->ip);
3362
+        goto fail;
3363
+    case ES_EXCEPTION:
3364
+        vc_early_forward_exception(&ctxt);
3365
+        break;
3366
+    case ES_RETRY:
3367
+        /* Nothing to do */
3368
+        break;
3369
+    default:
3370
+        BUG();
3371
+    }
3372
+
3373
+    return true;
3374
+
3375
+fail:
3376
+    show_regs(regs);
3377
+
3378
+    sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
3379
+}
3380
+
3381
+/*
3382
+ * Initial set up of SNP relies on information provided by the
3383
+ * Confidential Computing blob, which can be passed to the kernel
3384
+ * in the following ways, depending on how it is booted:
3385
+ *
3386
+ * - when booted via the boot/decompress kernel:
3387
+ * - via boot_params
3388
+ *
3389
+ * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH):
3390
+ * - via a setup_data entry, as defined by the Linux Boot Protocol
3391
+ *
3392
+ * Scan for the blob in that order.
3393
+ */
3394
+static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
3395
+{
3396
+    struct cc_blob_sev_info *cc_info;
3397
+
3398
+    /* Boot kernel would have passed the CC blob via boot_params. */
3399
+    if (bp->cc_blob_address) {
3400
+        cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address;
3401
+        goto found_cc_info;
3402
+    }
3403
+
3404
+    /*
3405
+     * If kernel was booted directly, without the use of the
3406
+     * boot/decompression kernel, the CC blob may have been passed via
3407
+     * setup_data instead.
3408
+     */
3409
+    cc_info = find_cc_blob_setup_data(bp);
3410
+    if (!cc_info)
3411
+        return NULL;
3412
+
3413
+found_cc_info:
3414
+    if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC)
3415
+        snp_abort();
3416
+
3417
+    return cc_info;
3418
+}
3419
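A hedged sketch of the setup_data path referenced above; the structure and type names follow the Linux Boot Protocol, but treat the exact helper shape as illustrative rather than this patch's implementation:

    /* Illustrative only: walk the setup_data list for a SETUP_CC_BLOB entry. */
    struct setup_data *hdr = (struct setup_data *)bp->hdr.setup_data;

    while (hdr) {
        if (hdr->type == SETUP_CC_BLOB) {
            struct cc_setup_data *ccsd = (struct cc_setup_data *)hdr;

            return (struct cc_blob_sev_info *)(unsigned long)ccsd->cc_blob_address;
        }
        hdr = (struct setup_data *)hdr->next;
    }
    return NULL;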
+
3420
+static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
3421
+{
3422
+    struct svsm_call call = {};
3423
+    int ret;
3424
+    u64 pa;
3425
+
3426
+    /*
3427
+     * Record the SVSM Calling Area address (CAA) if the guest is not
3428
+     * running at VMPL0. The CA will be used to communicate with the
3429
+     * SVSM to perform the SVSM services.
3430
+     */
3431
+    if (!svsm_setup_ca(cc_info))
3432
+        return;
3433
+
3434
+    /*
3435
+     * It is very early in the boot and the kernel is running identity
3436
+     * mapped but without having adjusted the pagetables to where the
3437
+     * kernel was loaded (physbase), so get the CA address using
3438
+     * RIP-relative addressing.
3439
+     */
3440
+    pa = (u64)rip_rel_ptr(&boot_svsm_ca_page);
3441
+
3442
+    /*
3443
+     * Switch over to the boot SVSM CA while the current CA is still
3444
+     * addressable. There is no GHCB at this point so use the MSR protocol.
3445
+     *
3446
+     * SVSM_CORE_REMAP_CA call:
3447
+     * RAX = 0 (Protocol=0, CallID=0)
3448
+     * RCX = New CA GPA
3449
+     */
3450
+    call.caa = svsm_get_caa();
3451
+    call.rax = SVSM_CORE_CALL(SVSM_CORE_REMAP_CA);
3452
+    call.rcx = pa;
3453
+    ret = svsm_perform_call_protocol(&call);
3454
+    if (ret)
3455
+        sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL);
3456
+
3457
+    RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa;
3458
+    RIP_REL_REF(boot_svsm_caa_pa) = pa;
3459
+}
3460
+
3461
+bool __head snp_init(struct boot_params *bp)
3462
+{
3463
+    struct cc_blob_sev_info *cc_info;
3464
+
3465
+    if (!bp)
3466
+        return false;
3467
+
3468
+    cc_info = find_cc_blob(bp);
3469
+    if (!cc_info)
3470
+        return false;
3471
+
3472
+    if (cc_info->secrets_phys && cc_info->secrets_len == PAGE_SIZE)
3473
+        secrets_pa = cc_info->secrets_phys;
3474
+    else
3475
+        return false;
3476
+
3477
+    setup_cpuid_table(cc_info);
3478
+
3479
+    svsm_setup(cc_info);
3480
+
3481
+    /*
3482
+     * The CC blob will be used later to access the secrets page. Cache
3483
+     * it here like the boot kernel does.
3484
+     */
3485
+    bp->cc_blob_address = (u32)(unsigned long)cc_info;
3486
+
3487
+    return true;
3488
+}
3489
+
3490
+void __head __noreturn snp_abort(void)
3491
+{
3492
+    sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
3493
+}
--
2.49.0.504.g3bcea36a83-goog
1
From: Ard Biesheuvel <ardb@kernel.org>
1
From: Ard Biesheuvel <ardb@kernel.org>
2
2
3
Linus expressed a strong preference for arch-specific asm code (i.e.,
3
Move the SEV startup code into arch/x86/boot/startup/, where it will
4
virtually all of it) to reside under arch/ rather than anywhere else.
4
reside along with other code that executes extremely early, and
5
5
therefore needs to be built in a special manner.
6
So move the EFI mixed mode startup code back, and put it under
7
arch/x86/boot/startup/ where all shared x86 startup code is going to
8
live.
9
6
10
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
7
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
11
---
8
---
12
arch/x86/boot/startup/Makefile | 3 +++
9
arch/x86/boot/compressed/sev.c | 2 +-
13
drivers/firmware/efi/libstub/x86-mixed.S => arch/x86/boot/startup/efi-mixed.S | 0
10
arch/x86/boot/startup/Makefile | 2 +-
14
drivers/firmware/efi/libstub/Makefile | 1 -
11
arch/x86/{coco/sev/shared.c => boot/startup/sev-shared.c} | 0
15
3 files changed, 3 insertions(+), 1 deletion(-)
12
arch/x86/{coco/sev/startup.c => boot/startup/sev-startup.c} | 2 +-
13
arch/x86/coco/sev/Makefile | 21 +-------------------
14
5 files changed, 4 insertions(+), 23 deletions(-)
16
15
16
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/arch/x86/boot/compressed/sev.c
19
+++ b/arch/x86/boot/compressed/sev.c
20
@@ -XXX,XX +XXX,XX @@ int svsm_perform_call_protocol(struct svsm_call *call);
21
u8 snp_vmpl;
22
23
/* Include code for early handlers */
24
-#include "../../coco/sev/shared.c"
25
+#include "../../boot/startup/sev-shared.c"
26
27
int svsm_perform_call_protocol(struct svsm_call *call)
28
{
17
diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile
29
diff --git a/arch/x86/boot/startup/Makefile b/arch/x86/boot/startup/Makefile
18
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
19
--- a/arch/x86/boot/startup/Makefile
31
--- a/arch/x86/boot/startup/Makefile
20
+++ b/arch/x86/boot/startup/Makefile
32
+++ b/arch/x86/boot/startup/Makefile
33
@@ -XXX,XX +XXX,XX @@ UBSAN_SANITIZE    := n
34
KCOV_INSTRUMENT    := n
35
36
obj-$(CONFIG_X86_64)        += gdt_idt.o map_kernel.o
37
-obj-$(CONFIG_AMD_MEM_ENCRYPT)    += sme.o
38
+obj-$(CONFIG_AMD_MEM_ENCRYPT)    += sme.o sev-startup.o
39
40
lib-$(CONFIG_X86_64)        += la57toggle.o
41
lib-$(CONFIG_EFI_MIXED)        += efi-mixed.o
42
diff --git a/arch/x86/coco/sev/shared.c b/arch/x86/boot/startup/sev-shared.c
43
similarity index 100%
44
rename from arch/x86/coco/sev/shared.c
45
rename to arch/x86/boot/startup/sev-shared.c
46
diff --git a/arch/x86/coco/sev/startup.c b/arch/x86/boot/startup/sev-startup.c
47
similarity index 99%
48
rename from arch/x86/coco/sev/startup.c
49
rename to arch/x86/boot/startup/sev-startup.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/arch/x86/coco/sev/startup.c
52
+++ b/arch/x86/boot/startup/sev-startup.c
53
@@ -XXX,XX +XXX,XX @@ static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
54
}
55
56
/* Include code shared with pre-decompression boot stage */
57
-#include "shared.c"
58
+#include "sev-shared.c"
59
60
noinstr void __sev_put_ghcb(struct ghcb_state *state)
61
{
62
diff --git a/arch/x86/coco/sev/Makefile b/arch/x86/coco/sev/Makefile
63
index XXXXXXX..XXXXXXX 100644
64
--- a/arch/x86/coco/sev/Makefile
65
+++ b/arch/x86/coco/sev/Makefile
21
@@ -XXX,XX +XXX,XX @@
66
@@ -XXX,XX +XXX,XX @@
22
# SPDX-License-Identifier: GPL-2.0
67
# SPDX-License-Identifier: GPL-2.0
23
68
24
+KBUILD_AFLAGS        += -D__DISABLE_EXPORTS
69
-obj-y += core.o startup.o
25
+
70
-
26
lib-$(CONFIG_X86_64)        += la57toggle.o
71
-# jump tables are emitted using absolute references in non-PIC code
27
+lib-$(CONFIG_EFI_MIXED)        += efi-mixed.o
72
-# so they cannot be used in the early SEV startup code
28
diff --git a/drivers/firmware/efi/libstub/x86-mixed.S b/arch/x86/boot/startup/efi-mixed.S
73
-CFLAGS_startup.o += -fno-jump-tables
29
similarity index 100%
74
-
30
rename from drivers/firmware/efi/libstub/x86-mixed.S
75
-ifdef CONFIG_FUNCTION_TRACER
31
rename to arch/x86/boot/startup/efi-mixed.S
76
-CFLAGS_REMOVE_startup.o = -pg
32
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
77
-endif
33
index XXXXXXX..XXXXXXX 100644
78
-
34
--- a/drivers/firmware/efi/libstub/Makefile
79
-KASAN_SANITIZE_startup.o    := n
35
+++ b/drivers/firmware/efi/libstub/Makefile
80
-KMSAN_SANITIZE_startup.o    := n
36
@@ -XXX,XX +XXX,XX @@ lib-$(CONFIG_EFI_GENERIC_STUB)    += efi-stub.o string.o intrinsics.o systable.o \
81
-KCOV_INSTRUMENT_startup.o    := n
37
lib-$(CONFIG_ARM)        += arm32-stub.o
82
-
38
lib-$(CONFIG_ARM64)        += kaslr.o arm64.o arm64-stub.o smbios.o
83
-# With some compiler versions the generated code results in boot hangs, caused
39
lib-$(CONFIG_X86)        += x86-stub.o smbios.o
84
-# by several compilation units. To be safe, disable all instrumentation.
40
-lib-$(CONFIG_EFI_MIXED)        += x86-mixed.o
85
-KCSAN_SANITIZE        := n
41
lib-$(CONFIG_X86_64)        += x86-5lvl.o
86
-
42
lib-$(CONFIG_RISCV)        += kaslr.o riscv.o riscv-stub.o
87
-# Clang 14 and older may fail to respect __no_sanitize_undefined when inlining
43
lib-$(CONFIG_LOONGARCH)        += loongarch.o loongarch-stub.o
88
-UBSAN_SANITIZE        := n
89
+obj-y += core.o
--
2.49.0.504.g3bcea36a83-goog
New patch
1
From: Ard Biesheuvel <ardb@kernel.org>
1
2
3
Now that the early SEV code is built with -fPIC, RIP_REL_REF() has no
4
effect and can be dropped.
5
6
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
7
---
8
arch/x86/boot/startup/sev-shared.c | 26 +++++++++-----------
9
arch/x86/boot/startup/sev-startup.c | 16 ++++++------
10
arch/x86/include/asm/sev-internal.h | 18 +++-----------
11
3 files changed, 23 insertions(+), 37 deletions(-)
12
13
diff --git a/arch/x86/boot/startup/sev-shared.c b/arch/x86/boot/startup/sev-shared.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/arch/x86/boot/startup/sev-shared.c
16
+++ b/arch/x86/boot/startup/sev-shared.c
17
@@ -XXX,XX +XXX,XX @@ static int svsm_perform_ghcb_protocol(struct ghcb *ghcb, struct svsm_call *call)
18
     * Fill in protocol and format specifiers. This can be called very early
19
     * in the boot, so use rip-relative references as needed.
20
     */
21
-    ghcb->protocol_version = RIP_REL_REF(ghcb_version);
22
+    ghcb->protocol_version = ghcb_version;
23
    ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
24
25
    ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_SNP_RUN_VMPL);
26
@@ -XXX,XX +XXX,XX @@ snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
27
        leaf->eax = leaf->ebx = leaf->ecx = leaf->edx = 0;
28
29
        /* Skip post-processing for out-of-range zero leafs. */
30
-        if (!(leaf->fn <= RIP_REL_REF(cpuid_std_range_max) ||
31
-         (leaf->fn >= 0x40000000 && leaf->fn <= RIP_REL_REF(cpuid_hyp_range_max)) ||
32
-         (leaf->fn >= 0x80000000 && leaf->fn <= RIP_REL_REF(cpuid_ext_range_max))))
33
+        if (!(leaf->fn <= cpuid_std_range_max ||
34
+         (leaf->fn >= 0x40000000 && leaf->fn <= cpuid_hyp_range_max) ||
35
+         (leaf->fn >= 0x80000000 && leaf->fn <= cpuid_ext_range_max)))
36
            return 0;
37
    }
38
39
@@ -XXX,XX +XXX,XX @@ static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
40
        const struct snp_cpuid_fn *fn = &cpuid_table->fn[i];
41
42
        if (fn->eax_in == 0x0)
43
-            RIP_REL_REF(cpuid_std_range_max) = fn->eax;
44
+            cpuid_std_range_max = fn->eax;
45
        else if (fn->eax_in == 0x40000000)
46
-            RIP_REL_REF(cpuid_hyp_range_max) = fn->eax;
47
+            cpuid_hyp_range_max = fn->eax;
48
        else if (fn->eax_in == 0x80000000)
49
-            RIP_REL_REF(cpuid_ext_range_max) = fn->eax;
50
+            cpuid_ext_range_max = fn->eax;
51
    }
52
}
53
54
@@ -XXX,XX +XXX,XX @@ static void __head pvalidate_4k_page(unsigned long vaddr, unsigned long paddr,
55
{
56
    int ret;
57
58
-    /*
59
-     * This can be called very early during boot, so use rIP-relative
60
-     * references as needed.
61
-     */
62
-    if (RIP_REL_REF(snp_vmpl)) {
63
+    if (snp_vmpl) {
64
        svsm_pval_4k_page(paddr, validate);
65
    } else {
66
        ret = pvalidate(vaddr, RMP_PG_SIZE_4K, validate);
67
@@ -XXX,XX +XXX,XX @@ static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
68
    if (!secrets_page->svsm_guest_vmpl)
69
        sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_VMPL0);
70
71
-    RIP_REL_REF(snp_vmpl) = secrets_page->svsm_guest_vmpl;
72
+    snp_vmpl = secrets_page->svsm_guest_vmpl;
73
74
    caa = secrets_page->svsm_caa;
75
76
@@ -XXX,XX +XXX,XX @@ static bool __head svsm_setup_ca(const struct cc_blob_sev_info *cc_info)
77
     * The CA is identity mapped when this routine is called, both by the
78
     * decompressor code and the early kernel code.
79
     */
80
-    RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)caa;
81
-    RIP_REL_REF(boot_svsm_caa_pa) = caa;
82
+    boot_svsm_caa = (struct svsm_ca *)caa;
83
+    boot_svsm_caa_pa = caa;
84
85
    /* Advertise the SVSM presence via CPUID. */
86
    cpuid_table = (struct snp_cpuid_table *)snp_cpuid_get_table();
87
diff --git a/arch/x86/boot/startup/sev-startup.c b/arch/x86/boot/startup/sev-startup.c
88
index XXXXXXX..XXXXXXX 100644
89
--- a/arch/x86/boot/startup/sev-startup.c
90
+++ b/arch/x86/boot/startup/sev-startup.c
91
@@ -XXX,XX +XXX,XX @@ int svsm_perform_call_protocol(struct svsm_call *call)
92
     * ghcbs_initialized is set, then it is late in the boot and no need
93
     * to worry about rip-relative references in called functions.
94
     */
95
-    if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
96
+    if (sev_cfg.ghcbs_initialized)
97
        ghcb = __sev_get_ghcb(&state);
98
-    else if (RIP_REL_REF(boot_ghcb))
99
-        ghcb = RIP_REL_REF(boot_ghcb);
100
+    else if (boot_ghcb)
101
+        ghcb = boot_ghcb;
102
    else
103
        ghcb = NULL;
104
105
@@ -XXX,XX +XXX,XX @@ int svsm_perform_call_protocol(struct svsm_call *call)
106
             : svsm_perform_msr_protocol(call);
107
    } while (ret == -EAGAIN);
108
109
-    if (RIP_REL_REF(sev_cfg).ghcbs_initialized)
110
+    if (sev_cfg.ghcbs_initialized)
111
        __sev_put_ghcb(&state);
112
113
    native_local_irq_restore(flags);
114
@@ -XXX,XX +XXX,XX @@ void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long padd
115
     * This eliminates worries about jump tables or checking boot_cpu_data
116
     * in the cc_platform_has() function.
117
     */
118
-    if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
119
+    if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
120
        return;
121
122
     /*
123
@@ -XXX,XX +XXX,XX @@ void __head early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr
124
     * This eliminates worries about jump tables or checking boot_cpu_data
125
     * in the cc_platform_has() function.
126
     */
127
-    if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
128
+    if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
129
        return;
130
131
     /* Ask hypervisor to mark the memory pages shared in the RMP table. */
132
@@ -XXX,XX +XXX,XX @@ static __head void svsm_setup(struct cc_blob_sev_info *cc_info)
133
    if (ret)
134
        sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SVSM_CA_REMAP_FAIL);
135
136
-    RIP_REL_REF(boot_svsm_caa) = (struct svsm_ca *)pa;
137
-    RIP_REL_REF(boot_svsm_caa_pa) = pa;
138
+    boot_svsm_caa = (struct svsm_ca *)pa;
139
+    boot_svsm_caa_pa = pa;
140
}
141
142
bool __head snp_init(struct boot_params *bp)
143
diff --git a/arch/x86/include/asm/sev-internal.h b/arch/x86/include/asm/sev-internal.h
144
index XXXXXXX..XXXXXXX 100644
145
--- a/arch/x86/include/asm/sev-internal.h
146
+++ b/arch/x86/include/asm/sev-internal.h
147
@@ -XXX,XX +XXX,XX @@ extern u64 boot_svsm_caa_pa;
148
149
static __always_inline struct svsm_ca *svsm_get_caa(void)
150
{
151
-    /*
152
-     * Use rIP-relative references when called early in the boot. If
153
-     * ->use_cas is set, then it is late in the boot and no need
154
-     * to worry about rIP-relative references.
155
-     */
156
-    if (RIP_REL_REF(sev_cfg).use_cas)
157
+    if (sev_cfg.use_cas)
158
        return this_cpu_read(svsm_caa);
159
    else
160
-        return RIP_REL_REF(boot_svsm_caa);
161
+        return boot_svsm_caa;
162
}
163
164
static __always_inline u64 svsm_get_caa_pa(void)
165
{
166
-    /*
167
-     * Use rIP-relative references when called early in the boot. If
168
-     * ->use_cas is set, then it is late in the boot and no need
169
-     * to worry about rIP-relative references.
170
-     */
171
-    if (RIP_REL_REF(sev_cfg).use_cas)
172
+    if (sev_cfg.use_cas)
173
        return this_cpu_read(svsm_caa_pa);
174
    else
175
-        return RIP_REL_REF(boot_svsm_caa_pa);
176
+        return boot_svsm_caa_pa;
177
}
178
179
int svsm_perform_call_protocol(struct svsm_call *call);
180
--
181
2.49.0.504.g3bcea36a83-goog
diff view generated by jsdifflib
1
From: Ard Biesheuvel <ardb@kernel.org>
1
From: Ard Biesheuvel <ardb@kernel.org>
2
2
3
Merge the local include "pgtable.h" -which declares the API of the
3
Now that all users have been moved into startup/ where PIC codegen is
4
5-level paging trampoline- into <asm/boot.h> so that its implementation
4
used, RIP_REL_REF() is no longer needed. Remove it.
5
in la57toggle.S as well as the calling code can be decoupled from the
6
traditional decompressor.
7
5
8
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
6
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
9
---
7
---
10
arch/x86/boot/compressed/head_64.S | 1 -
8
arch/x86/include/asm/asm.h | 5 -----
11
arch/x86/boot/compressed/la57toggle.S | 1 -
9
1 file changed, 5 deletions(-)
12
arch/x86/boot/compressed/misc.c | 1 -
13
arch/x86/boot/compressed/pgtable.h | 18 ------------------
14
arch/x86/boot/compressed/pgtable_64.c | 1 -
15
arch/x86/include/asm/boot.h | 10 ++++++++++
16
6 files changed, 10 insertions(+), 22 deletions(-)
17
10
18
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
11
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
19
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
20
--- a/arch/x86/boot/compressed/head_64.S
13
--- a/arch/x86/include/asm/asm.h
21
+++ b/arch/x86/boot/compressed/head_64.S
14
+++ b/arch/x86/include/asm/asm.h
22
@@ -XXX,XX +XXX,XX @@
15
@@ -XXX,XX +XXX,XX @@ static __always_inline __pure void *rip_rel_ptr(void *p)
23
#include <asm/bootparam.h>
16
24
#include <asm/desc_defs.h>
17
    return p;
25
#include <asm/trapnr.h>
18
}
26
-#include "pgtable.h"
19
-#ifndef __pic__
20
-#define RIP_REL_REF(var)    (*(typeof(&(var)))rip_rel_ptr(&(var)))
21
-#else
22
-#define RIP_REL_REF(var)    (var)
23
-#endif
24
#endif
27
25
28
/*
26
/*
29
* Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result
30
diff --git a/arch/x86/boot/compressed/la57toggle.S b/arch/x86/boot/compressed/la57toggle.S
31
index XXXXXXX..XXXXXXX 100644
32
--- a/arch/x86/boot/compressed/la57toggle.S
33
+++ b/arch/x86/boot/compressed/la57toggle.S
34
@@ -XXX,XX +XXX,XX @@
35
#include <asm/boot.h>
36
#include <asm/msr.h>
37
#include <asm/processor-flags.h>
38
-#include "pgtable.h"
39
40
/*
41
* This is the 32-bit trampoline that will be copied over to low memory. It
42
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/arch/x86/boot/compressed/misc.c
45
+++ b/arch/x86/boot/compressed/misc.c
46
@@ -XXX,XX +XXX,XX @@
47
48
#include "misc.h"
49
#include "error.h"
50
-#include "pgtable.h"
51
#include "../string.h"
52
#include "../voffset.h"
53
#include <asm/bootparam_utils.h>
54
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
55
deleted file mode 100644
56
index XXXXXXX..XXXXXXX
57
--- a/arch/x86/boot/compressed/pgtable.h
58
+++ /dev/null
59
@@ -XXX,XX +XXX,XX @@
60
-#ifndef BOOT_COMPRESSED_PAGETABLE_H
61
-#define BOOT_COMPRESSED_PAGETABLE_H
62
-
63
-#define TRAMPOLINE_32BIT_SIZE        (2 * PAGE_SIZE)
64
-
65
-#define TRAMPOLINE_32BIT_CODE_OFFSET    PAGE_SIZE
66
-#define TRAMPOLINE_32BIT_CODE_SIZE    0xA0
67
-
68
-#ifndef __ASSEMBLER__
69
-
70
-extern unsigned long *trampoline_32bit;
71
-
72
-extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
73
-
74
-extern const u16 trampoline_ljmp_imm_offset;
75
-
76
-#endif /* __ASSEMBLER__ */
77
-#endif /* BOOT_COMPRESSED_PAGETABLE_H */
78
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
79
index XXXXXXX..XXXXXXX 100644
80
--- a/arch/x86/boot/compressed/pgtable_64.c
81
+++ b/arch/x86/boot/compressed/pgtable_64.c
82
@@ -XXX,XX +XXX,XX @@
83
#include <asm/bootparam_utils.h>
84
#include <asm/e820/types.h>
85
#include <asm/processor.h>
86
-#include "pgtable.h"
87
#include "../string.h"
88
#include "efi.h"
89
90
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
91
index XXXXXXX..XXXXXXX 100644
92
--- a/arch/x86/include/asm/boot.h
93
+++ b/arch/x86/include/asm/boot.h
94
@@ -XXX,XX +XXX,XX @@
95
# define BOOT_STACK_SIZE    0x1000
96
#endif
97
98
+#define TRAMPOLINE_32BIT_SIZE        (2 * PAGE_SIZE)
99
+
100
+#define TRAMPOLINE_32BIT_CODE_OFFSET    PAGE_SIZE
101
+#define TRAMPOLINE_32BIT_CODE_SIZE    0xA0
102
+
103
#ifndef __ASSEMBLER__
104
extern unsigned int output_len;
105
extern const unsigned long kernel_text_size;
106
@@ -XXX,XX +XXX,XX @@ unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
107
                void (*error)(char *x));
108
109
extern struct boot_params *boot_params_ptr;
110
+extern unsigned long *trampoline_32bit;
111
+extern const u16 trampoline_ljmp_imm_offset;
112
+
113
+void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
114
+
115
#endif
116
117
#endif /* _ASM_X86_BOOT_H */
118
--
2.49.0.504.g3bcea36a83-goog