[SeaBIOS] [PATCH] shadow: set code segment to high rom region when enabling RAM

Evgeny Yakovlev posted 1 patch 5 years, 4 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/seabios tags/patchew/1544622308-29067-1-git-send-email-wrfsh@yandex-team.ru
src/config.h    | 16 ++++++++++------
src/fw/shadow.c | 37 ++++++++++++++++++++++++++++---------
src/misc.c      |  8 ++++++++
3 files changed, 46 insertions(+), 15 deletions(-)
[SeaBIOS] [PATCH] shadow: set code segment to high rom region when enabling RAM
Posted by Evgeny Yakovlev 5 years, 4 months ago
Currently make_bios_writable_intel will call __make_bios_writeable_intel
from high rom memory by manually correcting its offset to make sure that
we safely execute it while overriding memory mapping through PAMs

However we still may call code from low memory, when
__make_bios_writeable_intel itself calls other code without manual
pointer adjustments. Right now it calls pci_config_readl and
pci_config_writel.

Consider this scenario:
0. Linker puts pci_config_writel in F-segment.
1. first pci_config_writel is called to reprogram PAM0-3, which means
remap regions 0xF0000-0xFFFFF and 0xD0000 - 0xC7FFF.
2. second pci_config_writel is called to reprogram PAM4-7 but code in
F-segment is no longer valid, including pci_config_writel.

However we don't crash due to instruction cache being hot between two
calls. Adding manual i-cache flush (by reloading the same CS segment for
example) between two calls finally crashes the firmware.

This change wraps a call to __make_bios_writable_bios by setting code
segment at base 0xFFF00000. This way simple nested function calls work
as long as no data fetches happen during execution through CS segment
(i.e. something like dprintf is still not safe to call).

Signed-off-by: Evgeny Yakovlev <wrfsh@yandex-team.ru>
---
 src/config.h    | 16 ++++++++++------
 src/fw/shadow.c | 37 ++++++++++++++++++++++++++++---------
 src/misc.c      |  8 ++++++++
 3 files changed, 46 insertions(+), 15 deletions(-)

diff --git a/src/config.h b/src/config.h
index 93c8dbc..87c4118 100644
--- a/src/config.h
+++ b/src/config.h
@@ -60,13 +60,17 @@
 #define SEG_BDA      0x0040
 #define SEG_BIOS     0xf000
 
+// On the emulators, the bios at 0xf0000 is also at 0xffff0000
+#define BIOS_SRC_OFFSET 0xfff00000
+
 // Segment definitions in protected mode (see rombios32_gdt in misc.c)
-#define SEG32_MODE32_CS    (1 << 3)
-#define SEG32_MODE32_DS    (2 << 3)
-#define SEG32_MODE16_CS    (3 << 3)
-#define SEG32_MODE16_DS    (4 << 3)
-#define SEG32_MODE16BIG_CS (5 << 3)
-#define SEG32_MODE16BIG_DS (6 << 3)
+#define SEG32_MODE32_CS         (1 << 3)
+#define SEG32_MODE32_DS         (2 << 3)
+#define SEG32_MODE16_CS         (3 << 3)
+#define SEG32_MODE16_DS         (4 << 3)
+#define SEG32_MODE16BIG_CS      (5 << 3)
+#define SEG32_MODE16BIG_DS      (6 << 3)
+#define SEG32_MODE32_HIGH_CS    (7 << 3)
 
 // Debugging levels.  If non-zero and CONFIG_DEBUG_LEVEL is greater
 // than the specified value, then the corresponding irq handler will
diff --git a/src/fw/shadow.c b/src/fw/shadow.c
index 4c627a8..80d0889 100644
--- a/src/fw/shadow.c
+++ b/src/fw/shadow.c
@@ -18,9 +18,6 @@
 #include "util.h" // make_bios_writable
 #include "x86.h" // wbinvd
 
-// On the emulators, the bios at 0xf0000 is also at 0xffff0000
-#define BIOS_SRC_OFFSET 0xfff00000
-
 union pamdata_u {
     u8 data8[8];
     u32 data32[2];
@@ -56,6 +53,30 @@ __make_bios_writable_intel(u16 bdf, u32 pam0)
                , SYMBOL(code32flat_end) - SYMBOL(code32flat_start));
 }
 
+// A wrapper for executing __make_bios_writable_intel in high memory code segment
+static void
+__make_bios_writable_intel_highmem(u16 bdf, u32 pam0)
+{
+    // Save whatever CS segment we've had before entering here
+    // and switch to highmem code
+    u16 prev_cs;
+    __asm__ __volatile__ ("mov  %%cs, %0":"=rm"(prev_cs));
+    __asm__ __volatile__ ("ljmp %0, $__make_bios_writable_intel_highmem__enter\n"
+                          "__make_bios_writable_intel_highmem__enter:\n"
+                          :
+                          : "n"(SEG32_MODE32_HIGH_CS));
+
+    __make_bios_writable_intel(bdf, pam0);
+
+    // Far jump to return to previous segment
+    __asm__ __volatile__ ("push %0\n"
+                          "push $__make_bios_writable_intel_highmem__exit\n"
+                          "retf\n"
+                          "__make_bios_writable_intel_highmem__exit:\n"
+                          :
+                          : "rm"((u32)prev_cs));
+}
+
 static void
 make_bios_writable_intel(u16 bdf, u32 pam0)
 {
@@ -65,13 +86,11 @@ make_bios_writable_intel(u16 bdf, u32 pam0)
         // if ram isn't backing the bios segment when shadowing is
         // disabled, the code itself won't be in memory.  So, run the
         // code from the high-memory flash location.
-        u32 pos = (u32)__make_bios_writable_intel + BIOS_SRC_OFFSET;
-        void (*func)(u16 bdf, u32 pam0) = (void*)pos;
-        func(bdf, pam0);
-        return;
+        __make_bios_writable_intel_highmem(bdf, pam0);
+    } else {
+        // Ram already present - just enable writes
+        __make_bios_writable_intel(bdf, pam0);
     }
-    // Ram already present - just enable writes
-    __make_bios_writable_intel(bdf, pam0);
 }
 
 static void
diff --git a/src/misc.c b/src/misc.c
index b511730..cb931a5 100644
--- a/src/misc.c
+++ b/src/misc.c
@@ -160,6 +160,14 @@ u64 rombios32_gdt[] VARFSEG __aligned(8) = {
     GDT_GRANLIMIT(0xffffffff) | GDT_CODE | GDT_BASE(BUILD_BIOS_ADDR),
     // 16 bit data segment base=0 limit=0xffffffff (SEG32_MODE16BIG_DS)
     GDT_GRANLIMIT(0xffffffff) | GDT_DATA,
+
+    //
+    // Following segments are used when enabling shadow memory
+    // where we need to execute code strictly from base 0xFFF00000
+    //
+
+    // 32 bit code segment in high memory
+    GDT_GRANLIMIT(0xfffff) | GDT_CODE | GDT_B | GDT_BASE(BIOS_SRC_OFFSET),
 };
 
 // GDT descriptor
-- 
2.7.4


_______________________________________________
SeaBIOS mailing list
SeaBIOS@seabios.org
https://mail.coreboot.org/mailman/listinfo/seabios
Re: [SeaBIOS] [PATCH] shadow: set code segment to high rom region when enabling RAM
Posted by Kevin O'Connor 5 years, 4 months ago
On Wed, Dec 12, 2018 at 04:45:08PM +0300, Evgeny Yakovlev wrote:
> Currently make_bios_writable_intel will call __make_bios_writeable_intel
> from high rom memory by manually correcting its offset to make sure that
> we safely execute it while overriding memory mapping through PAMs
> 
> However we still may call code from low memory, when
> __make_bios_writeable_intel itself calls other code without manual
> pointer adjustments. Right now it calls pci_config_readl and
> pci_config_writel.
> 
> Consider this scenario:
> 0. Linker puts pci_config_writel in F-segment.
> 1. first pci_config_writel is called to reprogram PAM0-3, which means
> remap regions 0xF0000-0xFFFFF and 0xD0000 - 0xC7FFF.
> 2. second pci_config_writel is called to reprogram PAM4-7 but code in
> F-segment is no longer valid, including pci_config_writel.

The x86 instruction set uses relative function calls by default.  So,
a call to pci_config_writel() calls the copy of that function also
located in 0xFFF00000.

Are you seeing an error in practice?  It's known that
__make_bios_writeable_intel() is an ugly hack - it's there because
qemu doesn't support "write back" mode of the pam registers.  So the
code needs to run at a different location when making that area
writable.  It is specific to qemu, so we only need it to run okay on
qemu.

-Kevin

_______________________________________________
SeaBIOS mailing list
SeaBIOS@seabios.org
https://mail.coreboot.org/mailman/listinfo/seabios
Re: [SeaBIOS] [PATCH] shadow: set code segment to high rom region when enabling RAM
Posted by Евгений Яковлев 5 years, 4 months ago
Hi Kevin,


Yep, we're seeing this in practice (more on that follows). However you 
are absolutely right about call using a displacement (and i didn't know 
that!):

e8 c1 f5 ff ff          call   ed161 <pci_config_writel>

Right now i don't see anything like that in disassembly, but we're still 
kind of one compiler-generated absolute jump/call/fetch away from this 
happening..


Anyway, answering your question, yes we have reproduced a case where we 
are executing junk after returning from pci_config_writel emulation.

However the actual case is somewhat more complicated and i would 
appreciate your advise on how to fix it.

I think we have a problem right now if during emulation first pci config 
write in __make_bios_writeable_intel we decide to issue qmp 
system_reset. Based on what i could gather from crashed instances we 
have something like this:

1. seabios issues pci_config_writel to reprogram 0xF0000-0xFFFFF and 
0xD0000 - 0xC7FFF. VCPU thread exits into qemu usermode to emulate this.

2. At this time we issue system_reset through qmp. When VCPU thread 
decided to return to KVM it releases BQL, main event loop sees 
system_reset, stops cpus and calls reset handlers. So, we're doing a 
soft reset.

3. Q35 ICH9 reset emulation does not reset PAMs to default values

4. Upon re-entering reset vector seabios checks for PAM0 & 0x10 and 
decides that we have enabled ram previously and does not jump to 
__make_bios_writeable_intel in high memory relying on the fact that code 
is in low memory already. However this assumption only holds true for 
0xC8FFFF - 0xEFFFF because we didn't have a chance to reprogram those 
PAMs during previous runs. We also didn't memcpy anything previously, so 
we're now may execute junk from F-segment.


What do you think about this scenario? I would be happy to fix this but 
i would be happy to get your advise on how to proceed and if all of my 
assumptions are correct.

Also, maybe original fix is still useful since we're not relying on all 
jumps/calls to be relative anymore?


-Evgeny


On 13.12.2018 19:42, Kevin O'Connor wrote:
> On Wed, Dec 12, 2018 at 04:45:08PM +0300, Evgeny Yakovlev wrote:
>> Currently make_bios_writable_intel will call __make_bios_writeable_intel
>> from high rom memory by manually correcting its offset to make sure that
>> we safely execute it while overriding memory mapping through PAMs
>>
>> However we still may call code from low memory, when
>> __make_bios_writeable_intel itself calls other code without manual
>> pointer adjustments. Right now it calls pci_config_readl and
>> pci_config_writel.
>>
>> Consider this scenario:
>> 0. Linker puts pci_config_writel in F-segment.
>> 1. first pci_config_writel is called to reprogram PAM0-3, which means
>> remap regions 0xF0000-0xFFFFF and 0xD0000 - 0xC7FFF.
>> 2. second pci_config_writel is called to reprogram PAM4-7 but code in
>> F-segment is no longer valid, including pci_config_writel.
> The x86 instruction set uses relative function calls by default.  So,
> a call to pci_config_writel() calls the copy of that function also
> located in 0xFFF00000.
>
> Are you seeing an error in practice?  It's known that
> __make_bios_writeable_intel() is an ugly hack - it's there because
> qemu doesn't support "write back" mode of the pam registers.  So the
> code needs to run at a different location when making that area
> writable.  It is specific to qemu, so we only need it to run okay on
> qemu.
>
> -Kevin

_______________________________________________
SeaBIOS mailing list
SeaBIOS@seabios.org
https://mail.coreboot.org/mailman/listinfo/seabios
Re: [SeaBIOS] [PATCH] shadow: set code segment to high rom region when enabling RAM
Posted by Kevin O'Connor 5 years, 4 months ago
On Fri, Dec 14, 2018 at 01:08:46PM +0300, Евгений Яковлев wrote:
> I think we have a problem right now if during emulation first pci config
> write in __make_bios_writeable_intel we decide to issue qmp system_reset.
> Based on what i could gather from crashed instances we have something like
> this:
> 
> 1. seabios issues pci_config_writel to reprogram 0xF0000-0xFFFFF and 0xD0000
> - 0xC7FFF. VCPU thread exits into qemu usermode to emulate this.
> 
> 2. At this time we issue system_reset through qmp. When VCPU thread decided
> to return to KVM it releases BQL, main event loop sees system_reset, stops
> cpus and calls reset handlers. So, we're doing a soft reset.
> 
> 3. Q35 ICH9 reset emulation does not reset PAMs to default values
> 
> 4. Upon re-entering reset vector seabios checks for PAM0 & 0x10 and decides
> that we have enabled ram previously and does not jump to
> __make_bios_writeable_intel in high memory relying on the fact that code is
> in low memory already. However this assumption only holds true for 0xC8FFFF
> - 0xEFFFF because we didn't have a chance to reprogram those PAMs during
> previous runs. We also didn't memcpy anything previously, so we're now may
> execute junk from F-segment.
> 
> 
> What do you think about this scenario? I would be happy to fix this but i
> would be happy to get your advise on how to proceed and if all of my
> assumptions are correct.

Interesting.  So, to summarize, there is a race condition with a reset
signal if it arrives between the point SeaBIOS sets the pam registers
and completes its memcpy.

Not sure how to best address that.  Is it even possible to close that
race?  (That is, if QEMU doesn't reset the pam registers, could the
reboot immediately start execution at junk f-segment code?)

Is this only a problem if the reset signal occurs within the first few
milliseconds of the the very first boot?

> Also, maybe original fix is still useful since we're not relying on all
> jumps/calls to be relative anymore?

My personal opinion is that the x86 segment/offset stuff is so arcane
that it's best to avoid it.  I don't think gcc will (in practice) emit
an absolute function call.

-Kevin

_______________________________________________
SeaBIOS mailing list
SeaBIOS@seabios.org
https://mail.coreboot.org/mailman/listinfo/seabios
Re: [SeaBIOS] [PATCH] shadow: set code segment to high rom region when enabling RAM
Posted by Евгений Яковлев 5 years, 4 months ago
On 14.12.2018 21:45, Kevin O'Connor wrote:
> On Fri, Dec 14, 2018 at 01:08:46PM +0300, Евгений Яковлев wrote:
>> I think we have a problem right now if during emulation first pci config
>> write in __make_bios_writeable_intel we decide to issue qmp system_reset.
>> Based on what i could gather from crashed instances we have something like
>> this:
>>
>> 1. seabios issues pci_config_writel to reprogram 0xF0000-0xFFFFF and 0xD0000
>> - 0xC7FFF. VCPU thread exits into qemu usermode to emulate this.
>>
>> 2. At this time we issue system_reset through qmp. When VCPU thread decided
>> to return to KVM it releases BQL, main event loop sees system_reset, stops
>> cpus and calls reset handlers. So, we're doing a soft reset.
>>
>> 3. Q35 ICH9 reset emulation does not reset PAMs to default values
>>
>> 4. Upon re-entering reset vector seabios checks for PAM0 & 0x10 and decides
>> that we have enabled ram previously and does not jump to
>> __make_bios_writeable_intel in high memory relying on the fact that code is
>> in low memory already. However this assumption only holds true for 0xC8FFFF
>> - 0xEFFFF because we didn't have a chance to reprogram those PAMs during
>> previous runs. We also didn't memcpy anything previously, so we're now may
>> execute junk from F-segment.
>>
>>
>> What do you think about this scenario? I would be happy to fix this but i
>> would be happy to get your advise on how to proceed and if all of my
>> assumptions are correct.
> Interesting.  So, to summarize, there is a race condition with a reset
> signal if it arrives between the point SeaBIOS sets the pam registers
> and completes its memcpy.


This is my current understanding, yes.


>
> Not sure how to best address that.  Is it even possible to close that
> race?  (That is, if QEMU doesn't reset the pam registers, could the
> reboot immediately start execution at junk f-segment code?)
>
> Is this only a problem if the reset signal occurs within the first few
> milliseconds of the the very first boot?


Yes, exactly between first and second call to pci_config_writel in 
__make_bios_writable_intel to be specific.

I will be making attempts to fix this because this is a problem for us. 
I am not that familiar with seabios code though, but i've been working 
with Tiano a lot and the way they do it now is to unlock PAM segments 
for writing first, modify ram and then enable read decode from ram. I am 
not sure (yet) if this works on q35 in qemu, but i will verify.

I was thinking previously to just reset PAMs in qemu in q35 and fx440 
mch reset handler, however Intel spec is not specific on whether 
conforming devices should actually do this on soft reset signal.. I 
don't actually think they should.


>> Also, maybe original fix is still useful since we're not relying on all
>> jumps/calls to be relative anymore?
> My personal opinion is that the x86 segment/offset stuff is so arcane
> that it's best to avoid it.  I don't think gcc will (in practice) emit
> an absolute function call.


That's reasonable. I would have still coded __make_bios_writable_intel 
completely in assembly *juts to make sure* :) But this is your call, Kevin.


>
> -Kevin

_______________________________________________
SeaBIOS mailing list
SeaBIOS@seabios.org
https://mail.coreboot.org/mailman/listinfo/seabios