[PATCH 3/8] x86/traps: Avoid OoB accesses to print the data selectors

Andrew Cooper posted 8 patches 9 months, 1 week ago
There is a newer version of this series
[PATCH 3/8] x86/traps: Avoid OoB accesses to print the data selectors
Posted by Andrew Cooper 9 months, 1 week ago
_show_registers() prints the data selectors from struct cpu_user_regs, but
these fields are sometimes out-of-bounds.  See commit 6065a05adf15
("x86/traps: 'Fix' safety of read_registers() in #DF path").

There are 3 callers of _show_registers():

 1. vcpu_show_registers(), which always operates on a scheduled-out vCPU,
    where v->arch.user_regs (or aux_regs on the stack) is always in-bounds.

 2. show_registers() where regs is always an on-stack frame.  regs is copied
    into a local variable first (which is an OoB read for constructs such as
    WARN()), before being modified (so no OoB write).

 3. do_double_fault(), where regs is adjacent to the stack guard page, and
    written into directly.  This is an out of bounds read and write, with a
    bodge to avoid the writes hitting the guard page.

Include the data segment selectors in struct extra_state, and use those fields
instead of the fields in regs.  This resolves the OoB write on the #DF path.

Resolve the OoB read in show_registers() by doing a partial memcpy() rather
than full structure copy.  This is temporary until we've finished untangling
the vm86 fields fully.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/x86_64/traps.c | 39 +++++++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
index 01b4f0623282..23622cdb1440 100644
--- a/xen/arch/x86/x86_64/traps.c
+++ b/xen/arch/x86/x86_64/traps.c
@@ -27,6 +27,7 @@ struct extra_state
 {
     unsigned long cr0, cr2, cr3, cr4;
     unsigned long fsb, gsb, gss;
+    uint16_t ds, es, fs, gs;
 };
 
 static void print_xen_info(void)
@@ -40,18 +41,21 @@ static void print_xen_info(void)
 
 enum context { CTXT_hypervisor, CTXT_pv_guest, CTXT_hvm_guest };
 
-static void read_registers(struct cpu_user_regs *regs, struct extra_state *state)
+static void read_registers(struct extra_state *state)
 {
     state->cr0 = read_cr0();
     state->cr2 = read_cr2();
     state->cr3 = read_cr3();
     state->cr4 = read_cr4();
 
-    read_sregs(regs);
-
     state->fsb = read_fs_base();
     state->gsb = read_gs_base();
     state->gss = read_gs_shadow();
+
+    asm ( "mov %%ds, %0" : "=m" (state->ds) );
+    asm ( "mov %%es, %0" : "=m" (state->es) );
+    asm ( "mov %%fs, %0" : "=m" (state->fs) );
+    asm ( "mov %%gs, %0" : "=m" (state->gs) );
 }
 
 static void get_hvm_registers(struct vcpu *v, struct cpu_user_regs *regs,
@@ -68,17 +72,17 @@ static void get_hvm_registers(struct vcpu *v, struct cpu_user_regs *regs,
     regs->cs = sreg.sel;
 
     hvm_get_segment_register(v, x86_seg_ds, &sreg);
-    regs->ds = sreg.sel;
+    state->ds = sreg.sel;
 
     hvm_get_segment_register(v, x86_seg_es, &sreg);
-    regs->es = sreg.sel;
+    state->es = sreg.sel;
 
     hvm_get_segment_register(v, x86_seg_fs, &sreg);
-    regs->fs = sreg.sel;
+    state->fs = sreg.sel;
     state->fsb = sreg.base;
 
     hvm_get_segment_register(v, x86_seg_gs, &sreg);
-    regs->gs = sreg.sel;
+    state->gs = sreg.sel;
     state->gsb = sreg.base;
 
     hvm_get_segment_register(v, x86_seg_ss, &sreg);
@@ -124,17 +128,23 @@ static void _show_registers(
            state->fsb, state->gsb, state->gss);
     printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   "
            "ss: %04x   cs: %04x\n",
-           regs->ds, regs->es, regs->fs,
-           regs->gs, regs->ss, regs->cs);
+           state->ds, state->es, state->fs,
+           state->gs, regs->ss, regs->cs);
 }
 
 void show_registers(const struct cpu_user_regs *regs)
 {
-    struct cpu_user_regs fault_regs = *regs;
+    struct cpu_user_regs fault_regs;
     struct extra_state fault_state;
     enum context context;
     struct vcpu *v = system_state >= SYS_STATE_smp_boot ? current : NULL;
 
+    /*
+     * Don't read beyond the end of the hardware frame.  It is out of bounds
+     * for WARN()/etc.
+     */
+    memcpy(&fault_regs, regs, offsetof(struct cpu_user_regs, es));
+
     if ( guest_mode(regs) && is_hvm_vcpu(v) )
     {
         get_hvm_registers(v, &fault_regs, &fault_state);
@@ -142,7 +152,7 @@ void show_registers(const struct cpu_user_regs *regs)
     }
     else
     {
-        read_registers(&fault_regs, &fault_state);
+        read_registers(&fault_state);
 
         if ( guest_mode(regs) )
         {
@@ -209,6 +219,11 @@ void vcpu_show_registers(struct vcpu *v)
         state.gsb = gsb;
         state.gss = gss;
 
+        state.ds = v->arch.user_regs.ds;
+        state.es = v->arch.user_regs.es;
+        state.fs = v->arch.user_regs.fs;
+        state.gs = v->arch.user_regs.gs;
+
         context = CTXT_pv_guest;
     }
 
@@ -291,7 +306,7 @@ void asmlinkage do_double_fault(struct cpu_user_regs *regs)
     printk("*** DOUBLE FAULT ***\n");
     print_xen_info();
 
-    read_registers(regs, &state);
+    read_registers(&state);
 
     printk("CPU:    %d\n", cpu);
     _show_registers(regs, &state, CTXT_hypervisor, NULL);
-- 
2.39.5


Re: [PATCH 3/8] x86/traps: Avoid OoB accesses to print the data selectors
Posted by Jan Beulich 9 months, 1 week ago
On 11.03.2025 22:10, Andrew Cooper wrote:
> _show_registers() prints the data selectors from struct cpu_user_regs, but
> these fields are sometimes out-of-bounds.  See commit 6065a05adf15
> ("x86/traps: 'Fix' safety of read_registers() in #DF path").
> 
> There are 3 callers of _show_registers():
> 
>  1. vcpu_show_registers(), which always operates on a scheduled-out vCPU,
>     where v->arch.user_regs (or aux_regs on the stack) is always in-bounds.
> 
>  2. show_registers() where regs is always an on-stack frame.  regs is copied
>     into a local variable first (which is an OoB read for constructs such as
>     WARN()), before being modified (so no OoB write).
> 
>  3. do_double_fault(), where regs is adjacent to the stack guard page, and
>     written into directly.  This is an out of bounds read and write, with a
>     bodge to avoid the writes hitting the guard page.
> 
> Include the data segment selectors in struct extra_state, and use those fields
> instead of the fields in regs.  This resolves the OoB write on the #DF path.
> 
> Resolve the OoB read in show_registers() by doing a partial memcpy() rather
> than full structure copy.  This is temporary until we've finished untangling
> the vm86 fields fully.
> 
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>

> @@ -124,17 +128,23 @@ static void _show_registers(
>             state->fsb, state->gsb, state->gss);
>      printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   "
>             "ss: %04x   cs: %04x\n",
> -           regs->ds, regs->es, regs->fs,
> -           regs->gs, regs->ss, regs->cs);
> +           state->ds, state->es, state->fs,
> +           state->gs, regs->ss, regs->cs);
>  }
>  
>  void show_registers(const struct cpu_user_regs *regs)
>  {
> -    struct cpu_user_regs fault_regs = *regs;
> +    struct cpu_user_regs fault_regs;
>      struct extra_state fault_state;
>      enum context context;
>      struct vcpu *v = system_state >= SYS_STATE_smp_boot ? current : NULL;
>  
> +    /*
> +     * Don't read beyond the end of the hardware frame.  It is out of bounds
> +     * for WARN()/etc.
> +     */
> +    memcpy(&fault_regs, regs, offsetof(struct cpu_user_regs, es));

I don't like this (especially the assumption on es being special, much like
e.g. get_stack_bottom() also does) very much, but I hope this is going to
disappear at some point anyway.

Jan
Re: [PATCH 3/8] x86/traps: Avoid OoB accesses to print the data selectors
Posted by Andrew Cooper 9 months, 1 week ago
On 17/03/2025 11:00 am, Jan Beulich wrote:
> On 11.03.2025 22:10, Andrew Cooper wrote:
>> _show_registers() prints the data selectors from struct cpu_user_regs, but
>> these fields are sometimes out-of-bounds.  See commit 6065a05adf15
>> ("x86/traps: 'Fix' safety of read_registers() in #DF path").
>>
>> There are 3 callers of _show_registers():
>>
>>  1. vcpu_show_registers(), which always operates on a scheduled-out vCPU,
>>     where v->arch.user_regs (or aux_regs on the stack) is always in-bounds.
>>
>>  2. show_registers() where regs is always an on-stack frame.  regs is copied
>>     into a local variable first (which is an OoB read for constructs such as
>>     WARN()), before being modified (so no OoB write).
>>
>>  3. do_double_fault(), where regs is adjacent to the stack guard page, and
>>     written into directly.  This is an out of bounds read and write, with a
>>     bodge to avoid the writes hitting the guard page.
>>
>> Include the data segment selectors in struct extra_state, and use those fields
>> instead of the fields in regs.  This resolves the OoB write on the #DF path.
>>
>> Resolve the OoB read in show_registers() by doing a partial memcpy() rather
>> than full structure copy.  This is temporary until we've finished untangling
>> the vm86 fields fully.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>
>> @@ -124,17 +128,23 @@ static void _show_registers(
>>             state->fsb, state->gsb, state->gss);
>>      printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   "
>>             "ss: %04x   cs: %04x\n",
>> -           regs->ds, regs->es, regs->fs,
>> -           regs->gs, regs->ss, regs->cs);
>> +           state->ds, state->es, state->fs,
>> +           state->gs, regs->ss, regs->cs);
>>  }
>>  
>>  void show_registers(const struct cpu_user_regs *regs)
>>  {
>> -    struct cpu_user_regs fault_regs = *regs;
>> +    struct cpu_user_regs fault_regs;
>>      struct extra_state fault_state;
>>      enum context context;
>>      struct vcpu *v = system_state >= SYS_STATE_smp_boot ? current : NULL;
>>  
>> +    /*
>> +     * Don't read beyond the end of the hardware frame.  It is out of bounds
>> +     * for WARN()/etc.
>> +     */
>> +    memcpy(&fault_regs, regs, offsetof(struct cpu_user_regs, es));
> I don't like this (especially the assumption on es being special, much like
> e.g. get_stack_bottom() also does) very much, but I hope this is going to
> disappear at some point anyway.

As noted, it's temporary, and goes away in patch 8.

~Andrew