show_guest_stack() does nothing for HVM. Introduce a HVM-specific
dumping function, paralleling the 64- and 32-bit PV ones. We don't know
the real stack size, so only dump up to the next page boundary.
Rather than adding a vcpu parameter to hvm_copy_from_guest_linear(),
introduce hvm_copy_from_vcpu_linear() which - for now at least - in
return won't need a "pfinfo" parameter.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
TBD: The bypassing of the output interleaving avoidance isn't nice, but
I've not been able to think of an alternative. Avoiding the call to
hvm_vcpu_virtual_to_linear() would be in principle possible (adding
in the SS base directly), but one way or another we need to access
guest memory and hence can't sensibly avoid using the P2M layer.
However, commit 0996e0f38540 ("x86/traps: prevent interleaving of
concurrent cpu state dumps") introduced this logic here while
really only talking about show_execution_state().
vcpu_show_execution_state() is imo much less prone to interleaving
of its output: It's uses from the keyhandler are sequential already
anyway, and the only other use is from hvm_triple_fault(). Instead
of making the locking conditional, it may therefore be an option to
drop it again altogether.
TBD: For now this dumps also user mode stacks. We may want to restrict
this.
TBD: An alternative to putting this next to {,compat_}show_guest_stack()
is to put it in hvm.c, eliminating the need to introduce
hvm_copy_from_vcpu_linear(), but then requiring extra parameters to
be passed.
TBD: Technically this makes unnecessary the earlier added entering/
leaving if the VMCS. Yet to avoid a series of non-trivial
enter/exit pairs, I think leaving that in is still beneficial. In
which case here perhaps merely the associate comment may want
tweaking.
---
v3: New.
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -3408,6 +3408,15 @@ enum hvm_translation_result hvm_copy_fro
PFEC_page_present | pfec, pfinfo);
}
+enum hvm_translation_result hvm_copy_from_vcpu_linear(
+ void *buf, unsigned long addr, unsigned int size, struct vcpu *v,
+ unsigned int pfec)
+{
+ return __hvm_copy(buf, addr, size, v,
+ HVMCOPY_from_guest | HVMCOPY_linear,
+ PFEC_page_present | pfec, NULL);
+}
+
unsigned int copy_to_user_hvm(void *to, const void *from, unsigned int len)
{
int rc;
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -364,6 +364,71 @@ static void show_guest_stack(struct vcpu
printk("\n");
}
+static void show_hvm_stack(struct vcpu *v, const struct cpu_user_regs *regs)
+{
+#ifdef CONFIG_HVM
+ unsigned long sp = regs->rsp, addr;
+ unsigned int i, bytes, words_per_line, pfec = PFEC_page_present;
+ struct segment_register ss, cs;
+
+ hvm_get_segment_register(v, x86_seg_ss, &ss);
+ hvm_get_segment_register(v, x86_seg_cs, &cs);
+
+ if ( hvm_long_mode_active(v) && cs.l )
+ i = 16, bytes = 8;
+ else
+ {
+ sp = ss.db ? (uint32_t)sp : (uint16_t)sp;
+ i = ss.db ? 8 : 4;
+ bytes = cs.db ? 4 : 2;
+ }
+
+ if ( bytes == 8 || (ss.db && !ss.base) )
+ printk("Guest stack trace from sp=%0*lx:", i, sp);
+ else
+ printk("Guest stack trace from ss:sp=%04x:%0*lx:", ss.sel, i, sp);
+
+ if ( !hvm_vcpu_virtual_to_linear(v, x86_seg_ss, &ss, sp, bytes,
+ hvm_access_read, &cs, &addr) )
+ {
+ printk(" Guest-inaccessible memory\n");
+ return;
+ }
+
+ if ( ss.dpl == 3 )
+ pfec |= PFEC_user_mode;
+
+ words_per_line = stack_words_per_line * (sizeof(void *) / bytes);
+ for ( i = 0; i < debug_stack_lines * words_per_line; )
+ {
+ unsigned long val = 0;
+
+ if ( (addr ^ (addr + bytes - 1)) & PAGE_SIZE )
+ break;
+
+ if ( !(i++ % words_per_line) )
+ printk("\n ");
+
+ if ( hvm_copy_from_vcpu_linear(&val, addr, bytes, v,
+ pfec) != HVMTRANS_okay )
+ {
+ printk(" Fault while accessing guest memory.");
+ break;
+ }
+
+ printk(" %0*lx", 2 * bytes, val);
+
+ addr += bytes;
+ if ( !(addr & (PAGE_SIZE - 1)) )
+ break;
+ }
+
+ if ( !i )
+ printk(" Stack empty.");
+ printk("\n");
+#endif
+}
+
/*
* Notes for get_{stack,shstk}*_bottom() helpers
*
@@ -629,7 +694,7 @@ void show_execution_state(const struct c
void vcpu_show_execution_state(struct vcpu *v)
{
- unsigned long flags;
+ unsigned long flags = 0;
if ( test_bit(_VPF_down, &v->pause_flags) )
{
@@ -663,14 +728,22 @@ void vcpu_show_execution_state(struct vc
}
#endif
- /* Prevent interleaving of output. */
- flags = console_lock_recursive_irqsave();
+ /*
+ * Prevent interleaving of output if possible. For HVM we can't do so, as
+ * the necessary P2M lookups involve locking, which has to occur with IRQs
+ * enabled.
+ */
+ if ( !is_hvm_vcpu(v) )
+ flags = console_lock_recursive_irqsave();
vcpu_show_registers(v);
- if ( guest_kernel_mode(v, &v->arch.user_regs) )
+ if ( is_hvm_vcpu(v) )
+ show_hvm_stack(v, &v->arch.user_regs);
+ else if ( guest_kernel_mode(v, &v->arch.user_regs) )
show_guest_stack(v, &v->arch.user_regs);
- console_unlock_recursive_irqrestore(flags);
+ if ( !is_hvm_vcpu(v) )
+ console_unlock_recursive_irqrestore(flags);
#ifdef CONFIG_HVM
if ( cpu_has_vmx && is_hvm_vcpu(v) )
--- a/xen/include/asm-x86/hvm/support.h
+++ b/xen/include/asm-x86/hvm/support.h
@@ -101,6 +101,9 @@ enum hvm_translation_result hvm_copy_to_
enum hvm_translation_result hvm_copy_from_guest_linear(
void *buf, unsigned long addr, unsigned int size, uint32_t pfec,
pagefault_info_t *pfinfo);
+enum hvm_translation_result hvm_copy_from_vcpu_linear(
+ void *buf, unsigned long addr, unsigned int size, struct vcpu *v,
+ unsigned int pfec);
/*
* Get a reference on the page under an HVM physical or linear address. If
On Tue, Sep 21, 2021 at 09:20:00AM +0200, Jan Beulich wrote: > show_guest_stack() does nothing for HVM. Introduce a HVM-specific > dumping function, paralleling the 64- and 32-bit PV ones. We don't know > the real stack size, so only dump up to the next page boundary. > > Rather than adding a vcpu parameter to hvm_copy_from_guest_linear(), > introduce hvm_copy_from_vcpu_linear() which - for now at least - in > return won't need a "pfinfo" parameter. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > --- > TBD: The bypassing of the output interleaving avoidance isn't nice, but > I've not been able to think of an alternative. Avoiding the call to > hvm_vcpu_virtual_to_linear() would be in principle possible (adding > in the SS base directly), but one way or another we need to access > guest memory and hence can't sensibly avoid using the P2M layer. > However, commit 0996e0f38540 ("x86/traps: prevent interleaving of > concurrent cpu state dumps") introduced this logic here while > really only talking about show_execution_state(). > vcpu_show_execution_state() is imo much less prone to interleaving > of its output: It's uses from the keyhandler are sequential already > anyway, and the only other use is from hvm_triple_fault(). Instead > of making the locking conditional, it may therefore be an option to > drop it again altogether. > TBD: For now this dumps also user mode stacks. We may want to restrict > this. > TBD: An alternative to putting this next to {,compat_}show_guest_stack() > is to put it in hvm.c, eliminating the need to introduce > hvm_copy_from_vcpu_linear(), but then requiring extra parameters to > be passed. > TBD: Technically this makes unnecessary the earlier added entering/ > leaving if the VMCS. Yet to avoid a series of non-trivial > enter/exit pairs, I think leaving that in is still beneficial. In > which case here perhaps merely the associate comment may want > tweaking. > --- > v3: New. > > --- a/xen/arch/x86/hvm/hvm.c > +++ b/xen/arch/x86/hvm/hvm.c > @@ -3408,6 +3408,15 @@ enum hvm_translation_result hvm_copy_fro > PFEC_page_present | pfec, pfinfo); > } > > +enum hvm_translation_result hvm_copy_from_vcpu_linear( > + void *buf, unsigned long addr, unsigned int size, struct vcpu *v, > + unsigned int pfec) Even if your current use case doesn't need it, would it be worth adding a pagefault_info_t parameter? > +{ > + return __hvm_copy(buf, addr, size, v, > + HVMCOPY_from_guest | HVMCOPY_linear, > + PFEC_page_present | pfec, NULL); > +} > + > unsigned int copy_to_user_hvm(void *to, const void *from, unsigned int len) > { > int rc; > --- a/xen/arch/x86/traps.c > +++ b/xen/arch/x86/traps.c > @@ -364,6 +364,71 @@ static void show_guest_stack(struct vcpu > printk("\n"); > } > > +static void show_hvm_stack(struct vcpu *v, const struct cpu_user_regs *regs) > +{ > +#ifdef CONFIG_HVM > + unsigned long sp = regs->rsp, addr; > + unsigned int i, bytes, words_per_line, pfec = PFEC_page_present; > + struct segment_register ss, cs; > + > + hvm_get_segment_register(v, x86_seg_ss, &ss); > + hvm_get_segment_register(v, x86_seg_cs, &cs); > + > + if ( hvm_long_mode_active(v) && cs.l ) > + i = 16, bytes = 8; > + else > + { > + sp = ss.db ? (uint32_t)sp : (uint16_t)sp; > + i = ss.db ? 8 : 4; > + bytes = cs.db ? 4 : 2; > + } > + > + if ( bytes == 8 || (ss.db && !ss.base) ) > + printk("Guest stack trace from sp=%0*lx:", i, sp); > + else > + printk("Guest stack trace from ss:sp=%04x:%0*lx:", ss.sel, i, sp); > + > + if ( !hvm_vcpu_virtual_to_linear(v, x86_seg_ss, &ss, sp, bytes, > + hvm_access_read, &cs, &addr) ) > + { > + printk(" Guest-inaccessible memory\n"); > + return; > + } > + > + if ( ss.dpl == 3 ) > + pfec |= PFEC_user_mode; > + > + words_per_line = stack_words_per_line * (sizeof(void *) / bytes); > + for ( i = 0; i < debug_stack_lines * words_per_line; ) > + { > + unsigned long val = 0; > + > + if ( (addr ^ (addr + bytes - 1)) & PAGE_SIZE ) > + break; > + > + if ( !(i++ % words_per_line) ) > + printk("\n "); > + > + if ( hvm_copy_from_vcpu_linear(&val, addr, bytes, v, > + pfec) != HVMTRANS_okay ) I think I'm confused, but what about guests without paging enabled? Don't you need to use hvm_copy_from_guest_phys (likely transformed into hvm_copy_from_vcpu_phys)? > + { > + printk(" Fault while accessing guest memory."); > + break; > + } > + > + printk(" %0*lx", 2 * bytes, val); > + > + addr += bytes; > + if ( !(addr & (PAGE_SIZE - 1)) ) > + break; > + } > + > + if ( !i ) > + printk(" Stack empty."); > + printk("\n"); > +#endif > +} > + > /* > * Notes for get_{stack,shstk}*_bottom() helpers > * > @@ -629,7 +694,7 @@ void show_execution_state(const struct c > > void vcpu_show_execution_state(struct vcpu *v) > { > - unsigned long flags; > + unsigned long flags = 0; > > if ( test_bit(_VPF_down, &v->pause_flags) ) > { > @@ -663,14 +728,22 @@ void vcpu_show_execution_state(struct vc > } > #endif > > - /* Prevent interleaving of output. */ > - flags = console_lock_recursive_irqsave(); > + /* > + * Prevent interleaving of output if possible. For HVM we can't do so, as > + * the necessary P2M lookups involve locking, which has to occur with IRQs > + * enabled. > + */ > + if ( !is_hvm_vcpu(v) ) > + flags = console_lock_recursive_irqsave(); > > vcpu_show_registers(v); > - if ( guest_kernel_mode(v, &v->arch.user_regs) ) > + if ( is_hvm_vcpu(v) ) > + show_hvm_stack(v, &v->arch.user_regs); Would it make sense to unlock in show_hvm_stack, and thus keep the printing of vcpu_show_registers locked even when in HVM context? TBH I've never found the guest stack dump to be helpful for debugging purposes, but maybe others do. Thanks, Roger.
On Thu, Sep 23, 2021 at 12:31:14PM +0200, Roger Pau Monné wrote: > On Tue, Sep 21, 2021 at 09:20:00AM +0200, Jan Beulich wrote: > > - /* Prevent interleaving of output. */ > > - flags = console_lock_recursive_irqsave(); > > + /* > > + * Prevent interleaving of output if possible. For HVM we can't do so, as > > + * the necessary P2M lookups involve locking, which has to occur with IRQs > > + * enabled. > > + */ > > + if ( !is_hvm_vcpu(v) ) > > + flags = console_lock_recursive_irqsave(); > > > > vcpu_show_registers(v); > > - if ( guest_kernel_mode(v, &v->arch.user_regs) ) > > + if ( is_hvm_vcpu(v) ) > > + show_hvm_stack(v, &v->arch.user_regs); > > Would it make sense to unlock in show_hvm_stack, and thus keep the > printing of vcpu_show_registers locked even when in HVM context? To clarify, the unlock should be limited around the call to show_hvm_stack, not to be performed inside of the function itself (since we would have to pass flags around then). Thanks, Roger.
On 23.09.2021 12:31, Roger Pau Monné wrote: > On Tue, Sep 21, 2021 at 09:20:00AM +0200, Jan Beulich wrote: >> --- a/xen/arch/x86/hvm/hvm.c >> +++ b/xen/arch/x86/hvm/hvm.c >> @@ -3408,6 +3408,15 @@ enum hvm_translation_result hvm_copy_fro >> PFEC_page_present | pfec, pfinfo); >> } >> >> +enum hvm_translation_result hvm_copy_from_vcpu_linear( >> + void *buf, unsigned long addr, unsigned int size, struct vcpu *v, >> + unsigned int pfec) > > Even if your current use case doesn't need it, would it be worth > adding a pagefault_info_t parameter? I'd prefer to add such parameters only once they become necessary. >> --- a/xen/arch/x86/traps.c >> +++ b/xen/arch/x86/traps.c >> @@ -364,6 +364,71 @@ static void show_guest_stack(struct vcpu >> printk("\n"); >> } >> >> +static void show_hvm_stack(struct vcpu *v, const struct cpu_user_regs *regs) >> +{ >> +#ifdef CONFIG_HVM >> + unsigned long sp = regs->rsp, addr; >> + unsigned int i, bytes, words_per_line, pfec = PFEC_page_present; >> + struct segment_register ss, cs; >> + >> + hvm_get_segment_register(v, x86_seg_ss, &ss); >> + hvm_get_segment_register(v, x86_seg_cs, &cs); >> + >> + if ( hvm_long_mode_active(v) && cs.l ) >> + i = 16, bytes = 8; >> + else >> + { >> + sp = ss.db ? (uint32_t)sp : (uint16_t)sp; >> + i = ss.db ? 8 : 4; >> + bytes = cs.db ? 4 : 2; >> + } >> + >> + if ( bytes == 8 || (ss.db && !ss.base) ) >> + printk("Guest stack trace from sp=%0*lx:", i, sp); >> + else >> + printk("Guest stack trace from ss:sp=%04x:%0*lx:", ss.sel, i, sp); >> + >> + if ( !hvm_vcpu_virtual_to_linear(v, x86_seg_ss, &ss, sp, bytes, >> + hvm_access_read, &cs, &addr) ) >> + { >> + printk(" Guest-inaccessible memory\n"); >> + return; >> + } >> + >> + if ( ss.dpl == 3 ) >> + pfec |= PFEC_user_mode; >> + >> + words_per_line = stack_words_per_line * (sizeof(void *) / bytes); >> + for ( i = 0; i < debug_stack_lines * words_per_line; ) >> + { >> + unsigned long val = 0; >> + >> + if ( (addr ^ (addr + bytes - 1)) & PAGE_SIZE ) >> + break; >> + >> + if ( !(i++ % words_per_line) ) >> + printk("\n "); >> + >> + if ( hvm_copy_from_vcpu_linear(&val, addr, bytes, v, >> + pfec) != HVMTRANS_okay ) > > I think I'm confused, but what about guests without paging enabled? > Don't you need to use hvm_copy_from_guest_phys (likely transformed > into hvm_copy_from_vcpu_phys)? __hvm_copy() calls hvm_translate_get_page() telling it whether the input is a linear or physical address. hvm_translate_get_page() will use paging_gva_to_gfn() in this case. The HAP backing function changes when the guest {en,dis}ables paging, while shadow code deals with paging disabled by installing an identity mapping (d->arch.paging.shadow.unpaged_pagetable) which it would then end up (needlessly) walking. It really is - afaict - intentional for callers to not have to deal with the special case. >> @@ -663,14 +728,22 @@ void vcpu_show_execution_state(struct vc >> } >> #endif >> >> - /* Prevent interleaving of output. */ >> - flags = console_lock_recursive_irqsave(); >> + /* >> + * Prevent interleaving of output if possible. For HVM we can't do so, as >> + * the necessary P2M lookups involve locking, which has to occur with IRQs >> + * enabled. >> + */ >> + if ( !is_hvm_vcpu(v) ) >> + flags = console_lock_recursive_irqsave(); >> >> vcpu_show_registers(v); >> - if ( guest_kernel_mode(v, &v->arch.user_regs) ) >> + if ( is_hvm_vcpu(v) ) >> + show_hvm_stack(v, &v->arch.user_regs); > > Would it make sense to unlock in show_hvm_stack, and thus keep the > printing of vcpu_show_registers locked even when in HVM context? Perhaps not _in_, but before calling it, yet - why not. > TBH I've never found the guest stack dump to be helpful for debugging > purposes, but maybe others do. I can't count how many times I did use the stack dumps of Dom0 to actually pinpoint problems. Jan
On Thu, Sep 23, 2021 at 12:47:26PM +0200, Jan Beulich wrote: > On 23.09.2021 12:31, Roger Pau Monné wrote: > > On Tue, Sep 21, 2021 at 09:20:00AM +0200, Jan Beulich wrote: > >> --- a/xen/arch/x86/hvm/hvm.c > >> +++ b/xen/arch/x86/hvm/hvm.c > >> @@ -3408,6 +3408,15 @@ enum hvm_translation_result hvm_copy_fro > >> PFEC_page_present | pfec, pfinfo); > >> } > >> > >> +enum hvm_translation_result hvm_copy_from_vcpu_linear( > >> + void *buf, unsigned long addr, unsigned int size, struct vcpu *v, > >> + unsigned int pfec) > > > > Even if your current use case doesn't need it, would it be worth > > adding a pagefault_info_t parameter? > > I'd prefer to add such parameters only once they become necessary. > > >> --- a/xen/arch/x86/traps.c > >> +++ b/xen/arch/x86/traps.c > >> @@ -364,6 +364,71 @@ static void show_guest_stack(struct vcpu > >> printk("\n"); > >> } > >> > >> +static void show_hvm_stack(struct vcpu *v, const struct cpu_user_regs *regs) > >> +{ > >> +#ifdef CONFIG_HVM > >> + unsigned long sp = regs->rsp, addr; > >> + unsigned int i, bytes, words_per_line, pfec = PFEC_page_present; > >> + struct segment_register ss, cs; > >> + > >> + hvm_get_segment_register(v, x86_seg_ss, &ss); > >> + hvm_get_segment_register(v, x86_seg_cs, &cs); > >> + > >> + if ( hvm_long_mode_active(v) && cs.l ) > >> + i = 16, bytes = 8; > >> + else > >> + { > >> + sp = ss.db ? (uint32_t)sp : (uint16_t)sp; > >> + i = ss.db ? 8 : 4; > >> + bytes = cs.db ? 4 : 2; > >> + } > >> + > >> + if ( bytes == 8 || (ss.db && !ss.base) ) > >> + printk("Guest stack trace from sp=%0*lx:", i, sp); > >> + else > >> + printk("Guest stack trace from ss:sp=%04x:%0*lx:", ss.sel, i, sp); > >> + > >> + if ( !hvm_vcpu_virtual_to_linear(v, x86_seg_ss, &ss, sp, bytes, > >> + hvm_access_read, &cs, &addr) ) > >> + { > >> + printk(" Guest-inaccessible memory\n"); > >> + return; > >> + } > >> + > >> + if ( ss.dpl == 3 ) > >> + pfec |= PFEC_user_mode; > >> + > >> + words_per_line = stack_words_per_line * (sizeof(void *) / bytes); > >> + for ( i = 0; i < debug_stack_lines * words_per_line; ) > >> + { > >> + unsigned long val = 0; > >> + > >> + if ( (addr ^ (addr + bytes - 1)) & PAGE_SIZE ) > >> + break; > >> + > >> + if ( !(i++ % words_per_line) ) > >> + printk("\n "); > >> + > >> + if ( hvm_copy_from_vcpu_linear(&val, addr, bytes, v, > >> + pfec) != HVMTRANS_okay ) > > > > I think I'm confused, but what about guests without paging enabled? > > Don't you need to use hvm_copy_from_guest_phys (likely transformed > > into hvm_copy_from_vcpu_phys)? > > __hvm_copy() calls hvm_translate_get_page() telling it whether the > input is a linear or physical address. hvm_translate_get_page() will > use paging_gva_to_gfn() in this case. The HAP backing function > changes when the guest {en,dis}ables paging, while shadow code deals > with paging disabled by installing an identity mapping > (d->arch.paging.shadow.unpaged_pagetable) which it would then end up > (needlessly) walking. > > It really is - afaict - intentional for callers to not have to deal > with the special case. I always forget that we change the paging_mode handlers when switching between modes. > >> @@ -663,14 +728,22 @@ void vcpu_show_execution_state(struct vc > >> } > >> #endif > >> > >> - /* Prevent interleaving of output. */ > >> - flags = console_lock_recursive_irqsave(); > >> + /* > >> + * Prevent interleaving of output if possible. For HVM we can't do so, as > >> + * the necessary P2M lookups involve locking, which has to occur with IRQs > >> + * enabled. > >> + */ > >> + if ( !is_hvm_vcpu(v) ) > >> + flags = console_lock_recursive_irqsave(); > >> > >> vcpu_show_registers(v); > >> - if ( guest_kernel_mode(v, &v->arch.user_regs) ) > >> + if ( is_hvm_vcpu(v) ) > >> + show_hvm_stack(v, &v->arch.user_regs); > > > > Would it make sense to unlock in show_hvm_stack, and thus keep the > > printing of vcpu_show_registers locked even when in HVM context? > > Perhaps not _in_, but before calling it, yet - why not. Indeed, with that: Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> Thanks, Roger.
© 2016 - 2024 Red Hat, Inc.