[PATCH 6/8] x86/entry: Track the IST-ness of an entry for the exit paths

Andrew Cooper posted 8 patches 5 months, 1 week ago
There is a newer version of this series
[PATCH 6/8] x86/entry: Track the IST-ness of an entry for the exit paths
Posted by Andrew Cooper 5 months, 1 week ago
Use %r12 to hold an ist_exit boolean.  This register is zero elsewhere in the
entry/exit asm, so it only needs setting in the IST path.

As this is subtle and fragile, add check_ist_exit() to be used in debugging
builds to cross-check that the ist_exit boolean matches the entry vector.

Write check_ist_exit() in C, because it's debug only and the logic is more
complicated than I care to maintain in asm.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
---
 xen/arch/x86/traps.c               | 13 +++++++++++++
 xen/arch/x86/x86_64/compat/entry.S |  9 ++++++++-
 xen/arch/x86/x86_64/entry.S        | 23 ++++++++++++++++++++---
 3 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index dead728ce329..0a005f088bca 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2259,6 +2259,19 @@ void asm_domain_crash_synchronous(unsigned long addr)
         do_softirq();
 }
 
+#ifdef CONFIG_DEBUG
+void check_ist_exit(const struct cpu_user_regs *regs, bool ist_exit)
+{
+    const unsigned int ist_mask =
+        (1U << X86_EXC_NMI) | (1U << X86_EXC_DB) |
+        (1U << X86_EXC_DF)  | (1U << X86_EXC_MC);
+    uint8_t ev = regs->entry_vector;
+    bool is_ist = (ev < X86_EXC_NUM) && ((1U << ev) & ist_mask);
+
+    ASSERT(is_ist == ist_exit);
+}
+#endif
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
index bd5abd8040bd..7504bfb4f326 100644
--- a/xen/arch/x86/x86_64/compat/entry.S
+++ b/xen/arch/x86/x86_64/compat/entry.S
@@ -117,8 +117,15 @@ compat_process_trap:
         call  compat_create_bounce_frame
         jmp   compat_test_all_events
 
-/* %rbx: struct vcpu, interrupts disabled */
+/* %rbx: struct vcpu, %r12: ist_exit, interrupts disabled */
 ENTRY(compat_restore_all_guest)
+
+#ifdef CONFIG_DEBUG
+        mov   %rsp, %rdi
+        mov   %r12, %rsi
+        call  check_ist_exit
+#endif
+
         ASSERT_INTERRUPTS_DISABLED
         mov   $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), %r11d
         and   UREGS_eflags(%rsp),%r11d
diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
index 525877e97330..da084a7e8e54 100644
--- a/xen/arch/x86/x86_64/entry.S
+++ b/xen/arch/x86/x86_64/entry.S
@@ -142,10 +142,16 @@ process_trap:
 
         .section .text.entry, "ax", @progbits
 
-/* %rbx: struct vcpu, interrupts disabled */
+/* %rbx: struct vcpu, %r12: ist_exit, interrupts disabled */
 restore_all_guest:
-        ASSERT_INTERRUPTS_DISABLED
 
+#ifdef CONFIG_DEBUG
+        mov   %rsp, %rdi
+        mov   %r12, %rsi
+        call  check_ist_exit
+#endif
+
+        ASSERT_INTERRUPTS_DISABLED
         /* Stash guest SPEC_CTRL value while we can read struct vcpu. */
         mov VCPU_arch_msrs(%rbx), %rdx
         mov VCPUMSR_spec_ctrl_raw(%rdx), %r15d
@@ -659,8 +665,15 @@ ENTRY(early_page_fault)
         .section .text.entry, "ax", @progbits
 
         ALIGN
-/* No special register assumptions. */
+/* %r12=ist_exit */
 restore_all_xen:
+
+#ifdef CONFIG_DEBUG
+        mov   %rsp, %rdi
+        mov   %r12, %rsi
+        call  check_ist_exit
+#endif
+
         /*
          * Check whether we need to switch to the per-CPU page tables, in
          * case we return to late PV exit code (from an NMI or #MC).
@@ -1087,6 +1100,10 @@ handle_ist_exception:
 .L_ist_dispatch_done:
         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
         mov   %bl, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14)
+
+        /* This is an IST exit */
+        mov   $1, %r12
+
         cmpb  $X86_EXC_NMI, UREGS_entry_vector(%rsp)
         jne   ret_from_intr
 
-- 
2.30.2


Re: [PATCH 6/8] x86/entry: Track the IST-ness of an entry for the exit paths
Posted by Jan Beulich 5 months, 1 week ago
On 13.09.2023 22:27, Andrew Cooper wrote:
> --- a/xen/arch/x86/x86_64/compat/entry.S
> +++ b/xen/arch/x86/x86_64/compat/entry.S
> @@ -117,8 +117,15 @@ compat_process_trap:
>          call  compat_create_bounce_frame
>          jmp   compat_test_all_events
>  
> -/* %rbx: struct vcpu, interrupts disabled */
> +/* %rbx: struct vcpu, %r12: ist_exit, interrupts disabled */
>  ENTRY(compat_restore_all_guest)
> +
> +#ifdef CONFIG_DEBUG
> +        mov   %rsp, %rdi
> +        mov   %r12, %rsi
> +        call  check_ist_exit
> +#endif
> +
>          ASSERT_INTERRUPTS_DISABLED
>          mov   $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), %r11d
>          and   UREGS_eflags(%rsp),%r11d

Without having peeked ahead, is there any use of %r12 going to appear
on this path? I thought it's only going to be restore_all_xen?

> --- a/xen/arch/x86/x86_64/entry.S
> +++ b/xen/arch/x86/x86_64/entry.S
> @@ -142,10 +142,16 @@ process_trap:
>  
>          .section .text.entry, "ax", @progbits
>  
> -/* %rbx: struct vcpu, interrupts disabled */
> +/* %rbx: struct vcpu, %r12: ist_exit, interrupts disabled */
>  restore_all_guest:
> -        ASSERT_INTERRUPTS_DISABLED
>  
> +#ifdef CONFIG_DEBUG
> +        mov   %rsp, %rdi
> +        mov   %r12, %rsi
> +        call  check_ist_exit
> +#endif
> +
> +        ASSERT_INTERRUPTS_DISABLED
>          /* Stash guest SPEC_CTRL value while we can read struct vcpu. */
>          mov VCPU_arch_msrs(%rbx), %rdx
>          mov VCPUMSR_spec_ctrl_raw(%rdx), %r15d

Even here I don't think I see a need for the addition. Plus if the check
is warranted here, is it really necessary for it to live ahead of the
interrupts-disabled check? Further, seeing that being macro-ized, would
it make sense to have a CHECK_IST_EXIT macro in case more than a single
use site remains?

> @@ -1087,6 +1100,10 @@ handle_ist_exception:
>  .L_ist_dispatch_done:
>          mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
>          mov   %bl, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14)
> +
> +        /* This is an IST exit */
> +        mov   $1, %r12
> +
>          cmpb  $X86_EXC_NMI, UREGS_entry_vector(%rsp)
>          jne   ret_from_intr

Could I talk you into using a less than 7-byte insn here? "or $1, %r12"
would be 4 bytes, "mov $1, %r12b", "inc %r12", and "not %r12" would be
just 3. All have certain downsides, yes, but I wonder whether switching
isn't worth it. Even "mov $1, %r12d" would be at least one byte less,
without any downsides. And the OR and INC variants would allow the
remaining 63 bits to be used for another purpose down the road.

Jan
Re: [PATCH 6/8] x86/entry: Track the IST-ness of an entry for the exit paths
Posted by Andrew Cooper 5 months, 1 week ago
On 14/09/2023 10:32 am, Jan Beulich wrote:
> On 13.09.2023 22:27, Andrew Cooper wrote:
>> --- a/xen/arch/x86/x86_64/compat/entry.S
>> +++ b/xen/arch/x86/x86_64/compat/entry.S
>> @@ -117,8 +117,15 @@ compat_process_trap:
>>          call  compat_create_bounce_frame
>>          jmp   compat_test_all_events
>>  
>> -/* %rbx: struct vcpu, interrupts disabled */
>> +/* %rbx: struct vcpu, %r12: ist_exit, interrupts disabled */
>>  ENTRY(compat_restore_all_guest)
>> +
>> +#ifdef CONFIG_DEBUG
>> +        mov   %rsp, %rdi
>> +        mov   %r12, %rsi
>> +        call  check_ist_exit
>> +#endif
>> +
>>          ASSERT_INTERRUPTS_DISABLED
>>          mov   $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), %r11d
>>          and   UREGS_eflags(%rsp),%r11d
> Without having peeked ahead, is there any use of %r12 going to appear
> on this path? I thought it's only going to be restore_all_xen?

For now, we only need to change behaviour based on ist_exit in
restore_all_xen.

But, we do get here in IST context, and I'm not interested in having to
re-do the analysis to determine if this is safe.  ist_exit is a global
property of exiting Xen, so should be kept correct from the outset.

>
>> --- a/xen/arch/x86/x86_64/entry.S
>> +++ b/xen/arch/x86/x86_64/entry.S
>> @@ -142,10 +142,16 @@ process_trap:
>>  
>>          .section .text.entry, "ax", @progbits
>>  
>> -/* %rbx: struct vcpu, interrupts disabled */
>> +/* %rbx: struct vcpu, %r12: ist_exit, interrupts disabled */
>>  restore_all_guest:
>> -        ASSERT_INTERRUPTS_DISABLED
>>  
>> +#ifdef CONFIG_DEBUG
>> +        mov   %rsp, %rdi
>> +        mov   %r12, %rsi
>> +        call  check_ist_exit
>> +#endif
>> +
>> +        ASSERT_INTERRUPTS_DISABLED
>>          /* Stash guest SPEC_CTRL value while we can read struct vcpu. */
>>          mov VCPU_arch_msrs(%rbx), %rdx
>>          mov VCPUMSR_spec_ctrl_raw(%rdx), %r15d
> Even here I don't think I see a need for the addition. Plus if the check
> is warranted here, is it really necessary for it to live ahead of the
> interrupts-disabled check?

What makes you think there is a relevance to the order of two assertions
in fully irqs-off code?

The checks are in the same order as the comment stating the invariants.

>  Further, seeing that being macro-ized, would
> it make sense to have a CHECK_IST_EXIT macro in case more than a single
> use site remains?
>
>> @@ -1087,6 +1100,10 @@ handle_ist_exception:
>>  .L_ist_dispatch_done:
>>          mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
>>          mov   %bl, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14)
>> +
>> +        /* This is an IST exit */
>> +        mov   $1, %r12
>> +
>>          cmpb  $X86_EXC_NMI, UREGS_entry_vector(%rsp)
>>          jne   ret_from_intr
> Could I talk you into using a less than 7-byte insn here? "or $1, %r12"
> would be 4 bytes, "mov $1, %r12b", "inc %r12", and "not %r12" would be
> just 3. All have certain downsides, yes, but I wonder whether switching
> isn't worth it. Even "mov $1, %r12d" would be at least one byte less,
> without any downsides. And the OR and INC variants would allow the
> remaining 63 bits to be used for another purpose down the road.

This is a 2Hz-at-most path.  The size of one instruction is not
something to care about.

But I did mean to use the %r12d form, so I'll go with that.  Everything
else depends on the behaviour of earlier logic.

~Andrew

Re: [PATCH 6/8] x86/entry: Track the IST-ness of an entry for the exit paths
Posted by Jan Beulich 5 months, 1 week ago
On 14.09.2023 21:44, Andrew Cooper wrote:
> On 14/09/2023 10:32 am, Jan Beulich wrote:
>> On 13.09.2023 22:27, Andrew Cooper wrote:
>>> --- a/xen/arch/x86/x86_64/compat/entry.S
>>> +++ b/xen/arch/x86/x86_64/compat/entry.S
>>> @@ -117,8 +117,15 @@ compat_process_trap:
>>>          call  compat_create_bounce_frame
>>>          jmp   compat_test_all_events
>>>  
>>> -/* %rbx: struct vcpu, interrupts disabled */
>>> +/* %rbx: struct vcpu, %r12: ist_exit, interrupts disabled */
>>>  ENTRY(compat_restore_all_guest)
>>> +
>>> +#ifdef CONFIG_DEBUG
>>> +        mov   %rsp, %rdi
>>> +        mov   %r12, %rsi
>>> +        call  check_ist_exit
>>> +#endif
>>> +
>>>          ASSERT_INTERRUPTS_DISABLED
>>>          mov   $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), %r11d
>>>          and   UREGS_eflags(%rsp),%r11d
>> Without having peeked ahead, is there any use of %r12 going to appear
>> on this path? I thought it's only going to be restore_all_xen?
> 
> For now, we only need to change behaviour based on ist_exit in
> restore_all_xen.
> 
> But, we do get here in IST context, and I'm not interested in having to
> re-do the analysis to determine if this is safe.  ist_exit is a global
> property of exiting Xen, so should be kept correct from the outset.

Would be nice to mention this just-in-case aspect in the description.

>>> --- a/xen/arch/x86/x86_64/entry.S
>>> +++ b/xen/arch/x86/x86_64/entry.S
>>> @@ -142,10 +142,16 @@ process_trap:
>>>  
>>>          .section .text.entry, "ax", @progbits
>>>  
>>> -/* %rbx: struct vcpu, interrupts disabled */
>>> +/* %rbx: struct vcpu, %r12: ist_exit, interrupts disabled */
>>>  restore_all_guest:
>>> -        ASSERT_INTERRUPTS_DISABLED
>>>  
>>> +#ifdef CONFIG_DEBUG
>>> +        mov   %rsp, %rdi
>>> +        mov   %r12, %rsi
>>> +        call  check_ist_exit
>>> +#endif
>>> +
>>> +        ASSERT_INTERRUPTS_DISABLED
>>>          /* Stash guest SPEC_CTRL value while we can read struct vcpu. */
>>>          mov VCPU_arch_msrs(%rbx), %rdx
>>>          mov VCPUMSR_spec_ctrl_raw(%rdx), %r15d
>> Even here I don't think I see a need for the addition. Plus if the check
>> is warranted here, is it really necessary for it to live ahead of the
>> interrupts-disabled check?
> 
> What makes you think there is a relevance to the order of two assertions
> in fully irqs-off code?

You explicitly making it more churn than strictly needed. IOW I was
simply wondering whether I was overlooking some aspect.

> The checks are in the same order as the comment stating the invariants.

If that's the only criterion, then okay (but still slightly odd to
see more churn than necessary).

Jan

Re: [PATCH 6/8] x86/entry: Track the IST-ness of an entry for the exit paths
Posted by Andrew Cooper 5 months, 1 week ago
On 15/09/2023 8:13 am, Jan Beulich wrote:
> On 14.09.2023 21:44, Andrew Cooper wrote:
>> On 14/09/2023 10:32 am, Jan Beulich wrote:
>>> On 13.09.2023 22:27, Andrew Cooper wrote:
>>>> --- a/xen/arch/x86/x86_64/entry.S
>>>> +++ b/xen/arch/x86/x86_64/entry.S
>>>> @@ -142,10 +142,16 @@ process_trap:
>>>>  
>>>>          .section .text.entry, "ax", @progbits
>>>>  
>>>> -/* %rbx: struct vcpu, interrupts disabled */
>>>> +/* %rbx: struct vcpu, %r12: ist_exit, interrupts disabled */
>>>>  restore_all_guest:
>>>> -        ASSERT_INTERRUPTS_DISABLED
>>>>  
>>>> +#ifdef CONFIG_DEBUG
>>>> +        mov   %rsp, %rdi
>>>> +        mov   %r12, %rsi
>>>> +        call  check_ist_exit
>>>> +#endif
>>>> +
>>>> +        ASSERT_INTERRUPTS_DISABLED
>>>>          /* Stash guest SPEC_CTRL value while we can read struct vcpu. */
>>>>          mov VCPU_arch_msrs(%rbx), %rdx
>>>>          mov VCPUMSR_spec_ctrl_raw(%rdx), %r15d
>>> Even here I don't think I see a need for the addition. Plus if the check
>>> is warranted here, is it really necessary for it to live ahead of the
>>> interrupts-disabled check?
>> What makes you think there is a relevance to the order of two assertions
>> in fully irqs-off code?
> You explicitly making it more churn than strictly needed. IOW I was
> simply wondering whether I was overlooking some aspect.

That was just the diff algorithm after accidentally removing the newline
after the ASSERT.  I've undone that, and the hunk is simple additions
for the check, like it is in the other two hunks.

~Andrew