It is pointless to write all 6 entries and only consume the useful subset.
bloat-o-meter shows quite how obscene the overhead is in vmx_vmexit_handler(),
weighing in at 12% of the function arranging unread zeroes on the stack, and
8% for svm_vmexit_handler().
add/remove: 0/0 grow/shrink: 0/20 up/down: 0/-1929 (-1929)
Function old new delta
hvm_msr_write_intercept 1049 1033 -16
vmx_enable_intr_window 238 214 -24
svm_enable_intr_window 337 313 -24
hvmemul_write_xcr 115 91 -24
hvmemul_write_cr 350 326 -24
hvmemul_read_xcr 115 91 -24
hvmemul_read_cr 146 122 -24
hvm_mov_to_cr 438 414 -24
hvm_mov_from_cr 253 229 -24
vmx_intr_assist 1150 1118 -32
svm_intr_assist 459 427 -32
hvm_rdtsc_intercept 138 106 -32
hvm_msr_read_intercept 898 866 -32
vmx_vmenter_helper 1142 1094 -48
vmx_inject_event 813 765 -48
svm_vmenter_helper 238 187 -51
hvm_hlt 197 146 -51
svm_inject_event 1678 1614 -64
svm_vmexit_handler 5880 5392 -488
vmx_vmexit_handler 7281 6438 -843
Total: Before=3644277, After=3642348, chg -0.05%
Adjust all users of HVMTRACE_ND(), using TRC_PAR_LONG() where appropriate
instead of opencoding it.
The 0 case needs a little help. All objects in C must have a unique address
and _d is passed by pointer. Explicitly permit the optimiser to drop the
array.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
v2:
* Adjust callers of HVMTRACE_ND() too
* Drop _d[] for the 0 case.
Normally I wouldn't recommend patches like this for backport, but
{vmx,svm}_vmexit_handler() are fastpaths and this is a *lot* of I-cache lines
dropped...
---
xen/arch/x86/hvm/svm/svm.c | 8 +++-----
xen/arch/x86/hvm/vmx/vmx.c | 9 ++++-----
xen/include/asm-x86/hvm/trace.h | 28 ++++++++++------------------
3 files changed, 17 insertions(+), 28 deletions(-)
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index afb1ccb342c2..f0e10dec046e 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1052,7 +1052,7 @@ void svm_vmenter_helper(const struct cpu_user_regs *regs)
if ( unlikely(tb_init_done) )
HVMTRACE_ND(VMENTRY,
nestedhvm_vcpu_in_guestmode(curr) ? TRC_HVM_NESTEDFLAG : 0,
- 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
+ 1/*cycles*/);
svm_sync_vmcb(curr, vmcb_needs_vmsave);
@@ -2565,12 +2565,10 @@ void svm_vmexit_handler(struct cpu_user_regs *regs)
if ( hvm_long_mode_active(v) )
HVMTRACE_ND(VMEXIT64, vcpu_guestmode ? TRC_HVM_NESTEDFLAG : 0,
- 1/*cycles*/, 3, exit_reason,
- regs->eip, regs->rip >> 32, 0, 0, 0);
+ 1/*cycles*/, exit_reason, TRC_PAR_LONG(regs->rip));
else
HVMTRACE_ND(VMEXIT, vcpu_guestmode ? TRC_HVM_NESTEDFLAG : 0,
- 1/*cycles*/, 2, exit_reason,
- regs->eip, 0, 0, 0, 0);
+ 1/*cycles*/, exit_reason, regs->eip);
if ( vcpu_guestmode )
{
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index b0a42d05f86a..d403e2d8060a 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3864,11 +3864,10 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
__vmread(VM_EXIT_REASON, &exit_reason);
if ( hvm_long_mode_active(v) )
- HVMTRACE_ND(VMEXIT64, 0, 1/*cycles*/, 3, exit_reason,
- regs->eip, regs->rip >> 32, 0, 0, 0);
+ HVMTRACE_ND(VMEXIT64, 0, 1/*cycles*/, exit_reason,
+ TRC_PAR_LONG(regs->rip));
else
- HVMTRACE_ND(VMEXIT, 0, 1/*cycles*/, 2, exit_reason,
- regs->eip, 0, 0, 0, 0);
+ HVMTRACE_ND(VMEXIT, 0, 1/*cycles*/, exit_reason, regs->eip);
perfc_incra(vmexits, exit_reason);
@@ -4645,7 +4644,7 @@ bool vmx_vmenter_helper(const struct cpu_user_regs *regs)
if ( unlikely(curr->arch.hvm.vmx.lbr_flags & LBR_FIXUP_MASK) )
lbr_fixup();
- HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
+ HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/);
__vmwrite(GUEST_RIP, regs->rip);
__vmwrite(GUEST_RSP, regs->rsp);
diff --git a/xen/include/asm-x86/hvm/trace.h b/xen/include/asm-x86/hvm/trace.h
index 5cd459b855b7..145b59f6ac65 100644
--- a/xen/include/asm-x86/hvm/trace.h
+++ b/xen/include/asm-x86/hvm/trace.h
@@ -67,38 +67,30 @@
#define TRACE_2_LONG_4D(_e, d1, d2, d3, d4, ...) \
TRACE_6D(_e, d1, d2, d3, d4)
-#define HVMTRACE_ND(evt, modifier, cycles, count, d1, d2, d3, d4, d5, d6) \
+#define HVMTRACE_ND(evt, modifier, cycles, ...) \
do { \
if ( unlikely(tb_init_done) && DO_TRC_HVM_ ## evt ) \
{ \
- struct { \
- u32 d[6]; \
- } _d; \
- _d.d[0]=(d1); \
- _d.d[1]=(d2); \
- _d.d[2]=(d3); \
- _d.d[3]=(d4); \
- _d.d[4]=(d5); \
- _d.d[5]=(d6); \
+ uint32_t _d[] = { __VA_ARGS__ }; \
__trace_var(TRC_HVM_ ## evt | (modifier), cycles, \
- sizeof(*_d.d) * count, &_d); \
+ sizeof(_d), sizeof(_d) ? _d : NULL); \
} \
} while(0)
#define HVMTRACE_6D(evt, d1, d2, d3, d4, d5, d6) \
- HVMTRACE_ND(evt, 0, 0, 6, d1, d2, d3, d4, d5, d6)
+ HVMTRACE_ND(evt, 0, 0, d1, d2, d3, d4, d5, d6)
#define HVMTRACE_5D(evt, d1, d2, d3, d4, d5) \
- HVMTRACE_ND(evt, 0, 0, 5, d1, d2, d3, d4, d5, 0)
+ HVMTRACE_ND(evt, 0, 0, d1, d2, d3, d4, d5)
#define HVMTRACE_4D(evt, d1, d2, d3, d4) \
- HVMTRACE_ND(evt, 0, 0, 4, d1, d2, d3, d4, 0, 0)
+ HVMTRACE_ND(evt, 0, 0, d1, d2, d3, d4)
#define HVMTRACE_3D(evt, d1, d2, d3) \
- HVMTRACE_ND(evt, 0, 0, 3, d1, d2, d3, 0, 0, 0)
+ HVMTRACE_ND(evt, 0, 0, d1, d2, d3)
#define HVMTRACE_2D(evt, d1, d2) \
- HVMTRACE_ND(evt, 0, 0, 2, d1, d2, 0, 0, 0, 0)
+ HVMTRACE_ND(evt, 0, 0, d1, d2)
#define HVMTRACE_1D(evt, d1) \
- HVMTRACE_ND(evt, 0, 0, 1, d1, 0, 0, 0, 0, 0)
+ HVMTRACE_ND(evt, 0, 0, d1)
#define HVMTRACE_0D(evt) \
- HVMTRACE_ND(evt, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+ HVMTRACE_ND(evt, 0, 0)
#define HVMTRACE_LONG_1D(evt, d1) \
HVMTRACE_2D(evt ## 64, (d1) & 0xFFFFFFFF, (d1) >> 32)
--
2.11.0
On 20.09.2021 19:25, Andrew Cooper wrote:
> v2:
> * Adjust callers of HVMTRACE_ND() too
What does this refer to? The sole difference to v1 that I can spot
is ...
> * Drop _d[] for the 0 case.
... the one corresponding to this line, i.e. ...
> --- a/xen/include/asm-x86/hvm/trace.h
> +++ b/xen/include/asm-x86/hvm/trace.h
> @@ -67,38 +67,30 @@
> #define TRACE_2_LONG_4D(_e, d1, d2, d3, d4, ...) \
> TRACE_6D(_e, d1, d2, d3, d4)
>
> -#define HVMTRACE_ND(evt, modifier, cycles, count, d1, d2, d3, d4, d5, d6) \
> +#define HVMTRACE_ND(evt, modifier, cycles, ...) \
> do { \
> if ( unlikely(tb_init_done) && DO_TRC_HVM_ ## evt ) \
> { \
> - struct { \
> - u32 d[6]; \
> - } _d; \
> - _d.d[0]=(d1); \
> - _d.d[1]=(d2); \
> - _d.d[2]=(d3); \
> - _d.d[3]=(d4); \
> - _d.d[4]=(d5); \
> - _d.d[5]=(d6); \
> + uint32_t _d[] = { __VA_ARGS__ }; \
> __trace_var(TRC_HVM_ ## evt | (modifier), cycles, \
> - sizeof(*_d.d) * count, &_d); \
> + sizeof(_d), sizeof(_d) ? _d : NULL); \
... the addition of a conditional operator here (which I assume was
something a particular compiler didn't like in v1). FAOD - I'm fine
with the change, but I fear I'm overlooking something (again).
Jan
On 21/09/2021 12:00, Jan Beulich wrote:
> On 20.09.2021 19:25, Andrew Cooper wrote:
>> v2:
>> * Adjust callers of HVMTRACE_ND() too
> What does this refer to? The sole difference to v1 that I can spot
> is ...
Oh - it's me who was confused.
I thought I had failed to include the changes in vmx.c/svm.c in v1. In
which case, no change to that in v2.
>> * Drop _d[] for the 0 case.
> ... the one corresponding to this line, i.e. ...
>
>> --- a/xen/include/asm-x86/hvm/trace.h
>> +++ b/xen/include/asm-x86/hvm/trace.h
>> @@ -67,38 +67,30 @@
>> #define TRACE_2_LONG_4D(_e, d1, d2, d3, d4, ...) \
>> TRACE_6D(_e, d1, d2, d3, d4)
>>
>> -#define HVMTRACE_ND(evt, modifier, cycles, count, d1, d2, d3, d4, d5, d6) \
>> +#define HVMTRACE_ND(evt, modifier, cycles, ...) \
>> do { \
>> if ( unlikely(tb_init_done) && DO_TRC_HVM_ ## evt ) \
>> { \
>> - struct { \
>> - u32 d[6]; \
>> - } _d; \
>> - _d.d[0]=(d1); \
>> - _d.d[1]=(d2); \
>> - _d.d[2]=(d3); \
>> - _d.d[3]=(d4); \
>> - _d.d[4]=(d5); \
>> - _d.d[5]=(d6); \
>> + uint32_t _d[] = { __VA_ARGS__ }; \
>> __trace_var(TRC_HVM_ ## evt | (modifier), cycles, \
>> - sizeof(*_d.d) * count, &_d); \
>> + sizeof(_d), sizeof(_d) ? _d : NULL); \
> ... the addition of a conditional operator here (which I assume was
> something a particular compiler didn't like in v1).
And was covered in the commit message:
> The 0 case needs a little help. All object in C must have a unique address
> and _d is passed by pointer. Explicitly permit the optimiser to drop the
> array.
> FAOD - I'm fine
> with the change, but I fear I'm overlooking something (again).
Thanks,
~Andrew
On 21.09.2021 17:38, Andrew Cooper wrote:
> On 21/09/2021 12:00, Jan Beulich wrote:
>> On 20.09.2021 19:25, Andrew Cooper wrote:
>>> v2:
>>> * Adjust callers of HVMTRACE_ND() too
>> What does this refer to? The sole difference to v1 that I can spot
>> is ...
>
> Oh - its me who was confused.
>
> I thought I had failed to include the changes in vmx.c/svm.c in v1. In
> which case, no change to that in v2.
Good:
Reviewed-by: Jan Beulich <jbeulich@suse.com>
>>> * Drop _d[] for the 0 case.
>> ... the one corresponding to this line, i.e. ...
>>
>>> --- a/xen/include/asm-x86/hvm/trace.h
>>> +++ b/xen/include/asm-x86/hvm/trace.h
>>> @@ -67,38 +67,30 @@
>>> #define TRACE_2_LONG_4D(_e, d1, d2, d3, d4, ...) \
>>> TRACE_6D(_e, d1, d2, d3, d4)
>>>
>>> -#define HVMTRACE_ND(evt, modifier, cycles, count, d1, d2, d3, d4, d5, d6) \
>>> +#define HVMTRACE_ND(evt, modifier, cycles, ...) \
>>> do { \
>>> if ( unlikely(tb_init_done) && DO_TRC_HVM_ ## evt ) \
>>> { \
>>> - struct { \
>>> - u32 d[6]; \
>>> - } _d; \
>>> - _d.d[0]=(d1); \
>>> - _d.d[1]=(d2); \
>>> - _d.d[2]=(d3); \
>>> - _d.d[3]=(d4); \
>>> - _d.d[4]=(d5); \
>>> - _d.d[5]=(d6); \
>>> + uint32_t _d[] = { __VA_ARGS__ }; \
>>> __trace_var(TRC_HVM_ ## evt | (modifier), cycles, \
>>> - sizeof(*_d.d) * count, &_d); \
>>> + sizeof(_d), sizeof(_d) ? _d : NULL); \
>> ... the addition of a conditional operator here (which I assume was
>> something a particular compiler didn't like in v1).
>
> And was covered in the commit message:
>
>> The 0 case needs a little help. All object in C must have a unique address
>> and _d is passed by pointer. Explicitly permit the optimiser to drop the
>> array.
Right, I had associated text and change this way. It was really just
the revision log which was confusing.
Jan
© 2016 - 2026 Red Hat, Inc.