 .../admin-guide/kernel-parameters.txt |   6 +
 arch/x86/xen/multicalls.c             | 120 ++++++++++++++----
 2 files changed, 99 insertions(+), 27 deletions(-)
Today, Xen multicall debugging must be enabled by modifying a
define in a source file in order to obtain debug data for multicall
errors encountered by users.
Switch multicall debugging to depend on a boot parameter "xen_mc_debug"
instead, enabling affected users to boot with the new parameter set in
order to get better diagnostics.
Add printing all arguments of a single call for better diagnostics.
Signed-off-by: Juergen Gross <jgross@suse.com>
---
.../admin-guide/kernel-parameters.txt | 6 +
arch/x86/xen/multicalls.c | 120 ++++++++++++++----
2 files changed, 99 insertions(+), 27 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 27ec49af1bf2..b33d048e01d8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -7427,6 +7427,12 @@
Crash from Xen panic notifier, without executing late
panic() code such as dumping handler.
+ xen_mc_debug [X86,XEN,EARLY]
+ Enable multicall debugging when running as a Xen PV guest.
+ Enabling this feature will reduce performance a little
+ bit, so it should only be enabled for obtaining extended
+ debug data in case of multicall errors.
+
xen_msr_safe= [X86,XEN,EARLY]
Format: <bool>
Select whether to always use non-faulting (safe) MSR
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 07054572297f..abea216f07f4 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -23,6 +23,8 @@
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/debugfs.h>
+#include <linux/jump_label.h>
+#include <linux/printk.h>
#include <asm/xen/hypercall.h>
@@ -31,18 +33,12 @@
#define MC_BATCH 32
-#define MC_DEBUG 0
-
#define MC_ARGS (MC_BATCH * 16)
struct mc_buffer {
unsigned mcidx, argidx, cbidx;
struct multicall_entry entries[MC_BATCH];
-#if MC_DEBUG
- struct multicall_entry debug[MC_BATCH];
- void *caller[MC_BATCH];
-#endif
unsigned char args[MC_ARGS];
struct callback {
void (*fn)(void *);
@@ -50,13 +46,84 @@ struct mc_buffer {
} callbacks[MC_BATCH];
};
+struct mc_debug_data {
+ struct multicall_entry debug[MC_BATCH];
+ void *caller[MC_BATCH];
+ size_t argsz[MC_BATCH];
+};
+
static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
+static struct mc_debug_data __percpu *mc_debug_data;
+static struct mc_debug_data mc_debug_data_early __initdata;
DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
+static struct static_key mc_debug __ro_after_init;
+static bool mc_debug_enabled __initdata;
+
+static int __init xen_parse_mc_debug(char *arg)
+{
+ mc_debug_enabled = true;
+ static_key_slow_inc(&mc_debug);
+
+ return 0;
+}
+early_param("xen_mc_debug", xen_parse_mc_debug);
+
+static int __init mc_debug_enable(void)
+{
+ struct mc_debug_data __percpu *mcdb;
+ unsigned long flags;
+
+ if (!mc_debug_enabled)
+ return 0;
+
+ mcdb = alloc_percpu(struct mc_debug_data);
+ if (!mcdb) {
+ pr_err("xen_mc_debug inactive\n");
+ static_key_slow_dec(&mc_debug);
+ return -ENOMEM;
+ }
+
+ /* Be careful when switching to percpu debug data. */
+ local_irq_save(flags);
+ xen_mc_flush();
+ mc_debug_data = mcdb;
+ local_irq_restore(flags);
+
+ pr_info("xen_mc_debug active\n");
+
+ return 0;
+}
+early_initcall(mc_debug_enable);
+
+static void print_debug_data(struct mc_buffer *b, struct mc_debug_data *mcdb,
+ int idx)
+{
+ unsigned int arg;
+
+ pr_err(" call %2d: op=%lu result=%ld caller=%pS", idx + 1,
+ mcdb->debug[idx].op, b->entries[idx].result, mcdb->caller[idx]);
+ if (mcdb->argsz[idx]) {
+ pr_cont(" args=");
+ for (arg = 0; arg < mcdb->argsz[idx] / 8; arg++)
+ pr_cont("%lx ", mcdb->debug[idx].args[arg]);
+ }
+ pr_cont("\n");
+}
+
+static struct mc_debug_data * __ref get_mc_debug_ptr(void)
+{
+ if (likely(mc_debug_data))
+ return this_cpu_ptr(mc_debug_data);
+
+ return &mc_debug_data_early;
+}
+
void xen_mc_flush(void)
{
struct mc_buffer *b = this_cpu_ptr(&mc_buffer);
struct multicall_entry *mc;
+ struct mc_debug_data *mcdb = NULL;
int ret = 0;
unsigned long flags;
int i;
@@ -69,10 +136,11 @@ void xen_mc_flush(void)
trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx);
-#if MC_DEBUG
- memcpy(b->debug, b->entries,
- b->mcidx * sizeof(struct multicall_entry));
-#endif
+ if (static_key_false(&mc_debug)) {
+ mcdb = get_mc_debug_ptr();
+ memcpy(mcdb->debug, b->entries,
+ b->mcidx * sizeof(struct multicall_entry));
+ }
switch (b->mcidx) {
case 0:
@@ -104,20 +172,15 @@ void xen_mc_flush(void)
ret, b->mcidx, smp_processor_id());
for (i = 0; i < b->mcidx; i++) {
if (b->entries[i].result < 0) {
-#if MC_DEBUG
- pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pS\n",
- i + 1,
- b->debug[i].op,
- b->debug[i].args[0],
- b->entries[i].result,
- b->caller[i]);
-#else
- pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\n",
- i + 1,
- b->entries[i].op,
- b->entries[i].args[0],
- b->entries[i].result);
-#endif
+ if (static_key_false(&mc_debug)) {
+ print_debug_data(b, mcdb, i);
+ } else {
+ pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\n",
+ i + 1,
+ b->entries[i].op,
+ b->entries[i].args[0],
+ b->entries[i].result);
+ }
}
}
}
@@ -155,9 +218,12 @@ struct multicall_space __xen_mc_entry(size_t args)
}
ret.mc = &b->entries[b->mcidx];
-#if MC_DEBUG
- b->caller[b->mcidx] = __builtin_return_address(0);
-#endif
+ if (static_key_false(&mc_debug)) {
+ struct mc_debug_data *mcdb = get_mc_debug_ptr();
+
+ mcdb->caller[b->mcidx] = __builtin_return_address(0);
+ mcdb->argsz[b->mcidx] = args;
+ }
b->mcidx++;
ret.args = &b->args[argidx];
b->argidx = argidx + args;
--
2.35.3
On 7/3/24 7:56 AM, Juergen Gross wrote:
>
> #define MC_BATCH 32
>
> -#define MC_DEBUG 0
> -
> #define MC_ARGS (MC_BATCH * 16)
>
>
> struct mc_buffer {
> unsigned mcidx, argidx, cbidx;
> struct multicall_entry entries[MC_BATCH];
> -#if MC_DEBUG
> - struct multicall_entry debug[MC_BATCH];
> - void *caller[MC_BATCH];
> -#endif
> unsigned char args[MC_ARGS];
> struct callback {
> void (*fn)(void *);
> @@ -50,13 +46,84 @@ struct mc_buffer {
> } callbacks[MC_BATCH];
> };
>
> +struct mc_debug_data {
> + struct multicall_entry debug[MC_BATCH];
'entries'? It's mc_debug_data's copy of mc_buffer's entries.
Also, would it be better to keep these fields as a struct of scalars and
instead have the percpu array of this struct? Otherwise there is a whole
bunch of [MC_BATCH] arrays, all of them really indexed by the same
value. (And while at it, there is no reason to have callbacks[MC_BATCH]
sized like that -- it has nothing to do with batch size and can probably
be made smaller)
> + void *caller[MC_BATCH];
> + size_t argsz[MC_BATCH];
> +};
> +
> static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
> +static struct mc_debug_data __percpu *mc_debug_data;
> +static struct mc_debug_data mc_debug_data_early __initdata;
How about (I think this should work):
static struct mc_debug_data __percpu *mc_debug_data __refdata =
&mc_debug_data_early;
Then you won't need get_mc_debug_ptr().
-boris
On 06.07.24 00:36, boris.ostrovsky@oracle.com wrote:
>
>
> On 7/3/24 7:56 AM, Juergen Gross wrote:
>
>> #define MC_BATCH 32
>> -#define MC_DEBUG 0
>> -
>> #define MC_ARGS (MC_BATCH * 16)
>> struct mc_buffer {
>> unsigned mcidx, argidx, cbidx;
>> struct multicall_entry entries[MC_BATCH];
>> -#if MC_DEBUG
>> - struct multicall_entry debug[MC_BATCH];
>> - void *caller[MC_BATCH];
>> -#endif
>> unsigned char args[MC_ARGS];
>> struct callback {
>> void (*fn)(void *);
>> @@ -50,13 +46,84 @@ struct mc_buffer {
>> } callbacks[MC_BATCH];
>> };
>> +struct mc_debug_data {
>> + struct multicall_entry debug[MC_BATCH];
>
> 'entries'? It's a mc_debug_data's copy of mc_buffer's entries.
Yes, this is better.
> Also, would it be better to keep these fields as a struct of scalars and instead
> have the percpu array of this struct? Otherwise there is a whole bunch of
> [MC_BATCH] arrays, all of them really indexed by the same value. (And while at
> it, there is no reason to have callbacks[MC_BATCH] sized like that -- it has
> nothing to do with batch size and can probably be made smaller)
As today the mc_buffer's entries are copied via a single memcpy(), there
are 3 options:
- make mc_debug_data a percpu pointer to a single array, requiring to
copy the mc_buffer's entries in a loop
- let struct mc_debug_data contain two arrays (entries[] and struct foo {}[],
with struct foo containing the other pointers/values)
- keep the layout as in my patch
Regarding the callbacks: I think the max number of callbacks is indeed MC_BATCH,
as for each batch member one callback might be requested. So I'd rather keep it
the way it is today.
>> + void *caller[MC_BATCH];
>> + size_t argsz[MC_BATCH];
>> +};
>> +
>> static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
>> +static struct mc_debug_data __percpu *mc_debug_data;
>> +static struct mc_debug_data mc_debug_data_early __initdata;
>
> How about (I think this should work):
>
> static struct mc_debug_data __percpu *mc_debug_data __refdata =
> &mc_debug_data_early;
>
> Then you won't need get_mc_debug_ptr().
I like this idea.
Juergen
On 7/8/24 5:04 AM, Jürgen Groß wrote:
> On 06.07.24 00:36, boris.ostrovsky@oracle.com wrote:
>>
>>
>
>> Also, would it be better to keep these fields as a struct of scalars
>> and instead have the percpu array of this struct? Otherwise there is a
>> whole bunch of [MC_BATCH] arrays, all of them really indexed by the
>> same value. (And while at it, there is no reason to have
>> callbacks[MC_BATCH] sized like that -- it has nothing to do with batch
>> size and can probably be made smaller)
>
> As today the mc_buffer's entries are copied via a single memcpy(), there
> are 3 options:
Ah yes, it's memcpy, I didn't think of that. Then leaving it as is is
the best.
>
> - make mc_debug_data a percpu pointer to a single array, requiring to
> copy the mc_buffer's entries in a loop
>
> - let struct mc_debug_data contain two arrays (entries[] and struct foo
> {}[],
> with struct foo containing the other pointers/values)
>
> - keep the layout as in my patch
>
> Regarding the callbacks: I think the max number of callbacks is indeed
> MC_BATCH,
> as for each batch member one callback might be requested. So I'd rather
> keep it
> the way it is today.
Right, I was trying to point out that it's the max number but I suspect
it usually is smaller -- we currently ask for a callback in fewer than
half of the cases where we submit a request.
-boris
© 2016 - 2025 Red Hat, Inc.