Hi all,
Today's linux-next merge of the rcu tree got a conflict in:
kernel/rcu/tree_exp.h
between commit:
9a30ceb4d93e ("rcu: Mark emergency sections in rcu stalls")
from the printk tree and commits:
34863005f96e ("rcu: Extract synchronize_rcu_expedited_stall() from synchronize_rcu_expedited_wait()")
c925e2f61399 ("rcu: Let dump_cpu_task() be used without preemption disabled")
from the rcu tree.
I fixed it up (I think - see below) and can carry the fix as
necessary. This is now fixed as far as linux-next is concerned, but any
non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging. You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.
--
Cheers,
Stephen Rothwell
diff --cc kernel/rcu/tree_exp.h
index be2d251e84f8,c3266bf709d5..000000000000
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@@ -543,6 -542,67 +543,68 @@@ static bool synchronize_rcu_expedited_w
return false;
}
+ /*
+ * Print out an expedited RCU CPU stall warning message.
+ */
+ static void synchronize_rcu_expedited_stall(unsigned long jiffies_start, unsigned long j)
+ {
+ int cpu;
+ unsigned long mask;
+ int ndetected;
+ struct rcu_node *rnp;
+ struct rcu_node *rnp_root = rcu_get_root();
+
+ if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {
+ pr_err("INFO: %s detected expedited stalls, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
+ return;
+ }
+ pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {", rcu_state.name);
+ ndetected = 0;
+ rcu_for_each_leaf_node(rnp) {
+ ndetected += rcu_print_task_exp_stall(rnp);
+ for_each_leaf_node_possible_cpu(rnp, cpu) {
+ struct rcu_data *rdp;
+
+ mask = leaf_node_cpu_bit(rnp, cpu);
+ if (!(READ_ONCE(rnp->expmask) & mask))
+ continue;
+ ndetected++;
+ rdp = per_cpu_ptr(&rcu_data, cpu);
+ pr_cont(" %d-%c%c%c%c", cpu,
+ "O."[!!cpu_online(cpu)],
+ "o."[!!(rdp->grpmask & rnp->expmaskinit)],
+ "N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
+ "D."[!!data_race(rdp->cpu_no_qs.b.exp)]);
+ }
+ }
+ pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
+ j - jiffies_start, rcu_state.expedited_sequence, data_race(rnp_root->expmask),
+ ".T"[!!data_race(rnp_root->exp_tasks)]);
+ if (ndetected) {
+ pr_err("blocking rcu_node structures (internal RCU debug):");
+ rcu_for_each_node_breadth_first(rnp) {
+ if (rnp == rnp_root)
+ continue; /* printed unconditionally */
+ if (sync_rcu_exp_done_unlocked(rnp))
+ continue;
+ pr_cont(" l=%u:%d-%d:%#lx/%c",
+ rnp->level, rnp->grplo, rnp->grphi, data_race(rnp->expmask),
+ ".T"[!!data_race(rnp->exp_tasks)]);
+ }
+ pr_cont("\n");
+ }
+ rcu_for_each_leaf_node(rnp) {
+ for_each_leaf_node_possible_cpu(rnp, cpu) {
+ mask = leaf_node_cpu_bit(rnp, cpu);
+ if (!(READ_ONCE(rnp->expmask) & mask))
+ continue;
+ dump_cpu_task(cpu);
++ nbcon_cpu_emergency_flush();
+ }
+ rcu_exp_print_detail_task_stall_rnp(rnp);
+ }
+ }
+
/*
* Wait for the expedited grace period to elapse, issuing any needed
* RCU CPU stall warnings along the way.
@@@ -597,60 -652,8 +657,11 @@@ static void synchronize_rcu_expedited_w
j = jiffies;
rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_EXP, (void *)(j - jiffies_start));
trace_rcu_stall_warning(rcu_state.name, TPS("ExpeditedStall"));
- pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
- rcu_state.name);
- ndetected = 0;
- rcu_for_each_leaf_node(rnp) {
- ndetected += rcu_print_task_exp_stall(rnp);
- for_each_leaf_node_possible_cpu(rnp, cpu) {
- struct rcu_data *rdp;
-
- mask = leaf_node_cpu_bit(rnp, cpu);
- if (!(READ_ONCE(rnp->expmask) & mask))
- continue;
- ndetected++;
- rdp = per_cpu_ptr(&rcu_data, cpu);
- pr_cont(" %d-%c%c%c%c", cpu,
- "O."[!!cpu_online(cpu)],
- "o."[!!(rdp->grpmask & rnp->expmaskinit)],
- "N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
- "D."[!!data_race(rdp->cpu_no_qs.b.exp)]);
- }
- }
- pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
- j - jiffies_start, rcu_state.expedited_sequence,
- data_race(rnp_root->expmask),
- ".T"[!!data_race(rnp_root->exp_tasks)]);
- if (ndetected) {
- pr_err("blocking rcu_node structures (internal RCU debug):");
- rcu_for_each_node_breadth_first(rnp) {
- if (rnp == rnp_root)
- continue; /* printed unconditionally */
- if (sync_rcu_exp_done_unlocked(rnp))
- continue;
- pr_cont(" l=%u:%d-%d:%#lx/%c",
- rnp->level, rnp->grplo, rnp->grphi,
- data_race(rnp->expmask),
- ".T"[!!data_race(rnp->exp_tasks)]);
- }
- pr_cont("\n");
- }
- rcu_for_each_leaf_node(rnp) {
- for_each_leaf_node_possible_cpu(rnp, cpu) {
- mask = leaf_node_cpu_bit(rnp, cpu);
- if (!(READ_ONCE(rnp->expmask) & mask))
- continue;
- preempt_disable(); // For smp_processor_id() in dump_cpu_task().
- dump_cpu_task(cpu);
- preempt_enable();
- nbcon_cpu_emergency_flush();
- }
- rcu_exp_print_detail_task_stall_rnp(rnp);
- }
+ synchronize_rcu_expedited_stall(jiffies_start, j);
jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
+
+ nbcon_cpu_emergency_exit();
+
panic_on_rcu_stall();
}
}
On Fri 2024-08-09 12:23:21, Stephen Rothwell wrote:
> Hi all,
>
> Today's linux-next merge of the rcu tree got a conflict in:
>
> kernel/rcu/tree_exp.h
>
> between commit:
>
> 9a30ceb4d93e ("rcu: Mark emergency sections in rcu stalls")
>
> from the printk tree and commits:
>
> 34863005f96e ("rcu: Extract synchronize_rcu_expedited_stall() from synchronize_rcu_expedited_wait()")
> c925e2f61399 ("rcu: Let dump_cpu_task() be used without preemption disabled")
>
> from the rcu tree.
>
> I fixed it up (I think - see below) and can carry the fix as
> necessary. This is now fixed as far as linux-next is concerned, but any
> non trivial conflicts should be mentioned to your upstream maintainer
> when your tree is submitted for merging. You may also want to consider
> cooperating with the maintainer of the conflicting tree to minimise any
> particularly complex conflicts.
I have removed the conflicting commit from the printk tree for now.
The patchset has to be reworked. And the new version won't include
nbcon_cpu_emergency_flush().
Sigh, this conflict already existed before the 6.11 merge window.
At that time, Paul decided to postpone the RCU change to make life easier,
see https://lore.kernel.org/r/20240703131820.02eb8021@canb.auug.org.au
Unfortunately, Linus did not accept the printk changes during the
merge window for 6.11, see
https://lore.kernel.org/r/CAHk-=whU_woFnFN-3Jv2hNCmwLg_fkrT42AWwxm-=Ha5BmNX4w@mail.gmail.com
I am sorry for all the inconvenience.
Best Regards,
Petr
Hi Stephen
On Fri, Aug 09, 2024 at 12:23:21PM +1000, Stephen Rothwell wrote:
> Hi all,
>
> Today's linux-next merge of the rcu tree got a conflict in:
>
> kernel/rcu/tree_exp.h
>
> between commit:
>
> 9a30ceb4d93e ("rcu: Mark emergency sections in rcu stalls")
>
> from the printk tree and commits:
>
> 34863005f96e ("rcu: Extract synchronize_rcu_expedited_stall() from synchronize_rcu_expedited_wait()")
> c925e2f61399 ("rcu: Let dump_cpu_task() be used without preemption disabled")
>
> from the rcu tree.
>
> I fixed it up (I think - see below) and can carry the fix as
> necessary. This is now fixed as far as linux-next is concerned, but any
> non trivial conflicts should be mentioned to your upstream maintainer
> when your tree is submitted for merging. You may also want to consider
> cooperating with the maintainer of the conflicting tree to minimise any
> particularly complex conflicts.
>
Thank you! The resolution looks good to me. I will mention this conflict
during PR submission and coordinate with the maintainer of the printk
tree.
- Neeraj
> --
> Cheers,
> Stephen Rothwell
>
> diff --cc kernel/rcu/tree_exp.h
> index be2d251e84f8,c3266bf709d5..000000000000
> --- a/kernel/rcu/tree_exp.h
> +++ b/kernel/rcu/tree_exp.h
> @@@ -543,6 -542,67 +543,68 @@@ static bool synchronize_rcu_expedited_w
> return false;
> }
>
> + /*
> + * Print out an expedited RCU CPU stall warning message.
> + */
> + static void synchronize_rcu_expedited_stall(unsigned long jiffies_start, unsigned long j)
> + {
> + int cpu;
> + unsigned long mask;
> + int ndetected;
> + struct rcu_node *rnp;
> + struct rcu_node *rnp_root = rcu_get_root();
> +
> + if (READ_ONCE(csd_lock_suppress_rcu_stall) && csd_lock_is_stuck()) {
> + pr_err("INFO: %s detected expedited stalls, but suppressed full report due to a stuck CSD-lock.\n", rcu_state.name);
> + return;
> + }
> + pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {", rcu_state.name);
> + ndetected = 0;
> + rcu_for_each_leaf_node(rnp) {
> + ndetected += rcu_print_task_exp_stall(rnp);
> + for_each_leaf_node_possible_cpu(rnp, cpu) {
> + struct rcu_data *rdp;
> +
> + mask = leaf_node_cpu_bit(rnp, cpu);
> + if (!(READ_ONCE(rnp->expmask) & mask))
> + continue;
> + ndetected++;
> + rdp = per_cpu_ptr(&rcu_data, cpu);
> + pr_cont(" %d-%c%c%c%c", cpu,
> + "O."[!!cpu_online(cpu)],
> + "o."[!!(rdp->grpmask & rnp->expmaskinit)],
> + "N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
> + "D."[!!data_race(rdp->cpu_no_qs.b.exp)]);
> + }
> + }
> + pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
> + j - jiffies_start, rcu_state.expedited_sequence, data_race(rnp_root->expmask),
> + ".T"[!!data_race(rnp_root->exp_tasks)]);
> + if (ndetected) {
> + pr_err("blocking rcu_node structures (internal RCU debug):");
> + rcu_for_each_node_breadth_first(rnp) {
> + if (rnp == rnp_root)
> + continue; /* printed unconditionally */
> + if (sync_rcu_exp_done_unlocked(rnp))
> + continue;
> + pr_cont(" l=%u:%d-%d:%#lx/%c",
> + rnp->level, rnp->grplo, rnp->grphi, data_race(rnp->expmask),
> + ".T"[!!data_race(rnp->exp_tasks)]);
> + }
> + pr_cont("\n");
> + }
> + rcu_for_each_leaf_node(rnp) {
> + for_each_leaf_node_possible_cpu(rnp, cpu) {
> + mask = leaf_node_cpu_bit(rnp, cpu);
> + if (!(READ_ONCE(rnp->expmask) & mask))
> + continue;
> + dump_cpu_task(cpu);
> ++ nbcon_cpu_emergency_flush();
> + }
> + rcu_exp_print_detail_task_stall_rnp(rnp);
> + }
> + }
> +
> /*
> * Wait for the expedited grace period to elapse, issuing any needed
> * RCU CPU stall warnings along the way.
> @@@ -597,60 -652,8 +657,11 @@@ static void synchronize_rcu_expedited_w
> j = jiffies;
> rcu_stall_notifier_call_chain(RCU_STALL_NOTIFY_EXP, (void *)(j - jiffies_start));
> trace_rcu_stall_warning(rcu_state.name, TPS("ExpeditedStall"));
> - pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
> - rcu_state.name);
> - ndetected = 0;
> - rcu_for_each_leaf_node(rnp) {
> - ndetected += rcu_print_task_exp_stall(rnp);
> - for_each_leaf_node_possible_cpu(rnp, cpu) {
> - struct rcu_data *rdp;
> -
> - mask = leaf_node_cpu_bit(rnp, cpu);
> - if (!(READ_ONCE(rnp->expmask) & mask))
> - continue;
> - ndetected++;
> - rdp = per_cpu_ptr(&rcu_data, cpu);
> - pr_cont(" %d-%c%c%c%c", cpu,
> - "O."[!!cpu_online(cpu)],
> - "o."[!!(rdp->grpmask & rnp->expmaskinit)],
> - "N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
> - "D."[!!data_race(rdp->cpu_no_qs.b.exp)]);
> - }
> - }
> - pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
> - j - jiffies_start, rcu_state.expedited_sequence,
> - data_race(rnp_root->expmask),
> - ".T"[!!data_race(rnp_root->exp_tasks)]);
> - if (ndetected) {
> - pr_err("blocking rcu_node structures (internal RCU debug):");
> - rcu_for_each_node_breadth_first(rnp) {
> - if (rnp == rnp_root)
> - continue; /* printed unconditionally */
> - if (sync_rcu_exp_done_unlocked(rnp))
> - continue;
> - pr_cont(" l=%u:%d-%d:%#lx/%c",
> - rnp->level, rnp->grplo, rnp->grphi,
> - data_race(rnp->expmask),
> - ".T"[!!data_race(rnp->exp_tasks)]);
> - }
> - pr_cont("\n");
> - }
> - rcu_for_each_leaf_node(rnp) {
> - for_each_leaf_node_possible_cpu(rnp, cpu) {
> - mask = leaf_node_cpu_bit(rnp, cpu);
> - if (!(READ_ONCE(rnp->expmask) & mask))
> - continue;
> - preempt_disable(); // For smp_processor_id() in dump_cpu_task().
> - dump_cpu_task(cpu);
> - preempt_enable();
> - nbcon_cpu_emergency_flush();
> - }
> - rcu_exp_print_detail_task_stall_rnp(rnp);
> - }
> + synchronize_rcu_expedited_stall(jiffies_start, j);
> jiffies_stall = 3 * rcu_exp_jiffies_till_stall_check() + 3;
> +
> + nbcon_cpu_emergency_exit();
> +
> panic_on_rcu_stall();
> }
> }
© 2016 - 2026 Red Hat, Inc.