This patch introduces a small enhancement that allows a direct
wake-up of synchronize_rcu() callers. It occurs after the completion
of a grace period, thus from the gp-kthread.

The number of clients woken this way is limited by a hard-coded
maximum threshold. The remaining part, if any, is deferred to the
main worker.
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
kernel/rcu/tree.c | 31 +++++++++++++++++++++++++++++--
kernel/rcu/tree.h | 6 ++++++
2 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 88a47a6a658e..96fe9cc122ca 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1642,7 +1642,8 @@ static DECLARE_WORK(sr_normal_gp_cleanup, rcu_sr_normal_gp_cleanup_work);
  */
 static void rcu_sr_normal_gp_cleanup(void)
 {
-        struct llist_node *wait_tail;
+        struct llist_node *wait_tail, *next, *rcu;
+        int done = 0;
 
         wait_tail = rcu_state.srs_wait_tail;
         if (wait_tail == NULL)
@@ -1650,12 +1651,38 @@ static void rcu_sr_normal_gp_cleanup(void)
 
         rcu_state.srs_wait_tail = NULL;
         ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_wait_tail);
+        WARN_ON_ONCE(!rcu_sr_is_wait_head(wait_tail));
+
+        /*
+         * Process (a) and (d) cases. See an illustration. Apart of
+         * that it handles the scenario when all clients are done,
+         * wait-head is released if last. The worker is not kicked.
+         */
+        llist_for_each_safe(rcu, next, wait_tail->next) {
+                if (rcu_sr_is_wait_head(rcu)) {
+                        if (!rcu->next) {
+                                rcu_sr_put_wait_head(rcu);
+                                wait_tail->next = NULL;
+                        } else {
+                                wait_tail->next = rcu;
+                        }
+
+                        break;
+                }
+
+                rcu_sr_normal_complete(rcu);
+                // It can be last, update a next on this step.
+                wait_tail->next = next;
+
+                if (++done == SR_MAX_USERS_WAKE_FROM_GP)
+                        break;
+        }
 
         // concurrent sr_normal_gp_cleanup work might observe this update.
         smp_store_release(&rcu_state.srs_done_tail, wait_tail);
         ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_done_tail);
 
-        if (wait_tail)
+        if (wait_tail->next)
                 queue_work(system_highpri_wq, &sr_normal_gp_cleanup);
 
 }
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 4c35d7d37647..5d8b71a1caec 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -316,6 +316,12 @@ do { \
         __set_current_state(TASK_RUNNING); \
 } while (0)
 
+/*
+ * A max threshold for synchronize_rcu() users which are
+ * awaken directly by the rcu_gp_kthread(). Left part is
+ * deferred to the main worker.
+ */
+#define SR_MAX_USERS_WAKE_FROM_GP 5
 #define SR_NORMAL_GP_WAIT_HEAD_MAX 5
 
 struct sr_wait_node {
--
2.39.2
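
To make the intent of the new loop easier to follow, here is a minimal,
self-contained userspace model of the idea: complete at most
SR_MAX_USERS_WAKE_FROM_GP waiters directly on the grace-period path and
hand whatever is left to a worker. It deliberately skips the wait-head
bookkeeping of the real code, and the names sr_node and drain_limited
are invented for illustration only; they do not exist in the kernel.

#include <stdio.h>
#include <stdbool.h>

#define SR_MAX_USERS_WAKE_FROM_GP 5

struct sr_node {
        struct sr_node *next;
        bool done;              /* models rcu_sr_normal_complete() waking the waiter */
};

/*
 * Complete up to SR_MAX_USERS_WAKE_FROM_GP waiters directly and return
 * the head of the remaining list, which a worker would handle later.
 */
static struct sr_node *drain_limited(struct sr_node *head)
{
        int done = 0;

        while (head && done < SR_MAX_USERS_WAKE_FROM_GP) {
                struct sr_node *next = head->next;

                head->done = true;      /* "wake up" this synchronize_rcu() caller */
                head = next;
                done++;
        }

        return head;    /* leftover part, deferred to the main worker */
}

int main(void)
{
        struct sr_node nodes[8] = { 0 };
        struct sr_node *rest;
        int i;

        for (i = 0; i < 7; i++)
                nodes[i].next = &nodes[i + 1];

        rest = drain_limited(&nodes[0]);
        printf("deferred nodes start at index %td\n", rest - nodes);
        return 0;
}

With eight queued waiters this prints that deferral starts at index 5,
mirroring how the gp-kthread wakes the first few callers itself and
leaves the rest to the sr_normal_gp_cleanup worker.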
> // concurrent sr_normal_gp_cleanup work might observe this update.
> smp_store_release(&rcu_state.srs_done_tail, wait_tail);
> ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_done_tail);
>
> - if (wait_tail)
> + if (wait_tail->next)
> queue_work(system_highpri_wq, &sr_normal_gp_cleanup);
>
I'm testing these patches :) , one question is as follows:
Can we use a (WQ_MEM_RECLAIM | WQ_HIGHPRI) type of workqueue to perform
the wake-up actions? That would avoid kworker creation failures under
memory pressure, which could delay the wake-up.

Thanks
Zqiang
Hello, Zqiang.

> >
> >         // concurrent sr_normal_gp_cleanup work might observe this update.
> >         smp_store_release(&rcu_state.srs_done_tail, wait_tail);
> >         ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_done_tail);
> >
> > -        if (wait_tail)
> > +        if (wait_tail->next)
> >                 queue_work(system_highpri_wq, &sr_normal_gp_cleanup);
> >
> I'm testing these patches :) , one question is as follows:
> Can we use a (WQ_MEM_RECLAIM | WQ_HIGHPRI) type of workqueue to perform
> the wake-up actions? That would avoid kworker creation failures under
> memory pressure, which could delay the wake-up.
>
I do not have any objections to doing that, so we can add it.

Thanks for testing this!

--
Uladzislau Rezki
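
For reference, a rough sketch of what that could look like, assuming a
dedicated workqueue set up once during RCU initialisation; the names
sr_normal_wq and rcu_sr_wq_init are placeholders rather than the actual
follow-up change, and the flag is spelled WQ_HIGHPRI. WQ_MEM_RECLAIM
guarantees a rescuer thread, so the cleanup work can still make progress
when new kworkers cannot be spawned under memory pressure, whereas
system_highpri_wq is created without that flag.

#include <linux/workqueue.h>
#include <linux/bug.h>

/* Hypothetical name; the eventual patch may choose a different one. */
static struct workqueue_struct *sr_normal_wq;

/*
 * Create the workqueue once during RCU initialisation (the exact hook
 * point is an assumption here).  WQ_MEM_RECLAIM provides a rescuer
 * thread; WQ_HIGHPRI keeps the same priority as system_highpri_wq.
 */
static void rcu_sr_wq_init(void)
{
        sr_normal_wq = alloc_workqueue("sr_normal_gp_cleanup",
                                       WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
        WARN_ON_ONCE(!sr_normal_wq);
}

/*
 * rcu_sr_normal_gp_cleanup() would then do
 *         queue_work(sr_normal_wq, &sr_normal_gp_cleanup);
 * instead of queueing on system_highpri_wq.
 */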
> Hello, Zqiang.
>
> > I'm testing these patches :) , one question is as follows:
> > Can we use a (WQ_MEM_RECLAIM | WQ_HIGHPRI) type of workqueue to perform
> > the wake-up actions? That would avoid kworker creation failures under
> > memory pressure, which could delay the wake-up.
> >
> I do not have any objections to doing that, so we can add it.
>
> Thanks for testing this!
>
I forgot to ask: does your testing simulate a low-memory condition, so
that you actually see the failure you refer to? Or is it just a possible
scenario?

Thanks!

--
Uladzislau Rezki
> I forgot to ask: does your testing simulate a low-memory condition, so
> that you actually see the failure you refer to? Or is it just a possible
> scenario?
>
I'm not currently testing this feature in low-memory scenarios; I thought
of it as a possible scenario. I will test it in a low-memory scenario later
and let you know if it happens :) .

Thanks
Zqiang

> Thanks!
>
> --
> Uladzislau Rezki
>