Currently psi_trigger_create() does a lot of things:
parses the user text input, allocates and initializes
the psi_trigger structure and turns on the trigger.
It does it slightly different for two existing types
of psi_triggers: system-wide and cgroup-wide.
In order to support a new type of psi triggers, which
will be owned by a bpf program and won't have a user's
text description, let's refactor psi_trigger_create().
1. Introduce psi_trigger_type enum:
currently PSI_SYSTEM and PSI_CGROUP are valid values.
2. Introduce psi_trigger_params structure to avoid passing
a large number of parameters to psi_trigger_create().
3. Move out the user's input parsing into the new
psi_trigger_parse() helper.
4. Move out the capabilities check into the new
psi_file_privileged() helper.
5. Stop relying on t->of for detecting trigger type.
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
---
include/linux/psi.h | 15 +++++--
include/linux/psi_types.h | 33 ++++++++++++++-
kernel/cgroup/cgroup.c | 14 ++++++-
kernel/sched/psi.c | 87 +++++++++++++++++++++++++--------------
4 files changed, 112 insertions(+), 37 deletions(-)
diff --git a/include/linux/psi.h b/include/linux/psi.h
index e0745873e3f2..8178e998d94b 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -23,14 +23,23 @@ void psi_memstall_enter(unsigned long *flags);
void psi_memstall_leave(unsigned long *flags);
int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
-struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
- enum psi_res res, struct file *file,
- struct kernfs_open_file *of);
+int psi_trigger_parse(struct psi_trigger_params *params, const char *buf);
+struct psi_trigger *psi_trigger_create(struct psi_group *group,
+ const struct psi_trigger_params *param);
void psi_trigger_destroy(struct psi_trigger *t);
__poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
poll_table *wait);
+static inline bool psi_file_privileged(struct file *file)
+{
+ /*
+ * Checking the privilege here on file->f_cred implies that a privileged user
+ * could open the file and delegate the write to an unprivileged one.
+ */
+ return cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE);
+}
+
#ifdef CONFIG_CGROUPS
static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
{
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index f1fd3a8044e0..cea54121d9b9 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -121,7 +121,38 @@ struct psi_window {
u64 prev_growth;
};
+enum psi_trigger_type {
+ PSI_SYSTEM,
+ PSI_CGROUP,
+};
+
+struct psi_trigger_params {
+ /* Trigger type */
+ enum psi_trigger_type type;
+
+ /* Resources that workloads could be stalled on */
+ enum psi_res res;
+
+ /* True if all threads should be stalled to trigger */
+ bool full;
+
+ /* Threshold in us */
+ u32 threshold_us;
+
+ /* Window in us */
+ u32 window_us;
+
+ /* Privileged triggers are treated differently */
+ bool privileged;
+
+ /* Link to kernfs open file, only for PSI_CGROUP */
+ struct kernfs_open_file *of;
+};
+
struct psi_trigger {
+ /* Trigger type */
+ enum psi_trigger_type type;
+
/* PSI state being monitored by the trigger */
enum psi_states state;
@@ -137,7 +168,7 @@ struct psi_trigger {
/* Wait queue for polling */
wait_queue_head_t event_wait;
- /* Kernfs file for cgroup triggers */
+ /* Kernfs file for PSI_CGROUP triggers */
struct kernfs_open_file *of;
/* Pending event flag */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index a723b7dc6e4e..9cd3c3a52c21 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3872,6 +3872,12 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
struct psi_trigger *new;
struct cgroup *cgrp;
struct psi_group *psi;
+ struct psi_trigger_params params;
+ int err;
+
+ err = psi_trigger_parse(¶ms, buf);
+ if (err)
+ return err;
cgrp = cgroup_kn_lock_live(of->kn, false);
if (!cgrp)
@@ -3887,7 +3893,13 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
}
psi = cgroup_psi(cgrp);
- new = psi_trigger_create(psi, buf, res, of->file, of);
+
+ params.type = PSI_CGROUP;
+ params.res = res;
+ params.privileged = psi_file_privileged(of->file);
+ params.of = of;
+
+ new = psi_trigger_create(psi, ¶ms);
if (IS_ERR(new)) {
cgroup_put(cgrp);
return PTR_ERR(new);
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index ad04a5c3162a..e1d8eaeeff17 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -489,7 +489,7 @@ static void update_triggers(struct psi_group *group, u64 now,
/* Generate an event */
if (cmpxchg(&t->event, 0, 1) == 0) {
- if (t->of)
+ if (t->type == PSI_CGROUP)
kernfs_notify(t->of->kn);
else
wake_up_interruptible(&t->event_wait);
@@ -1281,74 +1281,87 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
return 0;
}
-struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
- enum psi_res res, struct file *file,
- struct kernfs_open_file *of)
+int psi_trigger_parse(struct psi_trigger_params *params, const char *buf)
{
- struct psi_trigger *t;
- enum psi_states state;
- u32 threshold_us;
- bool privileged;
- u32 window_us;
+ u32 threshold_us, window_us;
if (static_branch_likely(&psi_disabled))
- return ERR_PTR(-EOPNOTSUPP);
-
- /*
- * Checking the privilege here on file->f_cred implies that a privileged user
- * could open the file and delegate the write to an unprivileged one.
- */
- privileged = cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE);
+ return -EOPNOTSUPP;
if (sscanf(buf, "some %u %u", &threshold_us, &window_us) == 2)
- state = PSI_IO_SOME + res * 2;
+ params->full = false;
else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2)
- state = PSI_IO_FULL + res * 2;
+ params->full = true;
else
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
+
+ params->threshold_us = threshold_us;
+ params->window_us = window_us;
+ return 0;
+}
+
+struct psi_trigger *psi_trigger_create(struct psi_group *group,
+ const struct psi_trigger_params *params)
+{
+ struct psi_trigger *t;
+ enum psi_states state;
+
+ if (static_branch_likely(&psi_disabled))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ state = params->full ? PSI_IO_FULL : PSI_IO_SOME;
+ state += params->res * 2;
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
- if (res == PSI_IRQ && --state != PSI_IRQ_FULL)
+ if (params->res == PSI_IRQ && --state != PSI_IRQ_FULL)
return ERR_PTR(-EINVAL);
#endif
if (state >= PSI_NONIDLE)
return ERR_PTR(-EINVAL);
- if (window_us == 0 || window_us > WINDOW_MAX_US)
+ if (params->window_us == 0 || params->window_us > WINDOW_MAX_US)
return ERR_PTR(-EINVAL);
/*
* Unprivileged users can only use 2s windows so that averages aggregation
* work is used, and no RT threads need to be spawned.
*/
- if (!privileged && window_us % 2000000)
+ if (!params->privileged && params->window_us % 2000000)
return ERR_PTR(-EINVAL);
/* Check threshold */
- if (threshold_us == 0 || threshold_us > window_us)
+ if (params->threshold_us == 0 || params->threshold_us > params->window_us)
return ERR_PTR(-EINVAL);
t = kmalloc(sizeof(*t), GFP_KERNEL);
if (!t)
return ERR_PTR(-ENOMEM);
+ t->type = params->type;
t->group = group;
t->state = state;
- t->threshold = threshold_us * NSEC_PER_USEC;
- t->win.size = window_us * NSEC_PER_USEC;
+ t->threshold = params->threshold_us * NSEC_PER_USEC;
+ t->win.size = params->window_us * NSEC_PER_USEC;
window_reset(&t->win, sched_clock(),
group->total[PSI_POLL][t->state], 0);
t->event = 0;
t->last_event_time = 0;
- t->of = of;
- if (!of)
+
+ switch (params->type) {
+ case PSI_SYSTEM:
init_waitqueue_head(&t->event_wait);
+ break;
+ case PSI_CGROUP:
+ t->of = params->of;
+ break;
+ }
+
t->pending_event = false;
- t->aggregator = privileged ? PSI_POLL : PSI_AVGS;
+ t->aggregator = params->privileged ? PSI_POLL : PSI_AVGS;
- if (privileged) {
+ if (params->privileged) {
mutex_lock(&group->rtpoll_trigger_lock);
if (!rcu_access_pointer(group->rtpoll_task)) {
@@ -1401,7 +1414,7 @@ void psi_trigger_destroy(struct psi_trigger *t)
* being accessed later. Can happen if cgroup is deleted from under a
* polling process.
*/
- if (t->of)
+ if (t->type == PSI_CGROUP)
kernfs_notify(t->of->kn);
else
wake_up_interruptible(&t->event_wait);
@@ -1481,7 +1494,7 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
if (!t)
return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
- if (t->of)
+ if (t->type == PSI_CGROUP)
kernfs_generic_poll(t->of, wait);
else
poll_wait(file, &t->event_wait, wait);
@@ -1530,6 +1543,8 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
size_t buf_size;
struct seq_file *seq;
struct psi_trigger *new;
+ struct psi_trigger_params params;
+ int err;
if (static_branch_likely(&psi_disabled))
return -EOPNOTSUPP;
@@ -1543,6 +1558,10 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
buf[buf_size - 1] = '\0';
+ err = psi_trigger_parse(¶ms, buf);
+ if (err)
+ return err;
+
seq = file->private_data;
/* Take seq->lock to protect seq->private from concurrent writes */
@@ -1554,7 +1573,11 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
return -EBUSY;
}
- new = psi_trigger_create(&psi_system, buf, res, file, NULL);
+ params.type = PSI_SYSTEM;
+ params.res = res;
+ params.privileged = psi_file_privileged(file);
+
+ new = psi_trigger_create(&psi_system, ¶ms);
if (IS_ERR(new)) {
mutex_unlock(&seq->lock);
return PTR_ERR(new);
--
2.50.1
On Mon, Aug 18, 2025 at 10:02 AM Roman Gushchin
<roman.gushchin@linux.dev> wrote:
>
> Currently psi_trigger_create() does a lot of things:
> parses the user text input, allocates and initializes
> the psi_trigger structure and turns on the trigger.
> It does it slightly different for two existing types
> of psi_triggers: system-wide and cgroup-wide.
>
> In order to support a new type of psi triggers, which
> will be owned by a bpf program and won't have a user's
> text description, let's refactor psi_trigger_create().
>
> 1. Introduce psi_trigger_type enum:
> currently PSI_SYSTEM and PSI_CGROUP are valid values.
> 2. Introduce psi_trigger_params structure to avoid passing
> a large number of parameters to psi_trigger_create().
> 3. Move out the user's input parsing into the new
> psi_trigger_parse() helper.
> 4. Move out the capabilities check into the new
> psi_file_privileged() helper.
> 5. Stop relying on t->of for detecting trigger type.
It's worth noting that this is a pure core refactoring without any
functional change (hopefully :))
>
> Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
> ---
> include/linux/psi.h | 15 +++++--
> include/linux/psi_types.h | 33 ++++++++++++++-
> kernel/cgroup/cgroup.c | 14 ++++++-
> kernel/sched/psi.c | 87 +++++++++++++++++++++++++--------------
> 4 files changed, 112 insertions(+), 37 deletions(-)
>
> diff --git a/include/linux/psi.h b/include/linux/psi.h
> index e0745873e3f2..8178e998d94b 100644
> --- a/include/linux/psi.h
> +++ b/include/linux/psi.h
> @@ -23,14 +23,23 @@ void psi_memstall_enter(unsigned long *flags);
> void psi_memstall_leave(unsigned long *flags);
>
> int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
> -struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
> - enum psi_res res, struct file *file,
> - struct kernfs_open_file *of);
> +int psi_trigger_parse(struct psi_trigger_params *params, const char *buf);
> +struct psi_trigger *psi_trigger_create(struct psi_group *group,
> + const struct psi_trigger_params *param);
> void psi_trigger_destroy(struct psi_trigger *t);
>
> __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
> poll_table *wait);
>
> +static inline bool psi_file_privileged(struct file *file)
> +{
> + /*
> + * Checking the privilege here on file->f_cred implies that a privileged user
> + * could open the file and delegate the write to an unprivileged one.
> + */
> + return cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE);
> +}
> +
> #ifdef CONFIG_CGROUPS
> static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
> {
> diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
> index f1fd3a8044e0..cea54121d9b9 100644
> --- a/include/linux/psi_types.h
> +++ b/include/linux/psi_types.h
> @@ -121,7 +121,38 @@ struct psi_window {
> u64 prev_growth;
> };
>
> +enum psi_trigger_type {
> + PSI_SYSTEM,
> + PSI_CGROUP,
> +};
> +
> +struct psi_trigger_params {
> + /* Trigger type */
> + enum psi_trigger_type type;
> +
> + /* Resources that workloads could be stalled on */
I would describe this as "Resource to be monitored"
> + enum psi_res res;
> +
> + /* True if all threads should be stalled to trigger */
> + bool full;
> +
> + /* Threshold in us */
> + u32 threshold_us;
> +
> + /* Window in us */
> + u32 window_us;
> +
> + /* Privileged triggers are treated differently */
> + bool privileged;
> +
> + /* Link to kernfs open file, only for PSI_CGROUP */
> + struct kernfs_open_file *of;
> +};
> +
> struct psi_trigger {
> + /* Trigger type */
> + enum psi_trigger_type type;
> +
> /* PSI state being monitored by the trigger */
> enum psi_states state;
>
> @@ -137,7 +168,7 @@ struct psi_trigger {
> /* Wait queue for polling */
> wait_queue_head_t event_wait;
>
> - /* Kernfs file for cgroup triggers */
> + /* Kernfs file for PSI_CGROUP triggers */
> struct kernfs_open_file *of;
>
> /* Pending event flag */
> diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
> index a723b7dc6e4e..9cd3c3a52c21 100644
> --- a/kernel/cgroup/cgroup.c
> +++ b/kernel/cgroup/cgroup.c
> @@ -3872,6 +3872,12 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
> struct psi_trigger *new;
> struct cgroup *cgrp;
> struct psi_group *psi;
> + struct psi_trigger_params params;
> + int err;
> +
> + err = psi_trigger_parse(¶ms, buf);
> + if (err)
> + return err;
>
> cgrp = cgroup_kn_lock_live(of->kn, false);
> if (!cgrp)
> @@ -3887,7 +3893,13 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
> }
>
> psi = cgroup_psi(cgrp);
> - new = psi_trigger_create(psi, buf, res, of->file, of);
> +
> + params.type = PSI_CGROUP;
> + params.res = res;
> + params.privileged = psi_file_privileged(of->file);
> + params.of = of;
> +
> + new = psi_trigger_create(psi, ¶ms);
> if (IS_ERR(new)) {
> cgroup_put(cgrp);
> return PTR_ERR(new);
> diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> index ad04a5c3162a..e1d8eaeeff17 100644
> --- a/kernel/sched/psi.c
> +++ b/kernel/sched/psi.c
> @@ -489,7 +489,7 @@ static void update_triggers(struct psi_group *group, u64 now,
>
> /* Generate an event */
> if (cmpxchg(&t->event, 0, 1) == 0) {
> - if (t->of)
> + if (t->type == PSI_CGROUP)
> kernfs_notify(t->of->kn);
> else
> wake_up_interruptible(&t->event_wait);
> @@ -1281,74 +1281,87 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
> return 0;
> }
>
> -struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
> - enum psi_res res, struct file *file,
> - struct kernfs_open_file *of)
> +int psi_trigger_parse(struct psi_trigger_params *params, const char *buf)
> {
> - struct psi_trigger *t;
> - enum psi_states state;
> - u32 threshold_us;
> - bool privileged;
> - u32 window_us;
> + u32 threshold_us, window_us;
>
> if (static_branch_likely(&psi_disabled))
> - return ERR_PTR(-EOPNOTSUPP);
> -
> - /*
> - * Checking the privilege here on file->f_cred implies that a privileged user
> - * could open the file and delegate the write to an unprivileged one.
> - */
> - privileged = cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE);
> + return -EOPNOTSUPP;
>
> if (sscanf(buf, "some %u %u", &threshold_us, &window_us) == 2)
> - state = PSI_IO_SOME + res * 2;
> + params->full = false;
> else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2)
> - state = PSI_IO_FULL + res * 2;
> + params->full = true;
> else
> - return ERR_PTR(-EINVAL);
> + return -EINVAL;
> +
> + params->threshold_us = threshold_us;
> + params->window_us = window_us;
> + return 0;
> +}
> +
> +struct psi_trigger *psi_trigger_create(struct psi_group *group,
> + const struct psi_trigger_params *params)
> +{
> + struct psi_trigger *t;
> + enum psi_states state;
> +
> + if (static_branch_likely(&psi_disabled))
> + return ERR_PTR(-EOPNOTSUPP);
> +
> + state = params->full ? PSI_IO_FULL : PSI_IO_SOME;
> + state += params->res * 2;
>
> #ifdef CONFIG_IRQ_TIME_ACCOUNTING
> - if (res == PSI_IRQ && --state != PSI_IRQ_FULL)
> + if (params->res == PSI_IRQ && --state != PSI_IRQ_FULL)
> return ERR_PTR(-EINVAL);
> #endif
>
> if (state >= PSI_NONIDLE)
> return ERR_PTR(-EINVAL);
>
> - if (window_us == 0 || window_us > WINDOW_MAX_US)
> + if (params->window_us == 0 || params->window_us > WINDOW_MAX_US)
> return ERR_PTR(-EINVAL);
>
> /*
> * Unprivileged users can only use 2s windows so that averages aggregation
> * work is used, and no RT threads need to be spawned.
> */
> - if (!privileged && window_us % 2000000)
> + if (!params->privileged && params->window_us % 2000000)
> return ERR_PTR(-EINVAL);
>
> /* Check threshold */
> - if (threshold_us == 0 || threshold_us > window_us)
> + if (params->threshold_us == 0 || params->threshold_us > params->window_us)
> return ERR_PTR(-EINVAL);
>
> t = kmalloc(sizeof(*t), GFP_KERNEL);
> if (!t)
> return ERR_PTR(-ENOMEM);
>
> + t->type = params->type;
> t->group = group;
> t->state = state;
> - t->threshold = threshold_us * NSEC_PER_USEC;
> - t->win.size = window_us * NSEC_PER_USEC;
> + t->threshold = params->threshold_us * NSEC_PER_USEC;
> + t->win.size = params->window_us * NSEC_PER_USEC;
> window_reset(&t->win, sched_clock(),
> group->total[PSI_POLL][t->state], 0);
>
> t->event = 0;
> t->last_event_time = 0;
> - t->of = of;
> - if (!of)
> +
> + switch (params->type) {
> + case PSI_SYSTEM:
> init_waitqueue_head(&t->event_wait);
I think t->of will be left uninitialized here. Let's set it to NULL please.
> + break;
> + case PSI_CGROUP:
> + t->of = params->of;
> + break;
> + }
> +
> t->pending_event = false;
> - t->aggregator = privileged ? PSI_POLL : PSI_AVGS;
> + t->aggregator = params->privileged ? PSI_POLL : PSI_AVGS;
>
> - if (privileged) {
> + if (params->privileged) {
> mutex_lock(&group->rtpoll_trigger_lock);
>
> if (!rcu_access_pointer(group->rtpoll_task)) {
> @@ -1401,7 +1414,7 @@ void psi_trigger_destroy(struct psi_trigger *t)
> * being accessed later. Can happen if cgroup is deleted from under a
> * polling process.
> */
> - if (t->of)
> + if (t->type == PSI_CGROUP)
> kernfs_notify(t->of->kn);
> else
> wake_up_interruptible(&t->event_wait);
> @@ -1481,7 +1494,7 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
> if (!t)
> return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
>
> - if (t->of)
> + if (t->type == PSI_CGROUP)
> kernfs_generic_poll(t->of, wait);
> else
> poll_wait(file, &t->event_wait, wait);
> @@ -1530,6 +1543,8 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
> size_t buf_size;
> struct seq_file *seq;
> struct psi_trigger *new;
> + struct psi_trigger_params params;
> + int err;
>
> if (static_branch_likely(&psi_disabled))
> return -EOPNOTSUPP;
> @@ -1543,6 +1558,10 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
>
> buf[buf_size - 1] = '\0';
>
> + err = psi_trigger_parse(¶ms, buf);
> + if (err)
> + return err;
> +
> seq = file->private_data;
>
> /* Take seq->lock to protect seq->private from concurrent writes */
> @@ -1554,7 +1573,11 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
> return -EBUSY;
> }
>
> - new = psi_trigger_create(&psi_system, buf, res, file, NULL);
> + params.type = PSI_SYSTEM;
> + params.res = res;
> + params.privileged = psi_file_privileged(file);
> +
> + new = psi_trigger_create(&psi_system, ¶ms);
> if (IS_ERR(new)) {
> mutex_unlock(&seq->lock);
> return PTR_ERR(new);
> --
> 2.50.1
>
Suren Baghdasaryan <surenb@google.com> writes:
> On Mon, Aug 18, 2025 at 10:02 AM Roman Gushchin
> <roman.gushchin@linux.dev> wrote:
>>
>> Currently psi_trigger_create() does a lot of things:
>> parses the user text input, allocates and initializes
>> the psi_trigger structure and turns on the trigger.
>> It does it slightly different for two existing types
>> of psi_triggers: system-wide and cgroup-wide.
>>
>> In order to support a new type of psi triggers, which
>> will be owned by a bpf program and won't have a user's
>> text description, let's refactor psi_trigger_create().
>>
>> 1. Introduce psi_trigger_type enum:
>> currently PSI_SYSTEM and PSI_CGROUP are valid values.
>> 2. Introduce psi_trigger_params structure to avoid passing
>> a large number of parameters to psi_trigger_create().
>> 3. Move out the user's input parsing into the new
>> psi_trigger_parse() helper.
>> 4. Move out the capabilities check into the new
>> psi_file_privileged() helper.
>> 5. Stop relying on t->of for detecting trigger type.
>
> It's worth noting that this is a pure core refactoring without any
> functional change (hopefully :))
Added this to the commit log.
>
>>
>> Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
>> ---
>> include/linux/psi.h | 15 +++++--
>> include/linux/psi_types.h | 33 ++++++++++++++-
>> kernel/cgroup/cgroup.c | 14 ++++++-
>> kernel/sched/psi.c | 87 +++++++++++++++++++++++++--------------
>> 4 files changed, 112 insertions(+), 37 deletions(-)
>>
>> diff --git a/include/linux/psi.h b/include/linux/psi.h
>> index e0745873e3f2..8178e998d94b 100644
>> --- a/include/linux/psi.h
>> +++ b/include/linux/psi.h
>> @@ -23,14 +23,23 @@ void psi_memstall_enter(unsigned long *flags);
>> void psi_memstall_leave(unsigned long *flags);
>>
>> int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
>> -struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
>> - enum psi_res res, struct file *file,
>> - struct kernfs_open_file *of);
>> +int psi_trigger_parse(struct psi_trigger_params *params, const char *buf);
>> +struct psi_trigger *psi_trigger_create(struct psi_group *group,
>> + const struct psi_trigger_params *param);
>> void psi_trigger_destroy(struct psi_trigger *t);
>>
>> __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
>> poll_table *wait);
>>
>> +static inline bool psi_file_privileged(struct file *file)
>> +{
>> + /*
>> + * Checking the privilege here on file->f_cred implies that a privileged user
>> + * could open the file and delegate the write to an unprivileged one.
>> + */
>> + return cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE);
>> +}
>> +
>> #ifdef CONFIG_CGROUPS
>> static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
>> {
>> diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
>> index f1fd3a8044e0..cea54121d9b9 100644
>> --- a/include/linux/psi_types.h
>> +++ b/include/linux/psi_types.h
>> @@ -121,7 +121,38 @@ struct psi_window {
>> u64 prev_growth;
>> };
>>
>> +enum psi_trigger_type {
>> + PSI_SYSTEM,
>> + PSI_CGROUP,
>> +};
>> +
>> +struct psi_trigger_params {
>> + /* Trigger type */
>> + enum psi_trigger_type type;
>> +
>> + /* Resources that workloads could be stalled on */
>
> I would describe this as "Resource to be monitored"
Fixed.
>
>> + enum psi_res res;
>> +
>> + /* True if all threads should be stalled to trigger */
>> + bool full;
>> +
>> + /* Threshold in us */
>> + u32 threshold_us;
>> +
>> + /* Window in us */
>> + u32 window_us;
>> +
>> + /* Privileged triggers are treated differently */
>> + bool privileged;
>> +
>> + /* Link to kernfs open file, only for PSI_CGROUP */
>> + struct kernfs_open_file *of;
...
>> t->event = 0;
>> t->last_event_time = 0;
>> - t->of = of;
>> - if (!of)
>> +
>> + switch (params->type) {
>> + case PSI_SYSTEM:
>> init_waitqueue_head(&t->event_wait);
>
> I think t->of will be left uninitialized here. Let's set it to NULL
> please.
Ack.
Thanks!
© 2016 - 2026 Red Hat, Inc.