[PATCH v1 11/14] sched: psi: refactor psi_trigger_create()

Roman Gushchin posted 14 patches 1 month, 2 weeks ago
[PATCH v1 11/14] sched: psi: refactor psi_trigger_create()
Posted by Roman Gushchin 1 month, 2 weeks ago
Currently psi_trigger_create() does a lot of things:
parses the user text input, allocates and initializes
the psi_trigger structure and turns on the trigger.
It does it slightly different for two existing types
of psi_triggers: system-wide and cgroup-wide.

In order to support a new type of psi triggers, which
will be owned by a bpf program and won't have a user's
text description, let's refactor psi_trigger_create().

1. Introduce psi_trigger_type enum:
   currently PSI_SYSTEM and PSI_CGROUP are valid values.
2. Introduce psi_trigger_params structure to avoid passing
   a large number of parameters to psi_trigger_create().
3. Move out the user's input parsing into the new
   psi_trigger_parse() helper.
4. Move out the capabilities check into the new
   psi_file_privileged() helper.
5. Stop relying on t->of for detecting trigger type.

Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
---
 include/linux/psi.h       | 15 +++++--
 include/linux/psi_types.h | 33 ++++++++++++++-
 kernel/cgroup/cgroup.c    | 14 ++++++-
 kernel/sched/psi.c        | 87 +++++++++++++++++++++++++--------------
 4 files changed, 112 insertions(+), 37 deletions(-)

diff --git a/include/linux/psi.h b/include/linux/psi.h
index e0745873e3f2..8178e998d94b 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -23,14 +23,23 @@ void psi_memstall_enter(unsigned long *flags);
 void psi_memstall_leave(unsigned long *flags);
 
 int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
-struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
-				       enum psi_res res, struct file *file,
-				       struct kernfs_open_file *of);
+int psi_trigger_parse(struct psi_trigger_params *params, const char *buf);
+struct psi_trigger *psi_trigger_create(struct psi_group *group,
+				const struct psi_trigger_params *param);
 void psi_trigger_destroy(struct psi_trigger *t);
 
 __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
 			poll_table *wait);
 
+static inline bool psi_file_privileged(struct file *file)
+{
+	/*
+	 * Checking the privilege here on file->f_cred implies that a privileged user
+	 * could open the file and delegate the write to an unprivileged one.
+	 */
+	return cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE);
+}
+
 #ifdef CONFIG_CGROUPS
 static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
 {
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index f1fd3a8044e0..cea54121d9b9 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -121,7 +121,38 @@ struct psi_window {
 	u64 prev_growth;
 };
 
+enum psi_trigger_type {
+	PSI_SYSTEM,
+	PSI_CGROUP,
+};
+
+struct psi_trigger_params {
+	/* Trigger type */
+	enum psi_trigger_type type;
+
+	/* Resources that workloads could be stalled on */
+	enum psi_res res;
+
+	/* True if all threads should be stalled to trigger */
+	bool full;
+
+	/* Threshold in us */
+	u32 threshold_us;
+
+	/* Window in us */
+	u32 window_us;
+
+	/* Privileged triggers are treated differently */
+	bool privileged;
+
+	/* Link to kernfs open file, only for PSI_CGROUP */
+	struct kernfs_open_file *of;
+};
+
 struct psi_trigger {
+	/* Trigger type */
+	enum psi_trigger_type type;
+
 	/* PSI state being monitored by the trigger */
 	enum psi_states state;
 
@@ -137,7 +168,7 @@ struct psi_trigger {
 	/* Wait queue for polling */
 	wait_queue_head_t event_wait;
 
-	/* Kernfs file for cgroup triggers */
+	/* Kernfs file for PSI_CGROUP triggers */
 	struct kernfs_open_file *of;
 
 	/* Pending event flag */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index a723b7dc6e4e..9cd3c3a52c21 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3872,6 +3872,12 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
 	struct psi_trigger *new;
 	struct cgroup *cgrp;
 	struct psi_group *psi;
+	struct psi_trigger_params params;
+	int err;
+
+	err = psi_trigger_parse(&params, buf);
+	if (err)
+		return err;
 
 	cgrp = cgroup_kn_lock_live(of->kn, false);
 	if (!cgrp)
@@ -3887,7 +3893,13 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
 	}
 
 	psi = cgroup_psi(cgrp);
-	new = psi_trigger_create(psi, buf, res, of->file, of);
+
+	params.type = PSI_CGROUP;
+	params.res = res;
+	params.privileged = psi_file_privileged(of->file);
+	params.of = of;
+
+	new = psi_trigger_create(psi, &params);
 	if (IS_ERR(new)) {
 		cgroup_put(cgrp);
 		return PTR_ERR(new);
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index ad04a5c3162a..e1d8eaeeff17 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -489,7 +489,7 @@ static void update_triggers(struct psi_group *group, u64 now,
 
 		/* Generate an event */
 		if (cmpxchg(&t->event, 0, 1) == 0) {
-			if (t->of)
+			if (t->type == PSI_CGROUP)
 				kernfs_notify(t->of->kn);
 			else
 				wake_up_interruptible(&t->event_wait);
@@ -1281,74 +1281,87 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
 	return 0;
 }
 
-struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
-				       enum psi_res res, struct file *file,
-				       struct kernfs_open_file *of)
+int psi_trigger_parse(struct psi_trigger_params *params, const char *buf)
 {
-	struct psi_trigger *t;
-	enum psi_states state;
-	u32 threshold_us;
-	bool privileged;
-	u32 window_us;
+	u32 threshold_us, window_us;
 
 	if (static_branch_likely(&psi_disabled))
-		return ERR_PTR(-EOPNOTSUPP);
-
-	/*
-	 * Checking the privilege here on file->f_cred implies that a privileged user
-	 * could open the file and delegate the write to an unprivileged one.
-	 */
-	privileged = cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE);
+		return -EOPNOTSUPP;
 
 	if (sscanf(buf, "some %u %u", &threshold_us, &window_us) == 2)
-		state = PSI_IO_SOME + res * 2;
+		params->full = false;
 	else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2)
-		state = PSI_IO_FULL + res * 2;
+		params->full = true;
 	else
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
+
+	params->threshold_us = threshold_us;
+	params->window_us = window_us;
+	return 0;
+}
+
+struct psi_trigger *psi_trigger_create(struct psi_group *group,
+				       const struct psi_trigger_params *params)
+{
+	struct psi_trigger *t;
+	enum psi_states state;
+
+	if (static_branch_likely(&psi_disabled))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	state = params->full ? PSI_IO_FULL : PSI_IO_SOME;
+	state += params->res * 2;
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
-	if (res == PSI_IRQ && --state != PSI_IRQ_FULL)
+	if (params->res == PSI_IRQ && --state != PSI_IRQ_FULL)
 		return ERR_PTR(-EINVAL);
 #endif
 
 	if (state >= PSI_NONIDLE)
 		return ERR_PTR(-EINVAL);
 
-	if (window_us == 0 || window_us > WINDOW_MAX_US)
+	if (params->window_us == 0 || params->window_us > WINDOW_MAX_US)
 		return ERR_PTR(-EINVAL);
 
 	/*
 	 * Unprivileged users can only use 2s windows so that averages aggregation
 	 * work is used, and no RT threads need to be spawned.
 	 */
-	if (!privileged && window_us % 2000000)
+	if (!params->privileged && params->window_us % 2000000)
 		return ERR_PTR(-EINVAL);
 
 	/* Check threshold */
-	if (threshold_us == 0 || threshold_us > window_us)
+	if (params->threshold_us == 0 || params->threshold_us > params->window_us)
 		return ERR_PTR(-EINVAL);
 
 	t = kmalloc(sizeof(*t), GFP_KERNEL);
 	if (!t)
 		return ERR_PTR(-ENOMEM);
 
+	t->type = params->type;
 	t->group = group;
 	t->state = state;
-	t->threshold = threshold_us * NSEC_PER_USEC;
-	t->win.size = window_us * NSEC_PER_USEC;
+	t->threshold = params->threshold_us * NSEC_PER_USEC;
+	t->win.size = params->window_us * NSEC_PER_USEC;
 	window_reset(&t->win, sched_clock(),
 			group->total[PSI_POLL][t->state], 0);
 
 	t->event = 0;
 	t->last_event_time = 0;
-	t->of = of;
-	if (!of)
+
+	switch (params->type) {
+	case PSI_SYSTEM:
 		init_waitqueue_head(&t->event_wait);
+		break;
+	case PSI_CGROUP:
+		t->of = params->of;
+		break;
+	}
+
 	t->pending_event = false;
-	t->aggregator = privileged ? PSI_POLL : PSI_AVGS;
+	t->aggregator = params->privileged ? PSI_POLL : PSI_AVGS;
 
-	if (privileged) {
+	if (params->privileged) {
 		mutex_lock(&group->rtpoll_trigger_lock);
 
 		if (!rcu_access_pointer(group->rtpoll_task)) {
@@ -1401,7 +1414,7 @@ void psi_trigger_destroy(struct psi_trigger *t)
 	 * being accessed later. Can happen if cgroup is deleted from under a
 	 * polling process.
 	 */
-	if (t->of)
+	if (t->type == PSI_CGROUP)
 		kernfs_notify(t->of->kn);
 	else
 		wake_up_interruptible(&t->event_wait);
@@ -1481,7 +1494,7 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
 	if (!t)
 		return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
 
-	if (t->of)
+	if (t->type == PSI_CGROUP)
 		kernfs_generic_poll(t->of, wait);
 	else
 		poll_wait(file, &t->event_wait, wait);
@@ -1530,6 +1543,8 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
 	size_t buf_size;
 	struct seq_file *seq;
 	struct psi_trigger *new;
+	struct psi_trigger_params params;
+	int err;
 
 	if (static_branch_likely(&psi_disabled))
 		return -EOPNOTSUPP;
@@ -1543,6 +1558,10 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
 
 	buf[buf_size - 1] = '\0';
 
+	err = psi_trigger_parse(&params, buf);
+	if (err)
+		return err;
+
 	seq = file->private_data;
 
 	/* Take seq->lock to protect seq->private from concurrent writes */
@@ -1554,7 +1573,11 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
 		return -EBUSY;
 	}
 
-	new = psi_trigger_create(&psi_system, buf, res, file, NULL);
+	params.type = PSI_SYSTEM;
+	params.res = res;
+	params.privileged = psi_file_privileged(file);
+
+	new = psi_trigger_create(&psi_system, &params);
 	if (IS_ERR(new)) {
 		mutex_unlock(&seq->lock);
 		return PTR_ERR(new);
-- 
2.50.1
Re: [PATCH v1 11/14] sched: psi: refactor psi_trigger_create()
Posted by Suren Baghdasaryan 1 month, 2 weeks ago
On Mon, Aug 18, 2025 at 10:02 AM Roman Gushchin
<roman.gushchin@linux.dev> wrote:
>
> Currently psi_trigger_create() does a lot of things:
> parses the user text input, allocates and initializes
> the psi_trigger structure and turns on the trigger.
> It does it slightly different for two existing types
> of psi_triggers: system-wide and cgroup-wide.
>
> In order to support a new type of psi triggers, which
> will be owned by a bpf program and won't have a user's
> text description, let's refactor psi_trigger_create().
>
> 1. Introduce psi_trigger_type enum:
>    currently PSI_SYSTEM and PSI_CGROUP are valid values.
> 2. Introduce psi_trigger_params structure to avoid passing
>    a large number of parameters to psi_trigger_create().
> 3. Move out the user's input parsing into the new
>    psi_trigger_parse() helper.
> 4. Move out the capabilities check into the new
>    psi_file_privileged() helper.
> 5. Stop relying on t->of for detecting trigger type.

It's worth noting that this is a pure core refactoring without any
functional change (hopefully :))

>
> Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
> ---
>  include/linux/psi.h       | 15 +++++--
>  include/linux/psi_types.h | 33 ++++++++++++++-
>  kernel/cgroup/cgroup.c    | 14 ++++++-
>  kernel/sched/psi.c        | 87 +++++++++++++++++++++++++--------------
>  4 files changed, 112 insertions(+), 37 deletions(-)
>
> diff --git a/include/linux/psi.h b/include/linux/psi.h
> index e0745873e3f2..8178e998d94b 100644
> --- a/include/linux/psi.h
> +++ b/include/linux/psi.h
> @@ -23,14 +23,23 @@ void psi_memstall_enter(unsigned long *flags);
>  void psi_memstall_leave(unsigned long *flags);
>
>  int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
> -struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
> -                                      enum psi_res res, struct file *file,
> -                                      struct kernfs_open_file *of);
> +int psi_trigger_parse(struct psi_trigger_params *params, const char *buf);
> +struct psi_trigger *psi_trigger_create(struct psi_group *group,
> +                               const struct psi_trigger_params *param);
>  void psi_trigger_destroy(struct psi_trigger *t);
>
>  __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
>                         poll_table *wait);
>
> +static inline bool psi_file_privileged(struct file *file)
> +{
> +       /*
> +        * Checking the privilege here on file->f_cred implies that a privileged user
> +        * could open the file and delegate the write to an unprivileged one.
> +        */
> +       return cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE);
> +}
> +
>  #ifdef CONFIG_CGROUPS
>  static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
>  {
> diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
> index f1fd3a8044e0..cea54121d9b9 100644
> --- a/include/linux/psi_types.h
> +++ b/include/linux/psi_types.h
> @@ -121,7 +121,38 @@ struct psi_window {
>         u64 prev_growth;
>  };
>
> +enum psi_trigger_type {
> +       PSI_SYSTEM,
> +       PSI_CGROUP,
> +};
> +
> +struct psi_trigger_params {
> +       /* Trigger type */
> +       enum psi_trigger_type type;
> +
> +       /* Resources that workloads could be stalled on */

I would describe this as "Resource to be monitored"

> +       enum psi_res res;
> +
> +       /* True if all threads should be stalled to trigger */
> +       bool full;
> +
> +       /* Threshold in us */
> +       u32 threshold_us;
> +
> +       /* Window in us */
> +       u32 window_us;
> +
> +       /* Privileged triggers are treated differently */
> +       bool privileged;
> +
> +       /* Link to kernfs open file, only for PSI_CGROUP */
> +       struct kernfs_open_file *of;
> +};
> +
>  struct psi_trigger {
> +       /* Trigger type */
> +       enum psi_trigger_type type;
> +
>         /* PSI state being monitored by the trigger */
>         enum psi_states state;
>
> @@ -137,7 +168,7 @@ struct psi_trigger {
>         /* Wait queue for polling */
>         wait_queue_head_t event_wait;
>
> -       /* Kernfs file for cgroup triggers */
> +       /* Kernfs file for PSI_CGROUP triggers */
>         struct kernfs_open_file *of;
>
>         /* Pending event flag */
> diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
> index a723b7dc6e4e..9cd3c3a52c21 100644
> --- a/kernel/cgroup/cgroup.c
> +++ b/kernel/cgroup/cgroup.c
> @@ -3872,6 +3872,12 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
>         struct psi_trigger *new;
>         struct cgroup *cgrp;
>         struct psi_group *psi;
> +       struct psi_trigger_params params;
> +       int err;
> +
> +       err = psi_trigger_parse(&params, buf);
> +       if (err)
> +               return err;
>
>         cgrp = cgroup_kn_lock_live(of->kn, false);
>         if (!cgrp)
> @@ -3887,7 +3893,13 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
>         }
>
>         psi = cgroup_psi(cgrp);
> -       new = psi_trigger_create(psi, buf, res, of->file, of);
> +
> +       params.type = PSI_CGROUP;
> +       params.res = res;
> +       params.privileged = psi_file_privileged(of->file);
> +       params.of = of;
> +
> +       new = psi_trigger_create(psi, &params);
>         if (IS_ERR(new)) {
>                 cgroup_put(cgrp);
>                 return PTR_ERR(new);
> diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> index ad04a5c3162a..e1d8eaeeff17 100644
> --- a/kernel/sched/psi.c
> +++ b/kernel/sched/psi.c
> @@ -489,7 +489,7 @@ static void update_triggers(struct psi_group *group, u64 now,
>
>                 /* Generate an event */
>                 if (cmpxchg(&t->event, 0, 1) == 0) {
> -                       if (t->of)
> +                       if (t->type == PSI_CGROUP)
>                                 kernfs_notify(t->of->kn);
>                         else
>                                 wake_up_interruptible(&t->event_wait);
> @@ -1281,74 +1281,87 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
>         return 0;
>  }
>
> -struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
> -                                      enum psi_res res, struct file *file,
> -                                      struct kernfs_open_file *of)
> +int psi_trigger_parse(struct psi_trigger_params *params, const char *buf)
>  {
> -       struct psi_trigger *t;
> -       enum psi_states state;
> -       u32 threshold_us;
> -       bool privileged;
> -       u32 window_us;
> +       u32 threshold_us, window_us;
>
>         if (static_branch_likely(&psi_disabled))
> -               return ERR_PTR(-EOPNOTSUPP);
> -
> -       /*
> -        * Checking the privilege here on file->f_cred implies that a privileged user
> -        * could open the file and delegate the write to an unprivileged one.
> -        */
> -       privileged = cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE);
> +               return -EOPNOTSUPP;
>
>         if (sscanf(buf, "some %u %u", &threshold_us, &window_us) == 2)
> -               state = PSI_IO_SOME + res * 2;
> +               params->full = false;
>         else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2)
> -               state = PSI_IO_FULL + res * 2;
> +               params->full = true;
>         else
> -               return ERR_PTR(-EINVAL);
> +               return -EINVAL;
> +
> +       params->threshold_us = threshold_us;
> +       params->window_us = window_us;
> +       return 0;
> +}
> +
> +struct psi_trigger *psi_trigger_create(struct psi_group *group,
> +                                      const struct psi_trigger_params *params)
> +{
> +       struct psi_trigger *t;
> +       enum psi_states state;
> +
> +       if (static_branch_likely(&psi_disabled))
> +               return ERR_PTR(-EOPNOTSUPP);
> +
> +       state = params->full ? PSI_IO_FULL : PSI_IO_SOME;
> +       state += params->res * 2;
>
>  #ifdef CONFIG_IRQ_TIME_ACCOUNTING
> -       if (res == PSI_IRQ && --state != PSI_IRQ_FULL)
> +       if (params->res == PSI_IRQ && --state != PSI_IRQ_FULL)
>                 return ERR_PTR(-EINVAL);
>  #endif
>
>         if (state >= PSI_NONIDLE)
>                 return ERR_PTR(-EINVAL);
>
> -       if (window_us == 0 || window_us > WINDOW_MAX_US)
> +       if (params->window_us == 0 || params->window_us > WINDOW_MAX_US)
>                 return ERR_PTR(-EINVAL);
>
>         /*
>          * Unprivileged users can only use 2s windows so that averages aggregation
>          * work is used, and no RT threads need to be spawned.
>          */
> -       if (!privileged && window_us % 2000000)
> +       if (!params->privileged && params->window_us % 2000000)
>                 return ERR_PTR(-EINVAL);
>
>         /* Check threshold */
> -       if (threshold_us == 0 || threshold_us > window_us)
> +       if (params->threshold_us == 0 || params->threshold_us > params->window_us)
>                 return ERR_PTR(-EINVAL);
>
>         t = kmalloc(sizeof(*t), GFP_KERNEL);
>         if (!t)
>                 return ERR_PTR(-ENOMEM);
>
> +       t->type = params->type;
>         t->group = group;
>         t->state = state;
> -       t->threshold = threshold_us * NSEC_PER_USEC;
> -       t->win.size = window_us * NSEC_PER_USEC;
> +       t->threshold = params->threshold_us * NSEC_PER_USEC;
> +       t->win.size = params->window_us * NSEC_PER_USEC;
>         window_reset(&t->win, sched_clock(),
>                         group->total[PSI_POLL][t->state], 0);
>
>         t->event = 0;
>         t->last_event_time = 0;
> -       t->of = of;
> -       if (!of)
> +
> +       switch (params->type) {
> +       case PSI_SYSTEM:
>                 init_waitqueue_head(&t->event_wait);

I think t->of will be left uninitialized here. Let's set it to NULL please.


> +               break;
> +       case PSI_CGROUP:
> +               t->of = params->of;
> +               break;
> +       }
> +
>         t->pending_event = false;
> -       t->aggregator = privileged ? PSI_POLL : PSI_AVGS;
> +       t->aggregator = params->privileged ? PSI_POLL : PSI_AVGS;
>
> -       if (privileged) {
> +       if (params->privileged) {
>                 mutex_lock(&group->rtpoll_trigger_lock);
>
>                 if (!rcu_access_pointer(group->rtpoll_task)) {
> @@ -1401,7 +1414,7 @@ void psi_trigger_destroy(struct psi_trigger *t)
>          * being accessed later. Can happen if cgroup is deleted from under a
>          * polling process.
>          */
> -       if (t->of)
> +       if (t->type == PSI_CGROUP)
>                 kernfs_notify(t->of->kn);
>         else
>                 wake_up_interruptible(&t->event_wait);
> @@ -1481,7 +1494,7 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
>         if (!t)
>                 return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
>
> -       if (t->of)
> +       if (t->type == PSI_CGROUP)
>                 kernfs_generic_poll(t->of, wait);
>         else
>                 poll_wait(file, &t->event_wait, wait);
> @@ -1530,6 +1543,8 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
>         size_t buf_size;
>         struct seq_file *seq;
>         struct psi_trigger *new;
> +       struct psi_trigger_params params;
> +       int err;
>
>         if (static_branch_likely(&psi_disabled))
>                 return -EOPNOTSUPP;
> @@ -1543,6 +1558,10 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
>
>         buf[buf_size - 1] = '\0';
>
> +       err = psi_trigger_parse(&params, buf);
> +       if (err)
> +               return err;
> +
>         seq = file->private_data;
>
>         /* Take seq->lock to protect seq->private from concurrent writes */
> @@ -1554,7 +1573,11 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
>                 return -EBUSY;
>         }
>
> -       new = psi_trigger_create(&psi_system, buf, res, file, NULL);
> +       params.type = PSI_SYSTEM;
> +       params.res = res;
> +       params.privileged = psi_file_privileged(file);
> +
> +       new = psi_trigger_create(&psi_system, &params);
>         if (IS_ERR(new)) {
>                 mutex_unlock(&seq->lock);
>                 return PTR_ERR(new);
> --
> 2.50.1
>
Re: [PATCH v1 11/14] sched: psi: refactor psi_trigger_create()
Posted by Roman Gushchin 1 month, 2 weeks ago
Suren Baghdasaryan <surenb@google.com> writes:

> On Mon, Aug 18, 2025 at 10:02 AM Roman Gushchin
> <roman.gushchin@linux.dev> wrote:
>>
>> Currently psi_trigger_create() does a lot of things:
>> parses the user text input, allocates and initializes
>> the psi_trigger structure and turns on the trigger.
>> It does it slightly different for two existing types
>> of psi_triggers: system-wide and cgroup-wide.
>>
>> In order to support a new type of psi triggers, which
>> will be owned by a bpf program and won't have a user's
>> text description, let's refactor psi_trigger_create().
>>
>> 1. Introduce psi_trigger_type enum:
>>    currently PSI_SYSTEM and PSI_CGROUP are valid values.
>> 2. Introduce psi_trigger_params structure to avoid passing
>>    a large number of parameters to psi_trigger_create().
>> 3. Move out the user's input parsing into the new
>>    psi_trigger_parse() helper.
>> 4. Move out the capabilities check into the new
>>    psi_file_privileged() helper.
>> 5. Stop relying on t->of for detecting trigger type.
>
> It's worth noting that this is a pure core refactoring without any
> functional change (hopefully :))

Added this to the commit log.

>
>>
>> Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
>> ---
>>  include/linux/psi.h       | 15 +++++--
>>  include/linux/psi_types.h | 33 ++++++++++++++-
>>  kernel/cgroup/cgroup.c    | 14 ++++++-
>>  kernel/sched/psi.c        | 87 +++++++++++++++++++++++++--------------
>>  4 files changed, 112 insertions(+), 37 deletions(-)
>>
>> diff --git a/include/linux/psi.h b/include/linux/psi.h
>> index e0745873e3f2..8178e998d94b 100644
>> --- a/include/linux/psi.h
>> +++ b/include/linux/psi.h
>> @@ -23,14 +23,23 @@ void psi_memstall_enter(unsigned long *flags);
>>  void psi_memstall_leave(unsigned long *flags);
>>
>>  int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
>> -struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
>> -                                      enum psi_res res, struct file *file,
>> -                                      struct kernfs_open_file *of);
>> +int psi_trigger_parse(struct psi_trigger_params *params, const char *buf);
>> +struct psi_trigger *psi_trigger_create(struct psi_group *group,
>> +                               const struct psi_trigger_params *param);
>>  void psi_trigger_destroy(struct psi_trigger *t);
>>
>>  __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
>>                         poll_table *wait);
>>
>> +static inline bool psi_file_privileged(struct file *file)
>> +{
>> +       /*
>> +        * Checking the privilege here on file->f_cred implies that a privileged user
>> +        * could open the file and delegate the write to an unprivileged one.
>> +        */
>> +       return cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE);
>> +}
>> +
>>  #ifdef CONFIG_CGROUPS
>>  static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
>>  {
>> diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
>> index f1fd3a8044e0..cea54121d9b9 100644
>> --- a/include/linux/psi_types.h
>> +++ b/include/linux/psi_types.h
>> @@ -121,7 +121,38 @@ struct psi_window {
>>         u64 prev_growth;
>>  };
>>
>> +enum psi_trigger_type {
>> +       PSI_SYSTEM,
>> +       PSI_CGROUP,
>> +};
>> +
>> +struct psi_trigger_params {
>> +       /* Trigger type */
>> +       enum psi_trigger_type type;
>> +
>> +       /* Resources that workloads could be stalled on */
>
> I would describe this as "Resource to be monitored"

Fixed.

>
>> +       enum psi_res res;
>> +
>> +       /* True if all threads should be stalled to trigger */
>> +       bool full;
>> +
>> +       /* Threshold in us */
>> +       u32 threshold_us;
>> +
>> +       /* Window in us */
>> +       u32 window_us;
>> +
>> +       /* Privileged triggers are treated differently */
>> +       bool privileged;
>> +
>> +       /* Link to kernfs open file, only for PSI_CGROUP */
>> +       struct kernfs_open_file *of;
...
>>         t->event = 0;
>>         t->last_event_time = 0;
>> -       t->of = of;
>> -       if (!of)
>> +
>> +       switch (params->type) {
>> +       case PSI_SYSTEM:
>>                 init_waitqueue_head(&t->event_wait);
>
> I think t->of will be left uninitialized here. Let's set it to NULL
> please.

Ack.

Thanks!