This commit restores the previously removed functions kthread_wakeup and
kthread_stop, and introduces a new ops structure to handle worker wakeup,
stop, and creation. The function vhost_worker_create initializes these
ops pointers based on the inherit_owner value.
Signed-off-by: Cindy Lu <lulu@redhat.com>
---
drivers/vhost/vhost.c | 115 +++++++++++++++++++++++++++++++++++-------
drivers/vhost/vhost.h | 12 +++++
2 files changed, 110 insertions(+), 17 deletions(-)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index adbb957c8b5f..d8c0ea118bb1 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -243,7 +243,7 @@ static void vhost_worker_queue(struct vhost_worker *worker,
* test_and_set_bit() implies a memory barrier.
*/
llist_add(&work->node, &worker->work_list);
- vhost_task_wake(worker->vtsk);
+ worker->ops->wakeup(worker);
}
}
@@ -697,7 +697,7 @@ static void vhost_worker_destroy(struct vhost_dev *dev,
WARN_ON(!llist_empty(&worker->work_list));
xa_erase(&dev->worker_xa, worker->id);
- vhost_task_stop(worker->vtsk);
+ worker->ops->stop(worker);
kfree(worker);
}
@@ -720,42 +720,123 @@ static void vhost_workers_free(struct vhost_dev *dev)
xa_destroy(&dev->worker_xa);
}
+static void vhost_task_wakeup_fn(struct vhost_worker *worker)
+{
+ return vhost_task_wake(worker->vtsk);
+}
+
+static void vhost_kthread_wakeup_fn(struct vhost_worker *worker)
+{
+ wake_up_process(worker->kthread_task);
+}
+
+static void vhost_task_stop_fn(struct vhost_worker *worker)
+{
+ return vhost_task_stop(worker->vtsk);
+}
+
+static void vhost_kthread_stop_fn(struct vhost_worker *worker)
+{
+ kthread_stop(worker->kthread_task);
+}
+
+static int vhost_task_worker_create_fn(struct vhost_worker *worker,
+ struct vhost_dev *dev, const char *name)
+{
+ struct vhost_task *vtsk;
+ u32 id;
+ int ret;
+
+ vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
+ worker, name);
+ if (!vtsk)
+ return -ENOMEM;
+
+ worker->vtsk = vtsk;
+ vhost_task_start(vtsk);
+ ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
+ if (ret < 0) {
+ vhost_task_stop_fn(worker);
+ return ret;
+ }
+ worker->id = id;
+ return 0;
+}
+
+static int kthread_worker_create_fn(struct vhost_worker *worker,
+ struct vhost_dev *dev, const char *name)
+{
+ struct task_struct *task;
+ u32 id;
+ int ret;
+
+ task = kthread_create(vhost_run_work_kthread_list, worker, "%s", name);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+
+ worker->kthread_task = task;
+ wake_up_process(task);
+ ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
+ if (ret < 0)
+ goto stop_worker;
+
+ ret = vhost_attach_task_to_cgroups(worker);
+ if (ret)
+ goto stop_worker;
+
+ worker->id = id;
+ return 0;
+
+stop_worker:
+ vhost_kthread_stop_fn(worker);
+ return ret;
+}
+
+static const struct vhost_worker_ops vhost_task_ops = {
+ .create = vhost_task_worker_create_fn,
+ .stop = vhost_task_stop_fn,
+ .wakeup = vhost_task_wakeup_fn,
+};
+
+static const struct vhost_worker_ops kthread_ops = {
+ .create = kthread_worker_create_fn,
+ .stop = vhost_kthread_stop_fn,
+ .wakeup = vhost_kthread_wakeup_fn,
+};
+
static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
{
struct vhost_worker *worker;
- struct vhost_task *vtsk;
char name[TASK_COMM_LEN];
int ret;
- u32 id;
+ const struct vhost_worker_ops *ops =
+ dev->inherit_owner ? &vhost_task_ops : &kthread_ops;
worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
if (!worker)
return NULL;
worker->dev = dev;
+ worker->ops = ops;
snprintf(name, sizeof(name), "vhost-%d", current->pid);
- vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
- worker, name);
- if (!vtsk)
- goto free_worker;
-
mutex_init(&worker->mutex);
init_llist_head(&worker->work_list);
worker->kcov_handle = kcov_common_handle();
- worker->vtsk = vtsk;
-
- vhost_task_start(vtsk);
+ /*
+ * If inherit_owner is true we use vhost_tasks to create
+ * the worker so all settings/limits like cgroups, NPROC,
+ * scheduler, etc are inherited from the owner. If false,
+ * we use kthreads and only attach to the same cgroups
+ * as the owner for compat with older kernels.
+ */
- ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
+ ret = ops->create(worker, dev, name);
if (ret < 0)
- goto stop_worker;
- worker->id = id;
+ goto free_worker;
return worker;
-stop_worker:
- vhost_task_stop(vtsk);
free_worker:
kfree(worker);
return NULL;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index c650c4506c70..029c203147be 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -26,7 +26,18 @@ struct vhost_work {
unsigned long flags;
};
+struct vhost_worker;
+struct vhost_dev;
+
+struct vhost_worker_ops {
+ int (*create)(struct vhost_worker *worker, struct vhost_dev *dev,
+ const char *name);
+ void (*stop)(struct vhost_worker *worker);
+ void (*wakeup)(struct vhost_worker *worker);
+};
+
struct vhost_worker {
+ struct task_struct *kthread_task;
struct vhost_task *vtsk;
struct vhost_dev *dev;
/* Used to serialize device wide flushing with worker swapping. */
@@ -36,6 +47,7 @@ struct vhost_worker {
u32 id;
int attachment_cnt;
bool killed;
+ const struct vhost_worker_ops *ops;
};
/* Poll a file (eventfd or socket) */
--
2.45.0
On Sun, Feb 23, 2025 at 11:41 PM Cindy Lu <lulu@redhat.com> wrote:
>
> This commit restores the previously removed functions kthread_wakeup and
> kthread_stop, and introduces a new ops structure to handle worker wakeup,
> stop, and creation. The function vhost_worker_create initializes these
> ops pointers based on the inherit_owner value.
>
> Signed-off-by: Cindy Lu <lulu@redhat.com>
Patch looks good, but I have some nits:
It might be better to have a separate patch that introduces the ops, and
then do the kthread stuff on top.
> ---
> drivers/vhost/vhost.c | 115 +++++++++++++++++++++++++++++++++++-------
> drivers/vhost/vhost.h | 12 +++++
> 2 files changed, 110 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index adbb957c8b5f..d8c0ea118bb1 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -243,7 +243,7 @@ static void vhost_worker_queue(struct vhost_worker *worker,
> * test_and_set_bit() implies a memory barrier.
> */
> llist_add(&work->node, &worker->work_list);
> - vhost_task_wake(worker->vtsk);
> + worker->ops->wakeup(worker);
> }
> }
>
> @@ -697,7 +697,7 @@ static void vhost_worker_destroy(struct vhost_dev *dev,
>
> WARN_ON(!llist_empty(&worker->work_list));
> xa_erase(&dev->worker_xa, worker->id);
> - vhost_task_stop(worker->vtsk);
> + worker->ops->stop(worker);
> kfree(worker);
> }
>
> @@ -720,42 +720,123 @@ static void vhost_workers_free(struct vhost_dev *dev)
> xa_destroy(&dev->worker_xa);
> }
>
> +static void vhost_task_wakeup_fn(struct vhost_worker *worker)
> +{
> + return vhost_task_wake(worker->vtsk);
> +}
> +
> +static void vhost_kthread_wakeup_fn(struct vhost_worker *worker)
> +{
> + wake_up_process(worker->kthread_task);
> +}
> +
> +static void vhost_task_stop_fn(struct vhost_worker *worker)
> +{
> + return vhost_task_stop(worker->vtsk);
> +}
> +
> +static void vhost_kthread_stop_fn(struct vhost_worker *worker)
> +{
> + kthread_stop(worker->kthread_task);
> +}
> +
> +static int vhost_task_worker_create_fn(struct vhost_worker *worker,
> + struct vhost_dev *dev, const char *name)
> +{
> + struct vhost_task *vtsk;
> + u32 id;
> + int ret;
> +
> + vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
> + worker, name);
> + if (!vtsk)
> + return -ENOMEM;
> +
> + worker->vtsk = vtsk;
> + vhost_task_start(vtsk);
> + ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
> + if (ret < 0) {
> + vhost_task_stop_fn(worker);
> + return ret;
> + }
> + worker->id = id;
> + return 0;
> +}
> +
> +static int kthread_worker_create_fn(struct vhost_worker *worker,
Let's have a consistent name, e.g. vhost_kthread_worker_create.
> + struct vhost_dev *dev, const char *name)
> +{
> + struct task_struct *task;
> + u32 id;
> + int ret;
> +
> + task = kthread_create(vhost_run_work_kthread_list, worker, "%s", name);
> + if (IS_ERR(task))
> + return PTR_ERR(task);
> +
> + worker->kthread_task = task;
> + wake_up_process(task);
> + ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
> + if (ret < 0)
> + goto stop_worker;
> +
> + ret = vhost_attach_task_to_cgroups(worker);
> + if (ret)
> + goto stop_worker;
> +
> + worker->id = id;
> + return 0;
> +
> +stop_worker:
> + vhost_kthread_stop_fn(worker);
> + return ret;
> +}
> +
> +static const struct vhost_worker_ops vhost_task_ops = {
> + .create = vhost_task_worker_create_fn,
I think we can get rid of the "_fn" suffix.
> + .stop = vhost_task_stop_fn,
> + .wakeup = vhost_task_wakeup_fn,
> +};
> +
> +static const struct vhost_worker_ops kthread_ops = {
> + .create = kthread_worker_create_fn,
> + .stop = vhost_kthread_stop_fn,
> + .wakeup = vhost_kthread_wakeup_fn,
> +};
> +
> static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
> {
> struct vhost_worker *worker;
> - struct vhost_task *vtsk;
> char name[TASK_COMM_LEN];
> int ret;
> - u32 id;
> + const struct vhost_worker_ops *ops =
> + dev->inherit_owner ? &vhost_task_ops : &kthread_ops;
>
> worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
> if (!worker)
> return NULL;
>
> worker->dev = dev;
> + worker->ops = ops;
> snprintf(name, sizeof(name), "vhost-%d", current->pid);
>
> - vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
> - worker, name);
> - if (!vtsk)
> - goto free_worker;
> -
> mutex_init(&worker->mutex);
> init_llist_head(&worker->work_list);
> worker->kcov_handle = kcov_common_handle();
> - worker->vtsk = vtsk;
> -
> - vhost_task_start(vtsk);
> + /*
> + * If inherit_owner is true we use vhost_tasks to create
> + * the worker so all settings/limits like cgroups, NPROC,
> + * scheduler, etc are inherited from the owner. If false,
> + * we use kthreads and only attach to the same cgroups
> + * as the owner for compat with older kernels.
> + */
Would it be better to move this comment to the definition of inherit_owner?
>
> - ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
> + ret = ops->create(worker, dev, name);
> if (ret < 0)
> - goto stop_worker;
> - worker->id = id;
> + goto free_worker;
>
> return worker;
>
> -stop_worker:
> - vhost_task_stop(vtsk);
> free_worker:
> kfree(worker);
> return NULL;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index c650c4506c70..029c203147be 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -26,7 +26,18 @@ struct vhost_work {
> unsigned long flags;
> };
>
> +struct vhost_worker;
> +struct vhost_dev;
> +
> +struct vhost_worker_ops {
> + int (*create)(struct vhost_worker *worker, struct vhost_dev *dev,
> + const char *name);
> + void (*stop)(struct vhost_worker *worker);
> + void (*wakeup)(struct vhost_worker *worker);
> +};
> +
> struct vhost_worker {
> + struct task_struct *kthread_task;
> struct vhost_task *vtsk;
> struct vhost_dev *dev;
> /* Used to serialize device wide flushing with worker swapping. */
> @@ -36,6 +47,7 @@ struct vhost_worker {
> u32 id;
> int attachment_cnt;
> bool killed;
> + const struct vhost_worker_ops *ops;
> };
>
> /* Poll a file (eventfd or socket) */
> --
> 2.45.0
>
Thanks
On Mon, Feb 24, 2025 at 9:45 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Sun, Feb 23, 2025 at 11:41 PM Cindy Lu <lulu@redhat.com> wrote:
> >
> > This commit restores the previously removed functions kthread_wakeup and
> > kthread_stop, and introduces a new ops structure to handle worker wakeup,
> > stop, and creation. The function vhost_worker_create initializes these
> > ops pointers based on the inherit_owner value.
> >
> > Signed-off-by: Cindy Lu <lulu@redhat.com>
>
> Patch looks good, but I have some nits:
>
> It might be better to have a separate patch that introduces the ops, and
> then do the kthread stuff on top.
>
sure, will do
thanks
cindy
> > ---
> > drivers/vhost/vhost.c | 115 +++++++++++++++++++++++++++++++++++-------
> > drivers/vhost/vhost.h | 12 +++++
> > 2 files changed, 110 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > index adbb957c8b5f..d8c0ea118bb1 100644
> > --- a/drivers/vhost/vhost.c
> > +++ b/drivers/vhost/vhost.c
> > @@ -243,7 +243,7 @@ static void vhost_worker_queue(struct vhost_worker *worker,
> > * test_and_set_bit() implies a memory barrier.
> > */
> > llist_add(&work->node, &worker->work_list);
> > - vhost_task_wake(worker->vtsk);
> > + worker->ops->wakeup(worker);
> > }
> > }
> >
> > @@ -697,7 +697,7 @@ static void vhost_worker_destroy(struct vhost_dev *dev,
> >
> > WARN_ON(!llist_empty(&worker->work_list));
> > xa_erase(&dev->worker_xa, worker->id);
> > - vhost_task_stop(worker->vtsk);
> > + worker->ops->stop(worker);
> > kfree(worker);
> > }
> >
> > @@ -720,42 +720,123 @@ static void vhost_workers_free(struct vhost_dev *dev)
> > xa_destroy(&dev->worker_xa);
> > }
> >
> > +static void vhost_task_wakeup_fn(struct vhost_worker *worker)
> > +{
> > + return vhost_task_wake(worker->vtsk);
> > +}
> > +
> > +static void vhost_kthread_wakeup_fn(struct vhost_worker *worker)
> > +{
> > + wake_up_process(worker->kthread_task);
> > +}
> > +
> > +static void vhost_task_stop_fn(struct vhost_worker *worker)
> > +{
> > + return vhost_task_stop(worker->vtsk);
> > +}
> > +
> > +static void vhost_kthread_stop_fn(struct vhost_worker *worker)
> > +{
> > + kthread_stop(worker->kthread_task);
> > +}
> > +
> > +static int vhost_task_worker_create_fn(struct vhost_worker *worker,
> > + struct vhost_dev *dev, const char *name)
> > +{
> > + struct vhost_task *vtsk;
> > + u32 id;
> > + int ret;
> > +
> > + vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
> > + worker, name);
> > + if (!vtsk)
> > + return -ENOMEM;
> > +
> > + worker->vtsk = vtsk;
> > + vhost_task_start(vtsk);
> > + ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
> > + if (ret < 0) {
> > + vhost_task_stop_fn(worker);
> > + return ret;
> > + }
> > + worker->id = id;
> > + return 0;
> > +}
> > +
> > +static int kthread_worker_create_fn(struct vhost_worker *worker,
>
> Let's have a consistent name, e.g. vhost_kthread_worker_create.
>
> > + struct vhost_dev *dev, const char *name)
> > +{
> > + struct task_struct *task;
> > + u32 id;
> > + int ret;
> > +
> > + task = kthread_create(vhost_run_work_kthread_list, worker, "%s", name);
> > + if (IS_ERR(task))
> > + return PTR_ERR(task);
> > +
> > + worker->kthread_task = task;
> > + wake_up_process(task);
> > + ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
> > + if (ret < 0)
> > + goto stop_worker;
> > +
> > + ret = vhost_attach_task_to_cgroups(worker);
> > + if (ret)
> > + goto stop_worker;
> > +
> > + worker->id = id;
> > + return 0;
> > +
> > +stop_worker:
> > + vhost_kthread_stop_fn(worker);
> > + return ret;
> > +}
> > +
> > +static const struct vhost_worker_ops vhost_task_ops = {
> > + .create = vhost_task_worker_create_fn,
>
> I think we can get rid of the "_fn" suffix.
>
sure, will do
thanks
Cindy
> > + .stop = vhost_task_stop_fn,
> > + .wakeup = vhost_task_wakeup_fn,
> > +};
> > +
> > +static const struct vhost_worker_ops kthread_ops = {
> > + .create = kthread_worker_create_fn,
> > + .stop = vhost_kthread_stop_fn,
> > + .wakeup = vhost_kthread_wakeup_fn,
> > +};
> > +
> > static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
> > {
> > struct vhost_worker *worker;
> > - struct vhost_task *vtsk;
> > char name[TASK_COMM_LEN];
> > int ret;
> > - u32 id;
> > + const struct vhost_worker_ops *ops =
> > + dev->inherit_owner ? &vhost_task_ops : &kthread_ops;
> >
> > worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
> > if (!worker)
> > return NULL;
> >
> > worker->dev = dev;
> > + worker->ops = ops;
> > snprintf(name, sizeof(name), "vhost-%d", current->pid);
> >
> > - vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
> > - worker, name);
> > - if (!vtsk)
> > - goto free_worker;
> > -
> > mutex_init(&worker->mutex);
> > init_llist_head(&worker->work_list);
> > worker->kcov_handle = kcov_common_handle();
> > - worker->vtsk = vtsk;
> > -
> > - vhost_task_start(vtsk);
> > + /*
> > + * If inherit_owner is true we use vhost_tasks to create
> > + * the worker so all settings/limits like cgroups, NPROC,
> > + * scheduler, etc are inherited from the owner. If false,
> > + * we use kthreads and only attach to the same cgroups
> > + * as the owner for compat with older kernels.
> > + */
>
> Would it be better to move this comment to the definition of inherit_owner?
>
sure will do
Thanks
cindy
> >
> > - ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL);
> > + ret = ops->create(worker, dev, name);
> > if (ret < 0)
> > - goto stop_worker;
> > - worker->id = id;
> > + goto free_worker;
> >
> > return worker;
> >
> > -stop_worker:
> > - vhost_task_stop(vtsk);
> > free_worker:
> > kfree(worker);
> > return NULL;
> > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> > index c650c4506c70..029c203147be 100644
> > --- a/drivers/vhost/vhost.h
> > +++ b/drivers/vhost/vhost.h
> > @@ -26,7 +26,18 @@ struct vhost_work {
> > unsigned long flags;
> > };
> >
> > +struct vhost_worker;
> > +struct vhost_dev;
> > +
> > +struct vhost_worker_ops {
> > + int (*create)(struct vhost_worker *worker, struct vhost_dev *dev,
> > + const char *name);
> > + void (*stop)(struct vhost_worker *worker);
> > + void (*wakeup)(struct vhost_worker *worker);
> > +};
> > +
> > struct vhost_worker {
> > + struct task_struct *kthread_task;
> > struct vhost_task *vtsk;
> > struct vhost_dev *dev;
> > /* Used to serialize device wide flushing with worker swapping. */
> > @@ -36,6 +47,7 @@ struct vhost_worker {
> > u32 id;
> > int attachment_cnt;
> > bool killed;
> > + const struct vhost_worker_ops *ops;
> > };
> >
> > /* Poll a file (eventfd or socket) */
> > --
> > 2.45.0
> >
>
> Thanks
>
© 2016 - 2025 Red Hat, Inc.