From nobody Thu Apr 30 08:24:50 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 4CDB1C4332F for ; Wed, 1 Jun 2022 13:21:08 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1353222AbiFANVF (ORCPT ); Wed, 1 Jun 2022 09:21:05 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41174 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1349877AbiFANU5 (ORCPT ); Wed, 1 Jun 2022 09:20:57 -0400 Received: from us-smtp-delivery-44.mimecast.com (us-smtp-delivery-44.mimecast.com [205.139.111.44]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id D7CFB4CD5B for ; Wed, 1 Jun 2022 06:20:55 -0700 (PDT) Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-606-W5A1QcP2OjSmyMrvEcSVfQ-1; Wed, 01 Jun 2022 09:20:48 -0400 X-MC-Unique: W5A1QcP2OjSmyMrvEcSVfQ-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id DD470811E81; Wed, 1 Jun 2022 13:20:47 +0000 (UTC) Received: from comp-core-i7-2640m-0182e6.redhat.com (unknown [10.36.110.3]) by smtp.corp.redhat.com (Postfix) with ESMTP id C17D0414A7E7; Wed, 1 Jun 2022 13:20:45 +0000 (UTC) From: Alexey Gladkov To: LKML , "Eric W . 
Biederman" , Linus Torvalds Cc: Andrew Morton , Christian Brauner , Iurii Zaikin , Kees Cook , Linux Containers , linux-fsdevel@vger.kernel.org, Luis Chamberlain , Vasily Averin Subject: [RFC PATCH 1/4] sysctl: API extension for handling sysctl Date: Wed, 1 Jun 2022 15:20:29 +0200 Message-Id: <5ec6759ab3b617f9c12449a9606b6f0b5a7582d0.1654086665.git.legion@kernel.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Scanned-By: MIMEDefang 2.84 on 10.11.54.2 Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" This adds additional optional functions for handling open, read, and write operations that can be customized for each sysctl file. It also creates a ctl_context that persists from the opening to the closing of a file in /proc/sys. The context allows us to store dynamic information at the time the file is opened. This eliminates the need to duplicate ctl_table in order to dynamically change .data, .extra1 or .extra2. This API extends the existing one and does not require any changes to existing sysctl handlers. 
Signed-off-by: Alexey Gladkov --- fs/proc/proc_sysctl.c | 71 +++++++++++++++++++++++++++++++++++------- include/linux/sysctl.h | 20 ++++++++++-- 2 files changed, 77 insertions(+), 14 deletions(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 7d9cfc730bd4..d3d43e738f01 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -560,6 +560,7 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb= , struct iov_iter *iter, struct inode *inode =3D file_inode(iocb->ki_filp); struct ctl_table_header *head =3D grab_header(inode); struct ctl_table *table =3D PROC_I(inode)->sysctl_entry; + struct ctl_fops *fops =3D table->ctl_fops; size_t count =3D iov_iter_count(iter); char *kbuf; ssize_t error; @@ -577,7 +578,7 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb= , struct iov_iter *iter, =20 /* if that can happen at all, it should be -EINVAL, not -EISDIR */ error =3D -EINVAL; - if (!table->proc_handler) + if (!table->proc_handler && !fops) goto out; =20 /* don't even try if the size is too large */ @@ -600,8 +601,20 @@ static ssize_t proc_sys_call_handler(struct kiocb *ioc= b, struct iov_iter *iter, if (error) goto out_free_buf; =20 - /* careful: calling conventions are nasty here */ - error =3D table->proc_handler(table, write, kbuf, &count, &iocb->ki_pos); + if (fops) { + struct ctl_context *ctx =3D iocb->ki_filp->private_data; + + if (write && fops->write) + error =3D fops->write(ctx, iocb->ki_filp, kbuf, &count, &iocb->ki_pos); + else if (!write && fops->read) + error =3D fops->read(ctx, iocb->ki_filp, kbuf, &count, &iocb->ki_pos); + else + error =3D -EINVAL; + } else { + /* careful: calling conventions are nasty here */ + error =3D table->proc_handler(table, write, kbuf, &count, &iocb->ki_pos); + } + if (error) goto out_free_buf; =20 @@ -634,17 +647,50 @@ static int proc_sys_open(struct inode *inode, struct = file *filp) { struct ctl_table_header *head =3D grab_header(inode); struct ctl_table *table =3D PROC_I(inode)->sysctl_entry; 
+ struct ctl_context *ctx; + int ret =3D 0; =20 /* sysctl was unregistered */ if (IS_ERR(head)) return PTR_ERR(head); =20 - if (table->poll) - filp->private_data =3D proc_sys_poll_event(table->poll); + ctx =3D kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->table =3D table; + filp->private_data =3D ctx; + + if (table->ctl_fops && table->ctl_fops->open) + ret =3D table->ctl_fops->open(ctx, inode, filp); + + if (!ret && table->poll) + ctx->poll_event =3D proc_sys_poll_event(table->poll); =20 sysctl_head_finish(head); =20 - return 0; + return ret; +} + +static int proc_sys_release(struct inode *inode, struct file *filp) +{ + struct ctl_table_header *head =3D grab_header(inode); + struct ctl_table *table =3D PROC_I(inode)->sysctl_entry; + struct ctl_context *ctx =3D filp->private_data; + int ret =3D 0; + + if (IS_ERR(head)) + return PTR_ERR(head); + + if (table->ctl_fops && table->ctl_fops->release) + ret =3D table->ctl_fops->release(ctx, inode, filp); + + sysctl_head_finish(head); + + kfree(ctx); + filp->private_data =3D NULL; + + return ret; } =20 static __poll_t proc_sys_poll(struct file *filp, poll_table *wait) @@ -653,23 +699,23 @@ static __poll_t proc_sys_poll(struct file *filp, poll= _table *wait) struct ctl_table_header *head =3D grab_header(inode); struct ctl_table *table =3D PROC_I(inode)->sysctl_entry; __poll_t ret =3D DEFAULT_POLLMASK; - unsigned long event; + struct ctl_context *ctx; =20 /* sysctl was unregistered */ if (IS_ERR(head)) return EPOLLERR | EPOLLHUP; =20 - if (!table->proc_handler) + if (!table->proc_handler && !table->ctl_fops) goto out; =20 if (!table->poll) goto out; =20 - event =3D (unsigned long)filp->private_data; + ctx =3D filp->private_data; poll_wait(filp, &table->poll->wait, wait); =20 - if (event !=3D atomic_read(&table->poll->event)) { - filp->private_data =3D proc_sys_poll_event(table->poll); + if (ctx->poll_event !=3D atomic_read(&table->poll->event)) { + ctx->poll_event =3D 
proc_sys_poll_event(table->poll); ret =3D EPOLLIN | EPOLLRDNORM | EPOLLERR | EPOLLPRI; } =20 @@ -866,6 +912,7 @@ static int proc_sys_getattr(struct user_namespace *mnt_= userns, =20 static const struct file_operations proc_sys_file_operations =3D { .open =3D proc_sys_open, + .release =3D proc_sys_release, .poll =3D proc_sys_poll, .read_iter =3D proc_sys_read, .write_iter =3D proc_sys_write, @@ -1153,7 +1200,7 @@ static int sysctl_check_table(const char *path, struc= t ctl_table *table) else err |=3D sysctl_check_table_array(path, table); } - if (!table->proc_handler) + if (!table->proc_handler && !table->ctl_fops) err |=3D sysctl_err(path, table, "No proc_handler"); =20 if ((table->mode & (S_IRUGO|S_IWUGO)) !=3D table->mode) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 6353d6db69b2..ca5657c9fcb2 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -116,9 +116,9 @@ struct ctl_table_poll { wait_queue_head_t wait; }; =20 -static inline void *proc_sys_poll_event(struct ctl_table_poll *poll) +static inline unsigned long proc_sys_poll_event(struct ctl_table_poll *pol= l) { - return (void *)(unsigned long)atomic_read(&poll->event); + return (unsigned long)atomic_read(&poll->event); } =20 #define __CTL_TABLE_POLL_INITIALIZER(name) { \ @@ -128,6 +128,21 @@ static inline void *proc_sys_poll_event(struct ctl_tab= le_poll *poll) #define DEFINE_CTL_TABLE_POLL(name) \ struct ctl_table_poll name =3D __CTL_TABLE_POLL_INITIALIZER(name) =20 +struct ctl_context { + struct ctl_table *table; + unsigned long poll_event; + void *ctl_data; +}; + +struct inode; + +struct ctl_fops { + int (*open) (struct ctl_context *, struct inode *, struct file *); + int (*release) (struct ctl_context *, struct inode *, struct file *); + ssize_t (*read) (struct ctl_context *, struct file *, char *, size_t *, l= off_t *); + ssize_t (*write) (struct ctl_context *, struct file *, char *, size_t *, = loff_t *); +}; + /* A sysctl table is an array of struct ctl_table: */ 
struct ctl_table { const char *procname; /* Text ID for /proc/sys, or zero */ @@ -139,6 +154,7 @@ struct ctl_table { struct ctl_table_poll *poll; void *extra1; void *extra2; + struct ctl_fops *ctl_fops; } __randomize_layout; =20 struct ctl_node { --=20 2.33.3 From nobody Thu Apr 30 08:24:50 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0BDADC433EF for ; Wed, 1 Jun 2022 13:21:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1353224AbiFANVP (ORCPT ); Wed, 1 Jun 2022 09:21:15 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41358 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1353198AbiFANVE (ORCPT ); Wed, 1 Jun 2022 09:21:04 -0400 Received: from us-smtp-delivery-44.mimecast.com (us-smtp-delivery-44.mimecast.com [207.211.30.44]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id 7641E175A3 for ; Wed, 1 Jun 2022 06:20:56 -0700 (PDT) Received: from mimecast-mx02.redhat.com (mx3-rdu2.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-519-4Z-CMtpGOVKaHbq7mveZtw-1; Wed, 01 Jun 2022 09:20:51 -0400 X-MC-Unique: 4Z-CMtpGOVKaHbq7mveZtw-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 4838C1C0F68B; Wed, 1 Jun 2022 13:20:50 +0000 (UTC) Received: from comp-core-i7-2640m-0182e6.redhat.com (unknown [10.36.110.3]) by smtp.corp.redhat.com (Postfix) with ESMTP id 2E85B414A7E7; Wed, 1 Jun 2022 13:20:48 +0000 (UTC) From: Alexey Gladkov To: LKML , "Eric W . 
Biederman" , Linus Torvalds Cc: Andrew Morton , Christian Brauner , Iurii Zaikin , Kees Cook , Linux Containers , linux-fsdevel@vger.kernel.org, Luis Chamberlain , Vasily Averin Subject: [RFC PATCH 2/4] sysctl: ipc: Do not use dynamic memory Date: Wed, 1 Jun 2022 15:20:30 +0200 Message-Id: <857cb160a981b5719d8ed6a3e5e7c456915c64fa.1654086665.git.legion@kernel.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Scanned-By: MIMEDefang 2.84 on 10.11.54.2 Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Dynamic memory allocation is needed to modify .data and specify the per namespace parameter. The new sysctl API is allowed to get rid of the need for such modification. Signed-off-by: Alexey Gladkov --- include/linux/ipc_namespace.h | 18 --- ipc/ipc_sysctl.c | 236 +++++++++++++++++----------------- ipc/namespace.c | 4 - 3 files changed, 121 insertions(+), 137 deletions(-) diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index e3e8c8662b49..51c2c247c447 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -191,22 +191,4 @@ static inline bool setup_mq_sysctls(struct ipc_namespa= ce *ns) } =20 #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ - -#ifdef CONFIG_SYSVIPC_SYSCTL - -bool setup_ipc_sysctls(struct ipc_namespace *ns); -void retire_ipc_sysctls(struct ipc_namespace *ns); - -#else /* CONFIG_SYSVIPC_SYSCTL */ - -static inline void retire_ipc_sysctls(struct ipc_namespace *ns) -{ -} - -static inline bool setup_ipc_sysctls(struct ipc_namespace *ns) -{ - return true; -} - -#endif /* CONFIG_SYSVIPC_SYSCTL */ #endif diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index ef313ecfb53a..833b670c38f3 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -68,26 +68,94 @@ static int proc_ipc_sem_dointvec(struct ctl_table *tabl= e, int write, return ret; } =20 +static inline void *data_from_ns(struct ctl_context *ctx, struct 
ctl_table= *table); + +static int ipc_sys_open(struct ctl_context *ctx, struct inode *inode, stru= ct file *file) +{ + struct ipc_namespace *ns =3D current->nsproxy->ipc_ns; + + // For now, we only allow changes in init_user_ns. + if (ns->user_ns !=3D &init_user_ns) + return -EPERM; + +#ifdef CONFIG_CHECKPOINT_RESTORE + int index =3D (ctx->table - ipc_sysctls); + + switch (index) { + case IPC_SYSCTL_SEM_NEXT_ID: + case IPC_SYSCTL_MSG_NEXT_ID: + case IPC_SYSCTL_SHM_NEXT_ID: + if (!checkpoint_restore_ns_capable(ns->user_ns)) + return -EPERM; + break; + } +#endif + ctx->ctl_data =3D ns; + return 0; +} + +static ssize_t ipc_sys_read(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table table =3D *ctx->table; + table.data =3D data_from_ns(ctx, ctx->table); + return table.proc_handler(&table, 0, buffer, lenp, ppos); +} + +static ssize_t ipc_sys_write(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table table =3D *ctx->table; + table.data =3D data_from_ns(ctx, ctx->table); + return table.proc_handler(&table, 1, buffer, lenp, ppos); +} + +static struct ctl_fops ipc_sys_fops =3D { + .open =3D ipc_sys_open, + .read =3D ipc_sys_read, + .write =3D ipc_sys_write, +}; + int ipc_mni =3D IPCMNI; int ipc_mni_shift =3D IPCMNI_SHIFT; int ipc_min_cycle =3D RADIX_TREE_MAP_SIZE; =20 +enum { + IPC_SYSCTL_SHMMAX, + IPC_SYSCTL_SHMALL, + IPC_SYSCTL_SHMMNI, + IPC_SYSCTL_SHM_RMID_FORCED, + IPC_SYSCTL_MSGMAX, + IPC_SYSCTL_MSGMNI, + IPC_SYSCTL_AUTO_MSGMNI, + IPC_SYSCTL_MSGMNB, + IPC_SYSCTL_SEM, +#ifdef CONFIG_CHECKPOINT_RESTORE + IPC_SYSCTL_SEM_NEXT_ID, + IPC_SYSCTL_MSG_NEXT_ID, + IPC_SYSCTL_SHM_NEXT_ID, +#endif + IPC_SYSCTL_COUNTS +}; + static struct ctl_table ipc_sysctls[] =3D { - { + [IPC_SYSCTL_SHMMAX] =3D { .procname =3D "shmmax", .data =3D &init_ipc_ns.shm_ctlmax, .maxlen =3D sizeof(init_ipc_ns.shm_ctlmax), .mode =3D 0644, - .proc_handler =3D proc_doulongvec_minmax, + 
.proc_handler =3D proc_doulongvec_minmax, + .ctl_fops =3D &ipc_sys_fops, }, - { + [IPC_SYSCTL_SHMALL] =3D { .procname =3D "shmall", .data =3D &init_ipc_ns.shm_ctlall, .maxlen =3D sizeof(init_ipc_ns.shm_ctlall), .mode =3D 0644, - .proc_handler =3D proc_doulongvec_minmax, + .proc_handler =3D proc_doulongvec_minmax, + .ctl_fops =3D &ipc_sys_fops, }, - { + [IPC_SYSCTL_SHMMNI] =3D { .procname =3D "shmmni", .data =3D &init_ipc_ns.shm_ctlmni, .maxlen =3D sizeof(init_ipc_ns.shm_ctlmni), @@ -95,8 +163,9 @@ static struct ctl_table ipc_sysctls[] =3D { .proc_handler =3D proc_dointvec_minmax, .extra1 =3D SYSCTL_ZERO, .extra2 =3D &ipc_mni, + .ctl_fops =3D &ipc_sys_fops, }, - { + [IPC_SYSCTL_SHM_RMID_FORCED] =3D { .procname =3D "shm_rmid_forced", .data =3D &init_ipc_ns.shm_rmid_forced, .maxlen =3D sizeof(init_ipc_ns.shm_rmid_forced), @@ -104,8 +173,9 @@ static struct ctl_table ipc_sysctls[] =3D { .proc_handler =3D proc_ipc_dointvec_minmax_orphans, .extra1 =3D SYSCTL_ZERO, .extra2 =3D SYSCTL_ONE, + .ctl_fops =3D &ipc_sys_fops, }, - { + [IPC_SYSCTL_MSGMAX] =3D { .procname =3D "msgmax", .data =3D &init_ipc_ns.msg_ctlmax, .maxlen =3D sizeof(init_ipc_ns.msg_ctlmax), @@ -113,8 +183,9 @@ static struct ctl_table ipc_sysctls[] =3D { .proc_handler =3D proc_dointvec_minmax, .extra1 =3D SYSCTL_ZERO, .extra2 =3D SYSCTL_INT_MAX, + .ctl_fops =3D &ipc_sys_fops, }, - { + [IPC_SYSCTL_MSGMNI] =3D { .procname =3D "msgmni", .data =3D &init_ipc_ns.msg_ctlmni, .maxlen =3D sizeof(init_ipc_ns.msg_ctlmni), @@ -122,8 +193,9 @@ static struct ctl_table ipc_sysctls[] =3D { .proc_handler =3D proc_dointvec_minmax, .extra1 =3D SYSCTL_ZERO, .extra2 =3D &ipc_mni, + .ctl_fops =3D &ipc_sys_fops, }, - { + [IPC_SYSCTL_AUTO_MSGMNI] =3D { .procname =3D "auto_msgmni", .data =3D NULL, .maxlen =3D sizeof(int), @@ -131,8 +203,9 @@ static struct ctl_table ipc_sysctls[] =3D { .proc_handler =3D proc_ipc_auto_msgmni, .extra1 =3D SYSCTL_ZERO, .extra2 =3D SYSCTL_ONE, + .ctl_fops =3D &ipc_sys_fops, }, - { + [IPC_SYSCTL_MSGMNB] =3D 
{ .procname =3D "msgmnb", .data =3D &init_ipc_ns.msg_ctlmnb, .maxlen =3D sizeof(init_ipc_ns.msg_ctlmnb), @@ -140,152 +213,85 @@ static struct ctl_table ipc_sysctls[] =3D { .proc_handler =3D proc_dointvec_minmax, .extra1 =3D SYSCTL_ZERO, .extra2 =3D SYSCTL_INT_MAX, + .ctl_fops =3D &ipc_sys_fops, }, - { + [IPC_SYSCTL_SEM] =3D { .procname =3D "sem", .data =3D &init_ipc_ns.sem_ctls, .maxlen =3D 4*sizeof(int), .mode =3D 0644, .proc_handler =3D proc_ipc_sem_dointvec, + .ctl_fops =3D &ipc_sys_fops, }, #ifdef CONFIG_CHECKPOINT_RESTORE - { + [IPC_SYSCTL_SEM_NEXT_ID] =3D { .procname =3D "sem_next_id", .data =3D &init_ipc_ns.ids[IPC_SEM_IDS].next_id, .maxlen =3D sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), - .mode =3D 0444, + .mode =3D 0666, .proc_handler =3D proc_dointvec_minmax, .extra1 =3D SYSCTL_ZERO, .extra2 =3D SYSCTL_INT_MAX, + .ctl_fops =3D &ipc_sys_fops, }, - { + [IPC_SYSCTL_MSG_NEXT_ID] =3D { .procname =3D "msg_next_id", .data =3D &init_ipc_ns.ids[IPC_MSG_IDS].next_id, .maxlen =3D sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), - .mode =3D 0444, + .mode =3D 0666, .proc_handler =3D proc_dointvec_minmax, .extra1 =3D SYSCTL_ZERO, .extra2 =3D SYSCTL_INT_MAX, + .ctl_fops =3D &ipc_sys_fops, }, - { + [IPC_SYSCTL_SHM_NEXT_ID] =3D { .procname =3D "shm_next_id", .data =3D &init_ipc_ns.ids[IPC_SHM_IDS].next_id, .maxlen =3D sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), - .mode =3D 0444, + .mode =3D 0666, .proc_handler =3D proc_dointvec_minmax, .extra1 =3D SYSCTL_ZERO, .extra2 =3D SYSCTL_INT_MAX, + .ctl_fops =3D &ipc_sys_fops, }, #endif - {} + [IPC_SYSCTL_COUNTS] =3D {} }; =20 -static struct ctl_table_set *set_lookup(struct ctl_table_root *root) +static inline void *data_from_ns(struct ctl_context *ctx, struct ctl_table= *table) { - return ¤t->nsproxy->ipc_ns->ipc_set; -} - -static int set_is_seen(struct ctl_table_set *set) -{ - return ¤t->nsproxy->ipc_ns->ipc_set =3D=3D set; -} - -static int ipc_permissions(struct ctl_table_header *head, struct ctl_table= *table) -{ - int mode 
=3D table->mode; - + struct ipc_namespace *ns =3D ctx->ctl_data; + + switch (ctx->table - ipc_sysctls) { + case IPC_SYSCTL_SHMMAX: return &ns->shm_ctlmax; + case IPC_SYSCTL_SHMALL: return &ns->shm_ctlall; + case IPC_SYSCTL_SHMMNI: return &ns->shm_ctlmni; + case IPC_SYSCTL_SHM_RMID_FORCED: return &ns->shm_rmid_forced; + case IPC_SYSCTL_MSGMAX: return &ns->msg_ctlmax; + case IPC_SYSCTL_MSGMNI: return &ns->msg_ctlmni; + case IPC_SYSCTL_MSGMNB: return &ns->msg_ctlmnb; + case IPC_SYSCTL_SEM: return &ns->sem_ctls; #ifdef CONFIG_CHECKPOINT_RESTORE - struct ipc_namespace *ns =3D current->nsproxy->ipc_ns; - - if (((table->data =3D=3D &ns->ids[IPC_SEM_IDS].next_id) || - (table->data =3D=3D &ns->ids[IPC_MSG_IDS].next_id) || - (table->data =3D=3D &ns->ids[IPC_SHM_IDS].next_id)) && - checkpoint_restore_ns_capable(ns->user_ns)) - mode =3D 0666; + case IPC_SYSCTL_SEM_NEXT_ID: return &ns->ids[IPC_SEM_IDS].next_id; + case IPC_SYSCTL_MSG_NEXT_ID: return &ns->ids[IPC_MSG_IDS].next_id; + case IPC_SYSCTL_SHM_NEXT_ID: return &ns->ids[IPC_SHM_IDS].next_id; #endif - return mode; -} - -static struct ctl_table_root set_root =3D { - .lookup =3D set_lookup, - .permissions =3D ipc_permissions, -}; - -bool setup_ipc_sysctls(struct ipc_namespace *ns) -{ - struct ctl_table *tbl; - - setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen); - - tbl =3D kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL); - if (tbl) { - int i; - - for (i =3D 0; i < ARRAY_SIZE(ipc_sysctls); i++) { - if (tbl[i].data =3D=3D &init_ipc_ns.shm_ctlmax) - tbl[i].data =3D &ns->shm_ctlmax; - - else if (tbl[i].data =3D=3D &init_ipc_ns.shm_ctlall) - tbl[i].data =3D &ns->shm_ctlall; - - else if (tbl[i].data =3D=3D &init_ipc_ns.shm_ctlmni) - tbl[i].data =3D &ns->shm_ctlmni; - - else if (tbl[i].data =3D=3D &init_ipc_ns.shm_rmid_forced) - tbl[i].data =3D &ns->shm_rmid_forced; - - else if (tbl[i].data =3D=3D &init_ipc_ns.msg_ctlmax) - tbl[i].data =3D &ns->msg_ctlmax; - - else if (tbl[i].data =3D=3D &init_ipc_ns.msg_ctlmni) - 
tbl[i].data =3D &ns->msg_ctlmni; - - else if (tbl[i].data =3D=3D &init_ipc_ns.msg_ctlmnb) - tbl[i].data =3D &ns->msg_ctlmnb; - - else if (tbl[i].data =3D=3D &init_ipc_ns.sem_ctls) - tbl[i].data =3D &ns->sem_ctls; -#ifdef CONFIG_CHECKPOINT_RESTORE - else if (tbl[i].data =3D=3D &init_ipc_ns.ids[IPC_SEM_IDS].next_id) - tbl[i].data =3D &ns->ids[IPC_SEM_IDS].next_id; - - else if (tbl[i].data =3D=3D &init_ipc_ns.ids[IPC_MSG_IDS].next_id) - tbl[i].data =3D &ns->ids[IPC_MSG_IDS].next_id; - - else if (tbl[i].data =3D=3D &init_ipc_ns.ids[IPC_SHM_IDS].next_id) - tbl[i].data =3D &ns->ids[IPC_SHM_IDS].next_id; -#endif - else - tbl[i].data =3D NULL; - } - - ns->ipc_sysctls =3D __register_sysctl_table(&ns->ipc_set, "kernel", tbl); - } - if (!ns->ipc_sysctls) { - kfree(tbl); - retire_sysctl_set(&ns->ipc_set); - return false; } - - return true; + return NULL; } =20 -void retire_ipc_sysctls(struct ipc_namespace *ns) -{ - struct ctl_table *tbl; - - tbl =3D ns->ipc_sysctls->ctl_table_arg; - unregister_sysctl_table(ns->ipc_sysctls); - retire_sysctl_set(&ns->ipc_set); - kfree(tbl); -} +static struct ctl_table ipc_root_table[] =3D { + { + .procname =3D "kernel", + .mode =3D 0555, + .child =3D ipc_sysctls, + }, + {} +}; =20 static int __init ipc_sysctl_init(void) { - if (!setup_ipc_sysctls(&init_ipc_ns)) { - pr_warn("ipc sysctl registration failed\n"); - return -ENOMEM; - } + register_sysctl_table(ipc_root_table); return 0; } =20 diff --git a/ipc/namespace.c b/ipc/namespace.c index 754f3237194a..f760243ca685 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -63,9 +63,6 @@ static struct ipc_namespace *create_ipc_ns(struct user_na= mespace *user_ns, if (!setup_mq_sysctls(ns)) goto fail_put; =20 - if (!setup_ipc_sysctls(ns)) - goto fail_put; - sem_init_ns(ns); msg_init_ns(ns); shm_init_ns(ns); @@ -133,7 +130,6 @@ static void free_ipc_ns(struct ipc_namespace *ns) shm_exit_ns(ns); =20 retire_mq_sysctls(ns); - retire_ipc_sysctls(ns); =20 dec_ipc_namespaces(ns->ucounts); 
put_user_ns(ns->user_ns); --=20 2.33.3 From nobody Thu Apr 30 08:24:50 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id A56BEC433F5 for ; Wed, 1 Jun 2022 13:21:14 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1347317AbiFANVL (ORCPT ); Wed, 1 Jun 2022 09:21:11 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41360 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1348228AbiFANVD (ORCPT ); Wed, 1 Jun 2022 09:21:03 -0400 Received: from us-smtp-delivery-44.mimecast.com (us-smtp-delivery-44.mimecast.com [205.139.111.44]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id EAD7A4C7A5 for ; Wed, 1 Jun 2022 06:20:57 -0700 (PDT) Received: from mimecast-mx02.redhat.com (mimecast-mx02.redhat.com [66.187.233.88]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-264-O_Pd3AsIMGigCWcA94cQtA-1; Wed, 01 Jun 2022 09:20:53 -0400 X-MC-Unique: O_Pd3AsIMGigCWcA94cQtA-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id B024B802804; Wed, 1 Jun 2022 13:20:52 +0000 (UTC) Received: from comp-core-i7-2640m-0182e6.redhat.com (unknown [10.36.110.3]) by smtp.corp.redhat.com (Postfix) with ESMTP id 94762414A7E7; Wed, 1 Jun 2022 13:20:50 +0000 (UTC) From: Alexey Gladkov To: LKML , "Eric W . 
Biederman" , Linus Torvalds Cc: Andrew Morton , Christian Brauner , Iurii Zaikin , Kees Cook , Linux Containers , linux-fsdevel@vger.kernel.org, Luis Chamberlain , Vasily Averin Subject: [RFC PATCH 3/4] sysctl: userns: Do not use dynamic memory Date: Wed, 1 Jun 2022 15:20:31 +0200 Message-Id: <81190e5e4879d53be2e1416bcad0b663421339d6.1654086665.git.legion@kernel.org> In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Scanned-By: MIMEDefang 2.84 on 10.11.54.2 Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Dynamic memory allocation is needed to modify .data and specify the per namespace parameter. The new sysctl API is allowed to get rid of the need for such modification. Signed-off-by: Alexey Gladkov --- include/linux/user_namespace.h | 6 -- kernel/ucount.c | 116 +++++++++++++-------------------- kernel/user_namespace.c | 10 +-- 3 files changed, 46 insertions(+), 86 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 45f09bec02c4..7b134516e5cb 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -95,10 +95,6 @@ struct user_namespace { struct key *persistent_keyring_register; #endif struct work_struct work; -#ifdef CONFIG_SYSCTL - struct ctl_table_set set; - struct ctl_table_header *sysctls; -#endif struct ucounts *ucounts; long ucount_max[UCOUNT_COUNTS]; long rlimit_max[UCOUNT_RLIMIT_COUNTS]; @@ -116,8 +112,6 @@ struct ucounts { extern struct user_namespace init_user_ns; extern struct ucounts init_ucounts; =20 -bool setup_userns_sysctls(struct user_namespace *ns); -void retire_userns_sysctls(struct user_namespace *ns); struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum uco= unt_type type); void dec_ucount(struct ucounts *ucounts, enum ucount_type type); struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); diff --git a/kernel/ucount.c b/kernel/ucount.c 
index ee8e57fd6f90..4a5072671847 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -7,6 +7,7 @@ #include #include #include +#include =20 struct ucounts init_ucounts =3D { .ns =3D &init_user_ns, @@ -26,38 +27,20 @@ static DEFINE_SPINLOCK(ucounts_lock); =20 =20 #ifdef CONFIG_SYSCTL -static struct ctl_table_set * -set_lookup(struct ctl_table_root *root) -{ - return ¤t_user_ns()->set; -} - -static int set_is_seen(struct ctl_table_set *set) -{ - return ¤t_user_ns()->set =3D=3D set; -} - -static int set_permissions(struct ctl_table_header *head, - struct ctl_table *table) -{ - struct user_namespace *user_ns =3D - container_of(head->set, struct user_namespace, set); - int mode; - - /* Allow users with CAP_SYS_RESOURCE unrestrained access */ - if (ns_capable(user_ns, CAP_SYS_RESOURCE)) - mode =3D (table->mode & S_IRWXU) >> 6; - else - /* Allow all others at most read-only access */ - mode =3D table->mode & S_IROTH; - return (mode << 6) | (mode << 3) | mode; -} - -static struct ctl_table_root set_root =3D { - .lookup =3D set_lookup, - .permissions =3D set_permissions, +static int user_sys_open(struct ctl_context *ctx, struct inode *inode, + struct file *file); +static ssize_t user_sys_read(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos); +static ssize_t user_sys_write(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos); + +static struct ctl_fops user_sys_fops =3D { + .open =3D user_sys_open, + .read =3D user_sys_read, + .write =3D user_sys_write, }; =20 +static long ue_dummy =3D 0; static long ue_zero =3D 0; static long ue_int_max =3D INT_MAX; =20 @@ -66,9 +49,11 @@ static long ue_int_max =3D INT_MAX; .procname =3D name, \ .maxlen =3D sizeof(long), \ .mode =3D 0644, \ + .data =3D &ue_dummy, \ .proc_handler =3D proc_doulongvec_minmax, \ .extra1 =3D &ue_zero, \ .extra2 =3D &ue_int_max, \ + .ctl_fops =3D &user_sys_fops, \ } static struct ctl_table user_table[] =3D { 
UCOUNT_ENTRY("max_user_namespaces"), @@ -89,44 +74,43 @@ static struct ctl_table user_table[] =3D { #endif { } }; -#endif /* CONFIG_SYSCTL */ =20 -bool setup_userns_sysctls(struct user_namespace *ns) +static int user_sys_open(struct ctl_context *ctx, struct inode *inode, str= uct file *file) { -#ifdef CONFIG_SYSCTL - struct ctl_table *tbl; - - BUILD_BUG_ON(ARRAY_SIZE(user_table) !=3D UCOUNT_COUNTS + 1); - setup_sysctl_set(&ns->set, &set_root, set_is_seen); - tbl =3D kmemdup(user_table, sizeof(user_table), GFP_KERNEL); - if (tbl) { - int i; - for (i =3D 0; i < UCOUNT_COUNTS; i++) { - tbl[i].data =3D &ns->ucount_max[i]; - } - ns->sysctls =3D __register_sysctl_table(&ns->set, "user", tbl); - } - if (!ns->sysctls) { - kfree(tbl); - retire_sysctl_set(&ns->set); - return false; - } -#endif - return true; + /* Allow users with CAP_SYS_RESOURCE unrestrained access */ + if ((file->f_mode & FMODE_WRITE) && + !ns_capable(file->f_cred->user_ns, CAP_SYS_RESOURCE)) + return -EPERM; + return 0; } =20 -void retire_userns_sysctls(struct user_namespace *ns) +static ssize_t user_sys_read(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos) { -#ifdef CONFIG_SYSCTL - struct ctl_table *tbl; + struct ctl_table table =3D *ctx->table; + table.data =3D &file->f_cred->user_ns->ucount_max[ctx->table - user_table= ]; + return table.proc_handler(&table, 0, buffer, lenp, ppos); +} =20 - tbl =3D ns->sysctls->ctl_table_arg; - unregister_sysctl_table(ns->sysctls); - retire_sysctl_set(&ns->set); - kfree(tbl); -#endif +static ssize_t user_sys_write(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table table =3D *ctx->table; + table.data =3D &file->f_cred->user_ns->ucount_max[ctx->table - user_table= ]; + return table.proc_handler(&table, 1, buffer, lenp, ppos); } =20 +static struct ctl_table user_root_table[] =3D { + { + .procname =3D "user", + .mode =3D 0555, + .child =3D user_table, + }, + {} +}; + +#endif /* 
CONFIG_SYSCTL */ + static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid,= struct hlist_head *hashent) { struct ucounts *ucounts; @@ -357,17 +341,7 @@ bool is_rlimit_overlimit(struct ucounts *ucounts, enum= rlimit_type type, unsigne static __init int user_namespace_sysctl_init(void) { #ifdef CONFIG_SYSCTL - static struct ctl_table_header *user_header; - static struct ctl_table empty[1]; - /* - * It is necessary to register the user directory in the - * default set so that registrations in the child sets work - * properly. - */ - user_header =3D register_sysctl("user", empty); - kmemleak_ignore(user_header); - BUG_ON(!user_header); - BUG_ON(!setup_userns_sysctls(&init_user_ns)); + register_sysctl_table(user_root_table); #endif hlist_add_ucounts(&init_ucounts); inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 981bb2d10d83..c0e707bc9a31 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -149,17 +149,10 @@ int create_user_ns(struct cred *new) INIT_LIST_HEAD(&ns->keyring_name_list); init_rwsem(&ns->keyring_sem); #endif - ret =3D -ENOMEM; - if (!setup_userns_sysctls(ns)) - goto fail_keyring; =20 set_cred_user_ns(new, ns); return 0; -fail_keyring: -#ifdef CONFIG_PERSISTENT_KEYRINGS - key_put(ns->persistent_keyring_register); -#endif - ns_free_inum(&ns->ns); + fail_free: kmem_cache_free(user_ns_cachep, ns); fail_dec: @@ -208,7 +201,6 @@ static void free_user_ns(struct work_struct *work) kfree(ns->projid_map.forward); kfree(ns->projid_map.reverse); } - retire_userns_sysctls(ns); key_free_user_ns(ns); ns_free_inum(&ns->ns); kmem_cache_free(user_ns_cachep, ns); --=20 2.33.3 From nobody Thu Apr 30 08:24:50 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 5D6F6C433EF for ; Wed, 1 
Jun 2022 13:21:21 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1353233AbiFANVS (ORCPT ); Wed, 1 Jun 2022 09:21:18 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41392 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1353202AbiFANVF (ORCPT ); Wed, 1 Jun 2022 09:21:05 -0400 Received: from us-smtp-delivery-44.mimecast.com (us-smtp-delivery-44.mimecast.com [207.211.30.44]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id 30FF54EA2E for ; Wed, 1 Jun 2022 06:21:03 -0700 (PDT) Received: from mimecast-mx02.redhat.com (mx3-rdu2.redhat.com [66.187.233.73]) by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id us-mta-621-vLc6cXEvNsOpYsXD4FreTw-1; Wed, 01 Jun 2022 09:20:55 -0400 X-MC-Unique: vLc6cXEvNsOpYsXD4FreTw-1 Received: from smtp.corp.redhat.com (int-mx02.intmail.prod.int.rdu2.redhat.com [10.11.54.2]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx02.redhat.com (Postfix) with ESMTPS id 1C6001C0F68A; Wed, 1 Jun 2022 13:20:55 +0000 (UTC) Received: from comp-core-i7-2640m-0182e6.redhat.com (unknown [10.36.110.3]) by smtp.corp.redhat.com (Postfix) with ESMTP id 01B4B414A7E9; Wed, 1 Jun 2022 13:20:52 +0000 (UTC) From: Alexey Gladkov To: LKML , "Eric W . 
Biederman" , Linus Torvalds Cc: Andrew Morton , Christian Brauner , Iurii Zaikin , Kees Cook , Linux Containers , linux-fsdevel@vger.kernel.org, Luis Chamberlain , Vasily Averin Subject: [RFC PATCH 4/4] sysctl: mqueue: Do not use dynamic memory Date: Wed, 1 Jun 2022 15:20:32 +0200 Message-Id: In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable X-Scanned-By: MIMEDefang 2.84 on 10.11.54.2 Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Dynamic memory allocation is currently needed to duplicate the table and modify .data so that it points to the per-namespace parameter. The new sysctl API removes the need for such a modification. Signed-off-by: Alexey Gladkov --- include/linux/ipc_namespace.h | 17 ----- ipc/mq_sysctl.c | 138 +++++++++++++++++++--------------- ipc/mqueue.c | 5 -- ipc/namespace.c | 6 -- 4 files changed, 79 insertions(+), 87 deletions(-) diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 51c2c247c447..d20753093a2c 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -174,21 +174,4 @@ static inline void put_ipc_ns(struct ipc_namespace *ns) } #endif =20 -#ifdef CONFIG_POSIX_MQUEUE_SYSCTL - -void retire_mq_sysctls(struct ipc_namespace *ns); -bool setup_mq_sysctls(struct ipc_namespace *ns); - -#else /* CONFIG_POSIX_MQUEUE_SYSCTL */ - -static inline void retire_mq_sysctls(struct ipc_namespace *ns) -{ -} - -static inline bool setup_mq_sysctls(struct ipc_namespace *ns) -{ - return true; -} - -#endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ #endif diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index fbf6a8b93a26..08ff7dfb721c 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -13,6 +13,45 @@ #include #include =20 +static inline void *data_from_ns(struct ctl_context *ctx, struct ctl_table= *table); + +static int mq_sys_open(struct ctl_context *ctx, struct inode *inode, struc= t file *file) +{ + ctx->ctl_data =3D current->nsproxy->ipc_ns; + 
return 0; +} + +static ssize_t mq_sys_read(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table table =3D *ctx->table; + table.data =3D data_from_ns(ctx, ctx->table); + return table.proc_handler(&table, 0, buffer, lenp, ppos); +} + +static ssize_t mq_sys_write(struct ctl_context *ctx, struct file *file, + char *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table table =3D *ctx->table; + table.data =3D data_from_ns(ctx, ctx->table); + return table.proc_handler(&table, 1, buffer, lenp, ppos); +} + +static struct ctl_fops mq_sys_fops =3D { + .open =3D mq_sys_open, + .read =3D mq_sys_read, + .write =3D mq_sys_write, +}; + +enum { + MQ_SYSCTL_QUEUES_MAX, + MQ_SYSCTL_MSG_MAX, + MQ_SYSCTL_MSGSIZE_MAX, + MQ_SYSCTL_MSG_DEFAULT, + MQ_SYSCTL_MSGSIZE_DEFAULT, + MQ_SYSCTL_COUNTS +}; + static int msg_max_limit_min =3D MIN_MSGMAX; static int msg_max_limit_max =3D HARD_MSGMAX; =20 @@ -20,14 +59,15 @@ static int msg_maxsize_limit_min =3D MIN_MSGSIZEMAX; static int msg_maxsize_limit_max =3D HARD_MSGSIZEMAX; =20 static struct ctl_table mq_sysctls[] =3D { - { + [MQ_SYSCTL_QUEUES_MAX] =3D { .procname =3D "queues_max", .data =3D &init_ipc_ns.mq_queues_max, .maxlen =3D sizeof(int), .mode =3D 0644, .proc_handler =3D proc_dointvec, + .ctl_fops =3D &mq_sys_fops, }, - { + [MQ_SYSCTL_MSG_MAX] =3D { .procname =3D "msg_max", .data =3D &init_ipc_ns.mq_msg_max, .maxlen =3D sizeof(int), @@ -35,8 +75,9 @@ static struct ctl_table mq_sysctls[] =3D { .proc_handler =3D proc_dointvec_minmax, .extra1 =3D &msg_max_limit_min, .extra2 =3D &msg_max_limit_max, + .ctl_fops =3D &mq_sys_fops, }, - { + [MQ_SYSCTL_MSGSIZE_MAX] =3D { .procname =3D "msgsize_max", .data =3D &init_ipc_ns.mq_msgsize_max, .maxlen =3D sizeof(int), @@ -44,8 +85,9 @@ static struct ctl_table mq_sysctls[] =3D { .proc_handler =3D proc_dointvec_minmax, .extra1 =3D &msg_maxsize_limit_min, .extra2 =3D &msg_maxsize_limit_max, + .ctl_fops =3D &mq_sys_fops, }, - { + 
[MQ_SYSCTL_MSG_DEFAULT] =3D { .procname =3D "msg_default", .data =3D &init_ipc_ns.mq_msg_default, .maxlen =3D sizeof(int), @@ -53,8 +95,9 @@ static struct ctl_table mq_sysctls[] =3D { .proc_handler =3D proc_dointvec_minmax, .extra1 =3D &msg_max_limit_min, .extra2 =3D &msg_max_limit_max, + .ctl_fops =3D &mq_sys_fops, }, - { + [MQ_SYSCTL_MSGSIZE_DEFAULT] =3D { .procname =3D "msgsize_default", .data =3D &init_ipc_ns.mq_msgsize_default, .maxlen =3D sizeof(int), @@ -62,70 +105,47 @@ static struct ctl_table mq_sysctls[] =3D { .proc_handler =3D proc_dointvec_minmax, .extra1 =3D &msg_maxsize_limit_min, .extra2 =3D &msg_maxsize_limit_max, + .ctl_fops =3D &mq_sys_fops, }, {} }; =20 -static struct ctl_table_set *set_lookup(struct ctl_table_root *root) +static inline void *data_from_ns(struct ctl_context *ctx, struct ctl_table= *table) { - return &current->nsproxy->ipc_ns->mq_set; + struct ipc_namespace *ns =3D ctx->ctl_data; + + switch (ctx->table - mq_sysctls) { + case MQ_SYSCTL_QUEUES_MAX: return &ns->mq_queues_max; + case MQ_SYSCTL_MSG_MAX: return &ns->mq_msg_max; + case MQ_SYSCTL_MSGSIZE_MAX: return &ns->mq_msgsize_max; + case MQ_SYSCTL_MSG_DEFAULT: return &ns->mq_msg_default; + case MQ_SYSCTL_MSGSIZE_DEFAULT: return &ns->mq_msgsize_default; + } + return NULL; } =20 -static int set_is_seen(struct ctl_table_set *set) -{ - return &current->nsproxy->ipc_ns->mq_set =3D=3D set; -} +static struct ctl_table mq_sysctl_dir[] =3D { + { + .procname =3D "mqueue", + .mode =3D 0555, + .child =3D mq_sysctls, + }, + {} +}; =20 -static struct ctl_table_root set_root =3D { - .lookup =3D set_lookup, +static struct ctl_table mq_sysctl_root[] =3D { + { + .procname =3D "fs", + .mode =3D 0555, + .child =3D mq_sysctl_dir, + }, + {} }; =20 -bool setup_mq_sysctls(struct ipc_namespace *ns) +static int __init mq_sysctl_init(void) { - struct ctl_table *tbl; - - setup_sysctl_set(&ns->mq_set, &set_root, set_is_seen); - - tbl =3D kmemdup(mq_sysctls, sizeof(mq_sysctls), GFP_KERNEL); - if (tbl) { - int i; - - for (i 
=3D 0; i < ARRAY_SIZE(mq_sysctls); i++) { - if (tbl[i].data =3D=3D &init_ipc_ns.mq_queues_max) - tbl[i].data =3D &ns->mq_queues_max; - - else if (tbl[i].data =3D=3D &init_ipc_ns.mq_msg_max) - tbl[i].data =3D &ns->mq_msg_max; - - else if (tbl[i].data =3D=3D &init_ipc_ns.mq_msgsize_max) - tbl[i].data =3D &ns->mq_msgsize_max; - - else if (tbl[i].data =3D=3D &init_ipc_ns.mq_msg_default) - tbl[i].data =3D &ns->mq_msg_default; - - else if (tbl[i].data =3D=3D &init_ipc_ns.mq_msgsize_default) - tbl[i].data =3D &ns->mq_msgsize_default; - else - tbl[i].data =3D NULL; - } - - ns->mq_sysctls =3D __register_sysctl_table(&ns->mq_set, "fs/mqueue", tbl= ); - } - if (!ns->mq_sysctls) { - kfree(tbl); - retire_sysctl_set(&ns->mq_set); - return false; - } - - return true; + register_sysctl_table(mq_sysctl_root); + return 0; } =20 -void retire_mq_sysctls(struct ipc_namespace *ns) -{ - struct ctl_table *tbl; - - tbl =3D ns->mq_sysctls->ctl_table_arg; - unregister_sysctl_table(ns->mq_sysctls); - retire_sysctl_set(&ns->mq_set); - kfree(tbl); -} +device_initcall(mq_sysctl_init); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index c0f24cc9f619..ffb79a24d70b 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -1711,11 +1711,6 @@ static int __init init_mqueue_fs(void) if (mqueue_inode_cachep =3D=3D NULL) return -ENOMEM; =20 - if (!setup_mq_sysctls(&init_ipc_ns)) { - pr_warn("sysctl registration failed\n"); - return -ENOMEM; - } - error =3D register_filesystem(&mqueue_fs_type); if (error) goto out_sysctl; diff --git a/ipc/namespace.c b/ipc/namespace.c index f760243ca685..ae83f0f2651b 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -59,10 +59,6 @@ static struct ipc_namespace *create_ipc_ns(struct user_n= amespace *user_ns, if (err) goto fail_put; =20 - err =3D -ENOMEM; - if (!setup_mq_sysctls(ns)) - goto fail_put; - sem_init_ns(ns); msg_init_ns(ns); shm_init_ns(ns); @@ -129,8 +125,6 @@ static void free_ipc_ns(struct ipc_namespace *ns) msg_exit_ns(ns); shm_exit_ns(ns); =20 - retire_mq_sysctls(ns); 
- dec_ipc_namespaces(ns->ucounts); put_user_ns(ns->user_ns); ns_free_inum(&ns->ns); --=20 2.33.3