From nobody Sat Feb 7 08:45:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 37D9FC77B7A for ; Tue, 30 May 2023 19:00:14 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233057AbjE3TAM convert rfc822-to-8bit (ORCPT ); Tue, 30 May 2023 15:00:12 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:43770 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230308AbjE3TAI (ORCPT ); Tue, 30 May 2023 15:00:08 -0400 X-Greylist: delayed 549 seconds by postgrey-1.37 at lindbergh.monkeyblade.net; Tue, 30 May 2023 12:00:02 PDT Received: from mail-b.sr.ht (mail-b.sr.ht [173.195.146.151]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id B0DB1F9 for ; Tue, 30 May 2023 12:00:02 -0700 (PDT) Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 6224B11F05E; Tue, 30 May 2023 18:50:50 +0000 (UTC) From: ~akihirosuda Date: Tue, 30 May 2023 23:42:52 +0900 Subject: [PATCH linux 1/3] net/ipv4: split group_range logic to kernel/group_range.c Message-ID: <168547265011.24337.4306067683997517082-1@git.sr.ht> X-Mailer: git.sr.ht Reply-to: ~akihirosuda In-Reply-To: <168547265011.24337.4306067683997517082-0@git.sr.ht> To: linux-kernel@vger.kernel.org, containers@lists.linux.dev, serge@hallyn.com, brauner@kernel.org, paul@paul-moore.com, ebiederm@xmission.com Cc: suda.kyoto@gmail.com, akihiro.suda.cz@hco.ntt.co.jp Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org From: Akihiro Suda The logic can be reused for other sysctls in future. 
Signed-off-by: Akihiro Suda --- include/linux/group_range.h | 24 ++++++++++ include/net/netns/ipv4.h | 9 +--- include/net/ping.h | 6 --- kernel/Makefile | 2 +- kernel/group_range.c | 91 +++++++++++++++++++++++++++++++++++++ net/ipv4/ping.c | 39 ++-------------- net/ipv4/sysctl_net_ipv4.c | 56 ++--------------------- 7 files changed, 125 insertions(+), 102 deletions(-) create mode 100644 include/linux/group_range.h create mode 100644 kernel/group_range.c diff --git a/include/linux/group_range.h b/include/linux/group_range.h new file mode 100644 index 000000000000..5bd837eced95 --- /dev/null +++ b/include/linux/group_range.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_GROUP_RANGE_H +#define _LINUX_GROUP_RANGE_H + +#include +#include + +/* + * gid_t is either uint or ushort. We want to pass it to + * proc_dointvec_minmax(), so it must not be larger than MAX_INT + */ +#define GROUP_RANGE_MAX (((gid_t)~0U) >> 1) + +struct group_range { + seqlock_t lock; + kgid_t range[2]; +}; + +typedef struct group_range* (*sysctl_group_range_func_t)(struct ctl_table = *); +int sysctl_group_range(sysctl_group_range_func_t fn, struct ctl_table *tab= le, + int write, void *buffer, size_t *lenp, loff_t *ppos); + +bool check_current_group_range(struct group_range *gr); +#endif /* _LINUX_GROUP_RANGE_H */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index db762e35aca9..75d745a7c6e1 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -6,11 +6,11 @@ #ifndef __NETNS_IPV4_H__ #define __NETNS_IPV4_H__ =20 -#include #include #include #include #include +#include =20 struct ctl_table_header; struct ipv4_devconf; @@ -24,11 +24,6 @@ struct local_ports { bool warned; }; =20 -struct ping_group_range { - seqlock_t lock; - kgid_t range[2]; -}; - struct inet_hashinfo; =20 struct inet_timewait_death_row { @@ -204,7 +199,7 @@ struct netns_ipv4 { int sysctl_igmp_max_msf; int sysctl_igmp_qrv; =20 - struct ping_group_range ping_group_range; + 
struct group_range ping_group_range; =20 atomic_t dev_addr_genid; =20 diff --git a/include/net/ping.h b/include/net/ping.h index 9233ad3de0ad..37b1d7baeb7b 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -16,12 +16,6 @@ #define PING_HTABLE_SIZE 64 #define PING_HTABLE_MASK (PING_HTABLE_SIZE-1) =20 -/* - * gid_t is either uint or ushort. We want to pass it to - * proc_dointvec_minmax(), so it must not be larger than MAX_INT - */ -#define GID_T_MAX (((gid_t)~0U) >> 1) - /* Compatibility glue so we can support IPv6 when it's compiled as a modul= e */ struct pingv6_ops { int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len, diff --git a/kernel/Makefile b/kernel/Makefile index b69c95315480..fb3a812cf92e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y =3D fork.o exec_domain.o panic.o \ extable.o params.o \ kthread.o sys_ni.o nsproxy.o \ notifier.o ksysfs.o cred.o reboot.o \ - async.o range.o smpboot.o ucount.o regset.o + async.o range.o smpboot.o ucount.o regset.o group_range.o =20 obj-$(CONFIG_USERMODE_DRIVER) +=3D usermode_driver.o obj-$(CONFIG_MULTIUSER) +=3D groups.o diff --git a/kernel/group_range.c b/kernel/group_range.c new file mode 100644 index 000000000000..b5c7d35d680b --- /dev/null +++ b/kernel/group_range.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include +#include + +static void get_group_range(struct group_range *gr, kgid_t *low, kgid_t *h= igh) +{ + unsigned int seq; + + do { + seq =3D read_seqbegin(&gr->lock); + + *low =3D gr->range[0]; + *high =3D gr->range[1]; + } while (read_seqretry(&gr->lock, seq)); +} + +static void set_group_range(struct group_range *gr, kgid_t low, kgid_t hig= h) +{ + write_seqlock(&gr->lock); + gr->range[0] =3D low; + gr->range[1] =3D high; + write_sequnlock(&gr->lock); +} + +static int group_range_min[] =3D { 0, 0 }; +static int group_range_max[] =3D { GROUP_RANGE_MAX, GROUP_RANGE_MAX }; + +int 
sysctl_group_range(sysctl_group_range_func_t fn, struct ctl_table *tab= le, + int write, void *buffer, size_t *lenp, loff_t *ppos) +{ + struct group_range *gr =3D fn(table); + struct user_namespace *user_ns =3D current_user_ns(); + int ret; + gid_t urange[2]; + kgid_t low, high; + struct ctl_table tmp =3D { + .data =3D &urange, + .maxlen =3D sizeof(urange), + .mode =3D table->mode, + .extra1 =3D &group_range_min, + .extra2 =3D &group_range_max, + }; + + get_group_range(gr, &low, &high); + urange[0] =3D from_kgid_munged(user_ns, low); + urange[1] =3D from_kgid_munged(user_ns, high); + ret =3D proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + + if (write && ret =3D=3D 0) { + low =3D make_kgid(user_ns, urange[0]); + high =3D make_kgid(user_ns, urange[1]); + if (!gid_valid(low) || !gid_valid(high)) + return -EINVAL; + if (urange[1] < urange[0] || gid_lt(high, low)) { + low =3D make_kgid(&init_user_ns, 1); + high =3D make_kgid(&init_user_ns, 0); + } + set_group_range(gr, low, high); + } + + return ret; +} + +bool check_current_group_range(struct group_range *gr) +{ + kgid_t group =3D current_egid(); + struct group_info *group_info; + int i; + kgid_t low, high; + bool ret =3D true; + + get_group_range(gr, &low, &high); + if (gid_lte(low, group) && gid_lte(group, high)) + return true; + + group_info =3D get_current_groups(); + for (i =3D 0; i < group_info->ngroups; i++) { + kgid_t gid =3D group_info->gid[i]; + + if (gid_lte(low, gid) && gid_lte(gid, high)) + goto out_release_group; + } + ret =3D false; +out_release_group: + put_group_info(group_info); + return ret; +} diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 5178a3f3cb53..6e23771c5234 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -244,50 +244,17 @@ exit: return sk; } =20 -static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, - kgid_t *high) -{ - kgid_t *data =3D net->ipv4.ping_group_range.range; - unsigned int seq; - - do { - seq =3D 
read_seqbegin(&net->ipv4.ping_group_range.lock); - - *low =3D data[0]; - *high =3D data[1]; - } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq)); -} - - int ping_init_sock(struct sock *sk) { struct net *net =3D sock_net(sk); - kgid_t group =3D current_egid(); - struct group_info *group_info; - int i; - kgid_t low, high; - int ret =3D 0; =20 if (sk->sk_family =3D=3D AF_INET6) sk->sk_ipv6only =3D 1; =20 - inet_get_ping_group_range_net(net, &low, &high); - if (gid_lte(low, group) && gid_lte(group, high)) - return 0; - - group_info =3D get_current_groups(); - for (i =3D 0; i < group_info->ngroups; i++) { - kgid_t gid =3D group_info->gid[i]; + if (!check_current_group_range(&net->ipv4.ping_group_range)) + return -EACCES; =20 - if (gid_lte(low, gid) && gid_lte(gid, high)) - goto out_release_group; - } - - ret =3D -EACCES; - -out_release_group: - put_group_info(group_info); - return ret; + return 0; } EXPORT_SYMBOL_GPL(ping_init_sock); =20 diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 40fe70fc2015..ad355ab265db 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -34,8 +34,6 @@ static int ip_ttl_min =3D 1; static int ip_ttl_max =3D 255; static int tcp_syn_retries_min =3D 1; static int tcp_syn_retries_max =3D MAX_TCP_SYNCNT; -static int ip_ping_group_range_min[] =3D { 0, 0 }; -static int ip_ping_group_range_max[] =3D { GID_T_MAX, GID_T_MAX }; static u32 u32_max_div_HZ =3D UINT_MAX / HZ; static int one_day_secs =3D 24 * 3600; static u32 fib_multipath_hash_fields_all_mask __maybe_unused =3D @@ -133,66 +131,20 @@ static int ipv4_privileged_ports(struct ctl_table *ta= ble, int write, return ret; } =20 -static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_= t *low, kgid_t *high) +static struct group_range *ipv4_ping_group_range_func(struct ctl_table *ta= ble) { - kgid_t *data =3D table->data; struct net *net =3D container_of(table->data, struct net, ipv4.ping_group_range.range); - unsigned int 
seq; - do { - seq =3D read_seqbegin(&net->ipv4.ping_group_range.lock); =20 - *low =3D data[0]; - *high =3D data[1]; - } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq)); -} - -/* Update system visible IP port range */ -static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid= _t high) -{ - kgid_t *data =3D table->data; - struct net *net =3D - container_of(table->data, struct net, ipv4.ping_group_range.range); - write_seqlock(&net->ipv4.ping_group_range.lock); - data[0] =3D low; - data[1] =3D high; - write_sequnlock(&net->ipv4.ping_group_range.lock); + return &net->ipv4.ping_group_range; } =20 /* Validate changes from /proc interface. */ static int ipv4_ping_group_range(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - struct user_namespace *user_ns =3D current_user_ns(); - int ret; - gid_t urange[2]; - kgid_t low, high; - struct ctl_table tmp =3D { - .data =3D &urange, - .maxlen =3D sizeof(urange), - .mode =3D table->mode, - .extra1 =3D &ip_ping_group_range_min, - .extra2 =3D &ip_ping_group_range_max, - }; - - inet_get_ping_group_range_table(table, &low, &high); - urange[0] =3D from_kgid_munged(user_ns, low); - urange[1] =3D from_kgid_munged(user_ns, high); - ret =3D proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); - - if (write && ret =3D=3D 0) { - low =3D make_kgid(user_ns, urange[0]); - high =3D make_kgid(user_ns, urange[1]); - if (!gid_valid(low) || !gid_valid(high)) - return -EINVAL; - if (urange[1] < urange[0] || gid_lt(high, low)) { - low =3D make_kgid(&init_user_ns, 1); - high =3D make_kgid(&init_user_ns, 0); - } - set_ping_group_range(table, low, high); - } - - return ret; + return sysctl_group_range(ipv4_ping_group_range_func, table, + write, buffer, lenp, ppos); } =20 static int ipv4_fwd_update_priority(struct ctl_table *table, int write, --=20 2.38.4 From nobody Sat Feb 7 08:45:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on 
aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 3B5C8C7EE24 for ; Tue, 30 May 2023 19:00:19 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233233AbjE3TAR convert rfc822-to-8bit (ORCPT ); Tue, 30 May 2023 15:00:17 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:43786 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S233036AbjE3TAL (ORCPT ); Tue, 30 May 2023 15:00:11 -0400 Received: from mail-b.sr.ht (mail-b.sr.ht [173.195.146.151]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D2FB2107 for ; Tue, 30 May 2023 12:00:02 -0700 (PDT) Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id 8CE6311F060; Tue, 30 May 2023 18:50:50 +0000 (UTC) From: ~akihirosuda Date: Wed, 31 May 2023 02:31:11 +0900 Subject: [PATCH linux 2/3] group_range: allow GID from 2147483648 to 4294967294 Message-ID: <168547265011.24337.4306067683997517082-2@git.sr.ht> X-Mailer: git.sr.ht Reply-to: ~akihirosuda In-Reply-To: <168547265011.24337.4306067683997517082-0@git.sr.ht> To: linux-kernel@vger.kernel.org, containers@lists.linux.dev, serge@hallyn.com, brauner@kernel.org, paul@paul-moore.com, ebiederm@xmission.com Cc: suda.kyoto@gmail.com, akihiro.suda.cz@hco.ntt.co.jp Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org From: Akihiro Suda proc_dointvec_minmax is no longer used because it does not support GID from 2147483648 to 4294967294. proc_douintvec is not used either, because it does not support vectors, despite its function name. 
Signed-off-by: Akihiro Suda --- include/linux/group_range.h | 6 ----- kernel/group_range.c | 52 ++++++++++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/include/linux/group_range.h b/include/linux/group_range.h index 5bd837eced95..8f71dc956693 100644 --- a/include/linux/group_range.h +++ b/include/linux/group_range.h @@ -5,12 +5,6 @@ #include #include =20 -/* - * gid_t is either uint or ushort. We want to pass it to - * proc_dointvec_minmax(), so it must not be larger than MAX_INT - */ -#define GROUP_RANGE_MAX (((gid_t)~0U) >> 1) - struct group_range { seqlock_t lock; kgid_t range[2]; diff --git a/kernel/group_range.c b/kernel/group_range.c index b5c7d35d680b..13db83b77832 100644 --- a/kernel/group_range.c +++ b/kernel/group_range.c @@ -4,6 +4,7 @@ #include #include #include +#include #include =20 static void get_group_range(struct group_range *gr, kgid_t *low, kgid_t *h= igh) @@ -26,9 +27,6 @@ static void set_group_range(struct group_range *gr, kgid_= t low, kgid_t high) write_sequnlock(&gr->lock); } =20 -static int group_range_min[] =3D { 0, 0 }; -static int group_range_max[] =3D { GROUP_RANGE_MAX, GROUP_RANGE_MAX }; - int sysctl_group_range(sysctl_group_range_func_t fn, struct ctl_table *tab= le, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -37,24 +35,56 @@ int sysctl_group_range(sysctl_group_range_func_t fn, st= ruct ctl_table *table, int ret; gid_t urange[2]; kgid_t low, high; + size_t slen =3D 256; /* total bytes including '\0' */ + char *s =3D kmalloc(slen, GFP_KERNEL); /* clobbered by strsep */ struct ctl_table tmp =3D { - .data =3D &urange, - .maxlen =3D sizeof(urange), + .data =3D s, + .maxlen =3D slen, .mode =3D table->mode, - .extra1 =3D &group_range_min, - .extra2 =3D &group_range_max, }; =20 + if (unlikely(!s)) + return -ENOMEM; + + /* + * proc_dointvec_minmax is no longer used because it does not support + * GID from 2147483648 to 4294967294. 
+ * + * proc_douintvec is not used either, because it does not support + * vectors, despite its function name. + */ get_group_range(gr, &low, &high); urange[0] =3D from_kgid_munged(user_ns, low); urange[1] =3D from_kgid_munged(user_ns, high); - ret =3D proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + ret =3D snprintf(tmp.data, slen, "%u\t%u", urange[0], urange[1]); + if (ret < 0) + goto done; + ret =3D proc_dostring(&tmp, write, buffer, lenp, ppos); + if (*lenp >=3D slen - 1) /* truncated */ + ret =3D -EINVAL; =20 if (write && ret =3D=3D 0) { + char *tok[2]; + int i; + + s =3D strim(s); + tok[0] =3D strsep(&s, " \t"); + tok[1] =3D s; + for (i =3D 0; i < 2; i++) { + if (!tok[i]) { + ret =3D -EINVAL; + goto done; + } + ret =3D kstrtouint(tok[i], 0, &urange[i]); + if (ret < 0) + goto done; + } low =3D make_kgid(user_ns, urange[0]); high =3D make_kgid(user_ns, urange[1]); - if (!gid_valid(low) || !gid_valid(high)) - return -EINVAL; + if (!gid_valid(low) || !gid_valid(high)) { + ret =3D -EINVAL; + goto done; + } if (urange[1] < urange[0] || gid_lt(high, low)) { low =3D make_kgid(&init_user_ns, 1); high =3D make_kgid(&init_user_ns, 0); @@ -62,6 +92,8 @@ int sysctl_group_range(sysctl_group_range_func_t fn, stru= ct ctl_table *table, set_group_range(gr, low, high); } =20 +done: + kfree(tmp.data); return ret; } =20 --=20 2.38.4 From nobody Sat Feb 7 08:45:33 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 5DC3EC77B73 for ; Tue, 30 May 2023 19:00:17 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S233157AbjE3TAP convert rfc822-to-8bit (ORCPT ); Tue, 30 May 2023 15:00:15 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:43784 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230183AbjE3TAK 
(ORCPT ); Tue, 30 May 2023 15:00:10 -0400 Received: from mail-b.sr.ht (mail-b.sr.ht [173.195.146.151]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id BC227106 for ; Tue, 30 May 2023 12:00:02 -0700 (PDT) Authentication-Results: mail-b.sr.ht; dkim=none Received: from git.sr.ht (unknown [173.195.146.142]) by mail-b.sr.ht (Postfix) with ESMTPSA id B8D4811F074; Tue, 30 May 2023 18:50:50 +0000 (UTC) From: ~akihirosuda Date: Tue, 30 May 2023 20:34:33 +0900 Subject: [PATCH linux 3/3] userns: add sysctl "kernel.userns_group_range" Message-ID: <168547265011.24337.4306067683997517082-3@git.sr.ht> X-Mailer: git.sr.ht Reply-to: ~akihirosuda In-Reply-To: <168547265011.24337.4306067683997517082-0@git.sr.ht> To: linux-kernel@vger.kernel.org, containers@lists.linux.dev, serge@hallyn.com, brauner@kernel.org, paul@paul-moore.com, ebiederm@xmission.com Cc: suda.kyoto@gmail.com, akihiro.suda.cz@hco.ntt.co.jp Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org From: Akihiro Suda This sysctl restricts which groups can create a new userns without CAP_SYS_ADMIN in the current userns, so as to mitigate potential kernel vulnerabilities around userns. The sysctl value format is the same as "net.ipv4.ping_group_range". To disable creating new unprivileged userns, set the sysctl value to "1 0" in the initial userns. To allow everyone to create new userns, set the sysctl value to "0 4294967294". This is the default value. This sysctl replaces "kernel.unprivileged_userns_clone", which is found in Ubuntu [1] and Debian GNU/Linux. 
Link: https://git.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux/+git/ja= mmy/commit?id=3D3422764 [1] Signed-off-by: Akihiro Suda --- include/linux/user_namespace.h | 5 +++++ kernel/fork.c | 24 ++++++++++++++++++++++++ kernel/sysctl.c | 30 ++++++++++++++++++++++++++++++ kernel/user.c | 9 +++++++++ 4 files changed, 68 insertions(+) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 45f09bec02c4..b8b5a982f818 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -11,6 +11,10 @@ #include #include =20 +#ifdef CONFIG_SYSCTL +#include +#endif + #define UID_GID_MAP_MAX_BASE_EXTENTS 5 #define UID_GID_MAP_MAX_EXTENTS 340 =20 @@ -98,6 +102,7 @@ struct user_namespace { #ifdef CONFIG_SYSCTL struct ctl_table_set set; struct ctl_table_header *sysctls; + struct group_range group_range; #endif struct ucounts *ucounts; long ucount_max[UCOUNT_COUNTS]; diff --git a/kernel/fork.c b/kernel/fork.c index ed4e01daccaa..1e8debdf0896 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -111,6 +111,10 @@ #define CREATE_TRACE_POINTS #include =20 +#ifdef CONFIG_USER_NS +#include +#endif + /* * Minimum number of threads to boot the kernel */ @@ -2235,6 +2239,16 @@ static void rv_task_fork(struct task_struct *p) #define rv_task_fork(p) do {} while (0) #endif =20 +#ifdef CONFIG_USER_NS +static bool userns_clone_is_allowed(void) +{ + if (capable(CAP_SYS_ADMIN)) + return true; + + return check_current_group_range(&current_user_ns()->group_range); +} +#endif + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. 
@@ -2266,6 +2280,11 @@ __latent_entropy struct task_struct *copy_process( if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) =3D=3D (CLONE_NEWUSER|CLONE_= FS)) return ERR_PTR(-EINVAL); =20 +#ifdef CONFIG_USER_NS + if ((clone_flags & CLONE_NEWUSER) && !userns_clone_is_allowed()) + return ERR_PTR(-EPERM); +#endif + /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. @@ -3340,6 +3359,11 @@ static int check_unshare_flags(unsigned long unshare= _flags) return -EINVAL; } =20 +#ifdef CONFIG_USER_NS + if ((unshare_flags & CLONE_NEWUSER) && !userns_clone_is_allowed()) + return -EPERM; +#endif + return 0; } =20 diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bfe53e835524..ace7bf0fe9fc 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -80,6 +80,9 @@ #ifdef CONFIG_RT_MUTEXES #include #endif +#ifdef CONFIG_USER_NS +#include +#endif =20 /* shared constants to be used in various sysctls */ const int sysctl_vals[] =3D { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX= , 65535, -1 }; @@ -1615,6 +1618,24 @@ int proc_do_static_key(struct ctl_table *table, int = write, return ret; } =20 +#ifdef CONFIG_USER_NS +static struct group_range *userns_group_range_func(struct ctl_table *table) +{ + struct user_namespace *user_ns =3D + container_of(table->data, struct user_namespace, group_range.range); + + return &user_ns->group_range; +} + +/* Validate changes from /proc interface. 
*/ +static int userns_group_range(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + return sysctl_group_range(userns_group_range_func, table, + write, buffer, lenp, ppos); +} +#endif + static struct ctl_table kern_table[] =3D { { .procname =3D "panic", @@ -1623,6 +1644,15 @@ static struct ctl_table kern_table[] =3D { .mode =3D 0644, .proc_handler =3D proc_dointvec, }, +#ifdef CONFIG_USER_NS + { + .procname =3D "userns_group_range", + .data =3D &init_user_ns.group_range.range, + .maxlen =3D sizeof(init_user_ns.group_range.range), + .mode =3D 0644, + .proc_handler =3D userns_group_range, + }, +#endif #ifdef CONFIG_PROC_SYSCTL { .procname =3D "tainted", diff --git a/kernel/user.c b/kernel/user.c index d667debeafd6..4704c93f62f9 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -20,6 +20,10 @@ #include #include =20 +#ifdef CONFIG_SYSCTL +#include +#endif + /* * userns count is 1 for root user, 1 for init_uts_ns, * and 1 for... ? @@ -67,6 +71,11 @@ struct user_namespace init_user_ns =3D { .keyring_name_list =3D LIST_HEAD_INIT(init_user_ns.keyring_name_list), .keyring_sem =3D __RWSEM_INITIALIZER(init_user_ns.keyring_sem), #endif +#ifdef CONFIG_SYSCTL + .group_range =3D { + .range =3D {0, ((gid_t)~0U) - 1}, + }, +#endif }; EXPORT_SYMBOL_GPL(init_user_ns); =20 --=20 2.38.4