[v4] bpf: move bpf sysctls from kernel/sysctl.c to bpf module

[PATCH v4 sysctl-next] bpf: move bpf sysctls from kernel/sysctl.c to bpf module

Posted by Yan Zhu 4 years, 2 months ago

We're moving sysctls out of kernel/sysctl.c as it's a mess. We
already moved all filesystem sysctls out. And with time the goal is
to move all sysctls out to their own subsystem/actual user.

kernel/sysctl.c has grown to an insane mess and it's easy to run
into conflicts with it. The effort to move them out is part of this.

Signed-off-by: Yan Zhu <zhuyan34@huawei.com>

---
v1->v2:
  1.Added patch branch identifier sysctl-next.
  2.Re-describe the reason for the patch submission.

v2->v3:
  Re-describe the reason for the patch submission.

v3->v4:
  1.Remove '#include <linux/bpf.h>' in kernel/sysctl.c
  2.Re-adapt the patch
---
 kernel/bpf/syscall.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sysctl.c      | 79 -----------------------------------------------
 2 files changed, 87 insertions(+), 79 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cdaa1152436a..e9621cfa09f2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4908,3 +4908,90 @@ const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
 const struct bpf_prog_ops bpf_syscall_prog_ops = {
 	.test_run = bpf_prog_test_run_syscall,
 };
+
+#ifdef CONFIG_SYSCTL
+static int bpf_stats_handler(struct ctl_table *table, int write,
+			     void *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct static_key *key = (struct static_key *)table->data;
+	static int saved_val;
+	int val, ret;
+	struct ctl_table tmp = {
+		.data   = &val,
+		.maxlen = sizeof(val),
+		.mode   = table->mode,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = SYSCTL_ONE,
+	};
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	mutex_lock(&bpf_stats_enabled_mutex);
+	val = saved_val;
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	if (write && !ret && val != saved_val) {
+		if (val)
+			static_key_slow_inc(key);
+		else
+			static_key_slow_dec(key);
+		saved_val = val;
+	}
+	mutex_unlock(&bpf_stats_enabled_mutex);
+	return ret;
+}
+
+void __weak unpriv_ebpf_notify(int new_state)
+{
+}
+
+static int bpf_unpriv_handler(struct ctl_table *table, int write,
+			      void *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret, unpriv_enable = *(int *)table->data;
+	bool locked_state = unpriv_enable == 1;
+	struct ctl_table tmp = *table;
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	tmp.data = &unpriv_enable;
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	if (write && !ret) {
+		if (locked_state && unpriv_enable != 1)
+			return -EPERM;
+		*(int *)table->data = unpriv_enable;
+	}
+
+	unpriv_ebpf_notify(unpriv_enable);
+
+	return ret;
+}
+
+static struct ctl_table bpf_syscall_table[] = {
+	{
+		.procname	= "unprivileged_bpf_disabled",
+		.data		= &sysctl_unprivileged_bpf_disabled,
+		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
+		.mode		= 0644,
+		.proc_handler	= bpf_unpriv_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_TWO,
+	},
+	{
+		.procname	= "bpf_stats_enabled",
+		.data		= &bpf_stats_enabled_key.key,
+		.maxlen		= sizeof(bpf_stats_enabled_key),
+		.mode		= 0644,
+		.proc_handler	= bpf_stats_handler,
+	},
+	{ }
+};
+
+static int __init bpf_syscall_sysctl_init(void)
+{
+	register_sysctl_init("kernel", bpf_syscall_table);
+	return 0;
+}
+late_initcall(bpf_syscall_sysctl_init);
+#endif /* CONFIG_SYSCTL */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 21172d3dad6e..c0fdf465a93d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -62,7 +62,6 @@
 #include <linux/binfmts.h>
 #include <linux/sched/sysctl.h>
 #include <linux/kexec.h>
-#include <linux/bpf.h>
 #include <linux/mount.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/latencytop.h>
@@ -139,66 +138,6 @@ static const int max_extfrag_threshold = 1000;
 
 #endif /* CONFIG_SYSCTL */
 
-#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
-static int bpf_stats_handler(struct ctl_table *table, int write,
-			     void *buffer, size_t *lenp, loff_t *ppos)
-{
-	struct static_key *key = (struct static_key *)table->data;
-	static int saved_val;
-	int val, ret;
-	struct ctl_table tmp = {
-		.data   = &val,
-		.maxlen = sizeof(val),
-		.mode   = table->mode,
-		.extra1 = SYSCTL_ZERO,
-		.extra2 = SYSCTL_ONE,
-	};
-
-	if (write && !capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	mutex_lock(&bpf_stats_enabled_mutex);
-	val = saved_val;
-	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
-	if (write && !ret && val != saved_val) {
-		if (val)
-			static_key_slow_inc(key);
-		else
-			static_key_slow_dec(key);
-		saved_val = val;
-	}
-	mutex_unlock(&bpf_stats_enabled_mutex);
-	return ret;
-}
-
-void __weak unpriv_ebpf_notify(int new_state)
-{
-}
-
-static int bpf_unpriv_handler(struct ctl_table *table, int write,
-			      void *buffer, size_t *lenp, loff_t *ppos)
-{
-	int ret, unpriv_enable = *(int *)table->data;
-	bool locked_state = unpriv_enable == 1;
-	struct ctl_table tmp = *table;
-
-	if (write && !capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	tmp.data = &unpriv_enable;
-	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
-	if (write && !ret) {
-		if (locked_state && unpriv_enable != 1)
-			return -EPERM;
-		*(int *)table->data = unpriv_enable;
-	}
-
-	unpriv_ebpf_notify(unpriv_enable);
-
-	return ret;
-}
-#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
-
 /*
  * /proc/sys support
  */
@@ -2112,24 +2051,6 @@ static struct ctl_table kern_table[] = {
 		.extra2		= SYSCTL_ONE,
 	},
 #endif
-#ifdef CONFIG_BPF_SYSCALL
-	{
-		.procname	= "unprivileged_bpf_disabled",
-		.data		= &sysctl_unprivileged_bpf_disabled,
-		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
-		.mode		= 0644,
-		.proc_handler	= bpf_unpriv_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_TWO,
-	},
-	{
-		.procname	= "bpf_stats_enabled",
-		.data		= &bpf_stats_enabled_key.key,
-		.maxlen		= sizeof(bpf_stats_enabled_key),
-		.mode		= 0644,
-		.proc_handler	= bpf_stats_handler,
-	},
-#endif
 #if defined(CONFIG_TREE_RCU)
 	{
 		.procname	= "panic_on_rcu_stall",
-- 
2.12.3

Re: [PATCH v4 sysctl-next] bpf: move bpf sysctls from kernel/sysctl.c to bpf module

Posted by Daniel Borkmann 4 years, 2 months ago

On 4/7/22 9:07 AM, Yan Zhu wrote:
> We're moving sysctls out of kernel/sysctl.c as its a mess. We
> already moved all filesystem sysctls out. And with time the goal is
> to move all sysctls out to their own subsystem/actual user.
> 
> kernel/sysctl.c has grown to an insane mess and its easy to run
> into conflicts with it. The effort to move them out is part of this.
> 
> Signed-off-by: Yan Zhu <zhuyan34@huawei.com>

Acked-by: Daniel Borkmann <daniel@iogearbox.net>

Given the desire is to route this via sysctl-next and we're not shortly
before but after the merge window, could we get a feature branch for bpf-next
to pull from to avoid conflicts with ongoing development cycle?

Thanks,
Daniel

Re: [PATCH v4 sysctl-next] bpf: move bpf sysctls from kernel/sysctl.c to bpf module

Posted by Luis Chamberlain 4 years, 2 months ago

On Wed, Apr 13, 2022 at 04:45:00PM +0200, Daniel Borkmann wrote:
> On 4/7/22 9:07 AM, Yan Zhu wrote:
> > We're moving sysctls out of kernel/sysctl.c as its a mess. We
> > already moved all filesystem sysctls out. And with time the goal is
> > to move all sysctls out to their own subsystem/actual user.
> > 
> > kernel/sysctl.c has grown to an insane mess and its easy to run
> > into conflicts with it. The effort to move them out is part of this.
> > 
> > Signed-off-by: Yan Zhu <zhuyan34@huawei.com>
> 
> Acked-by: Daniel Borkmann <daniel@iogearbox.net>
> 
> Given the desire is to route this via sysctl-next and we're not shortly
> before but after the merge win, could we get a feature branch for bpf-next
> to pull from to avoid conflicts with ongoing development cycle?

Sure thing. So I've never done this sort of thing, so forgive me for
being new at it. Would it make sense to merge this change to sysctl-next
as-is today and put a frozen branch sysctl-next-bpf to reflect this,
which bpf-next can merge. And then sysctl-next just continues to chug on
its own? As-is my goal is to keep sysctl-next as immutable as well.

Or is there a better approach you can recommend?

  Luis

Re: [PATCH v4 sysctl-next] bpf: move bpf sysctls from kernel/sysctl.c to bpf module

Posted by Daniel Borkmann 4 years, 2 months ago

On 4/13/22 9:00 PM, Luis Chamberlain wrote:
> On Wed, Apr 13, 2022 at 04:45:00PM +0200, Daniel Borkmann wrote:
>> On 4/7/22 9:07 AM, Yan Zhu wrote:
>>> We're moving sysctls out of kernel/sysctl.c as its a mess. We
>>> already moved all filesystem sysctls out. And with time the goal is
>>> to move all sysctls out to their own subsystem/actual user.
>>>
>>> kernel/sysctl.c has grown to an insane mess and its easy to run
>>> into conflicts with it. The effort to move them out is part of this.
>>>
>>> Signed-off-by: Yan Zhu <zhuyan34@huawei.com>
>>
>> Acked-by: Daniel Borkmann <daniel@iogearbox.net>
>>
>> Given the desire is to route this via sysctl-next and we're not shortly
>> before but after the merge win, could we get a feature branch for bpf-next
>> to pull from to avoid conflicts with ongoing development cycle?
> 
> Sure thing. So I've never done this sort of thing, so forgive me for
> being new at it. Would it make sense to merge this change to sysctl-next
> as-is today and put a frozen branch sysclt-next-bpf to reflect this,
> which bpf-next can merge. And then sysctl-next just continues to chug on
> its own? As-is my goal is to keep sysctl-next as immutable as well.
> 
> Or is there a better approach you can recommend?

Are you able to merge the pr/bpf-sysctl branch into your sysctl-next tree?

   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/log/?h=pr/bpf-sysctl

This is based off common base for both trees (3123109284176b1532874591f7c81f3837bbdc17)
so should only pull in the single commit then.

Thanks,
Daniel

Re: [PATCH v4 sysctl-next] bpf: move bpf sysctls from kernel/sysctl.c to bpf module

Posted by Luis Chamberlain 4 years, 2 months ago

On Wed, Apr 13, 2022 at 09:40:58PM +0200, Daniel Borkmann wrote:
> On 4/13/22 9:00 PM, Luis Chamberlain wrote:
> > On Wed, Apr 13, 2022 at 04:45:00PM +0200, Daniel Borkmann wrote:
> > > On 4/7/22 9:07 AM, Yan Zhu wrote:
> > > > We're moving sysctls out of kernel/sysctl.c as its a mess. We
> > > > already moved all filesystem sysctls out. And with time the goal is
> > > > to move all sysctls out to their own subsystem/actual user.
> > > > 
> > > > kernel/sysctl.c has grown to an insane mess and its easy to run
> > > > into conflicts with it. The effort to move them out is part of this.
> > > > 
> > > > Signed-off-by: Yan Zhu <zhuyan34@huawei.com>
> > > 
> > > Acked-by: Daniel Borkmann <daniel@iogearbox.net>
> > > 
> > > Given the desire is to route this via sysctl-next and we're not shortly
> > > before but after the merge win, could we get a feature branch for bpf-next
> > > to pull from to avoid conflicts with ongoing development cycle?
> > 
> > Sure thing. So I've never done this sort of thing, so forgive me for
> > being new at it. Would it make sense to merge this change to sysctl-next
> > as-is today and put a frozen branch sysclt-next-bpf to reflect this,
> > which bpf-next can merge. And then sysctl-next just continues to chug on
> > its own? As-is my goal is to keep sysctl-next as immutable as well.
> > 
> > Or is there a better approach you can recommend?
> 
> Are you able to merge the pr/bpf-sysctl branch into your sysctl-next tree?
> 
>   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/log/?h=pr/bpf-sysctl
> 
> This is based off common base for both trees (3123109284176b1532874591f7c81f3837bbdc17)
> so should only pull in the single commit then.

Yup. That worked just fine. I pushed it.

  Luis

Re: [PATCH v4 sysctl-next] bpf: move bpf sysctls from kernel/sysctl.c to bpf module

Posted by Daniel Borkmann 4 years, 2 months ago

On 4/13/22 9:46 PM, Luis Chamberlain wrote:
> On Wed, Apr 13, 2022 at 09:40:58PM +0200, Daniel Borkmann wrote:
>> On 4/13/22 9:00 PM, Luis Chamberlain wrote:
>>> On Wed, Apr 13, 2022 at 04:45:00PM +0200, Daniel Borkmann wrote:
>>>> On 4/7/22 9:07 AM, Yan Zhu wrote:
>>>>> We're moving sysctls out of kernel/sysctl.c as its a mess. We
>>>>> already moved all filesystem sysctls out. And with time the goal is
>>>>> to move all sysctls out to their own subsystem/actual user.
>>>>>
>>>>> kernel/sysctl.c has grown to an insane mess and its easy to run
>>>>> into conflicts with it. The effort to move them out is part of this.
>>>>>
>>>>> Signed-off-by: Yan Zhu <zhuyan34@huawei.com>
>>>>
>>>> Acked-by: Daniel Borkmann <daniel@iogearbox.net>
>>>>
>>>> Given the desire is to route this via sysctl-next and we're not shortly
>>>> before but after the merge win, could we get a feature branch for bpf-next
>>>> to pull from to avoid conflicts with ongoing development cycle?
>>>
>>> Sure thing. So I've never done this sort of thing, so forgive me for
>>> being new at it. Would it make sense to merge this change to sysctl-next
>>> as-is today and put a frozen branch sysclt-next-bpf to reflect this,
>>> which bpf-next can merge. And then sysctl-next just continues to chug on
>>> its own? As-is my goal is to keep sysctl-next as immutable as well.
>>>
>>> Or is there a better approach you can recommend?
>>
>> Are you able to merge the pr/bpf-sysctl branch into your sysctl-next tree?
>>
>>    https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/log/?h=pr/bpf-sysctl
>>
>> This is based off common base for both trees (3123109284176b1532874591f7c81f3837bbdc17)
>> so should only pull in the single commit then.
> 
> Yup. That worked just fine. I pushed it.

Great, thanks!