[PATCH 1/3] Wire up the lsm_manage_policy syscall

Maxime Bélair posted 3 patches 7 months, 2 weeks ago
[PATCH 1/3] Wire up the lsm_manage_policy syscall
Posted by Maxime Bélair 7 months, 2 weeks ago
Add support for the new lsm_manage_policy syscall, providing a unified
API for loading and modifying LSM policies without requiring the LSM’s
pseudo-filesystem.

Benefits:
  - Works even if the LSM pseudo-filesystem isn’t mounted or available
    (e.g. in containers)
  - Offers a logical and unified interface rather than multiple
    heterogeneous pseudo-filesystems.
  - Avoids overhead of other kernel interfaces for better efficiency

Signed-off-by: Maxime Bélair <maxime.belair@canonical.com>
---
 arch/alpha/kernel/syscalls/syscall.tbl            | 1 +
 arch/arm/tools/syscall.tbl                        | 1 +
 arch/x86/entry/syscalls/syscall_32.tbl            | 1 +
 arch/x86/entry/syscalls/syscall_64.tbl            | 1 +
 include/linux/syscalls.h                          | 4 ++++
 include/uapi/asm-generic/unistd.h                 | 4 +++-
 kernel/sys_ni.c                                   | 1 +
 security/lsm_syscalls.c                           | 6 ++++++
 tools/include/uapi/asm-generic/unistd.h           | 4 +++-
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 1 +
 10 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 2dd6340de6b4..dfe6cd43c584 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -507,3 +507,4 @@
 575	common	listxattrat			sys_listxattrat
 576	common	removexattrat			sys_removexattrat
 577	common	open_tree_attr			sys_open_tree_attr
+578	common	lsm_manage_policy		sys_lsm_manage_policy
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 27c1d5ebcd91..60abcb3a8a1b 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -482,3 +482,4 @@
 465	common	listxattrat			sys_listxattrat
 466	common	removexattrat			sys_removexattrat
 467	common	open_tree_attr			sys_open_tree_attr
+468	common	lsm_manage_policy		sys_lsm_manage_policy
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index ac007ea00979..bb91a929757a 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -473,3 +473,4 @@
 465	i386	listxattrat		sys_listxattrat
 466	i386	removexattrat		sys_removexattrat
 467	i386	open_tree_attr		sys_open_tree_attr
+468	i386	lsm_manage_policy	sys_lsm_manage_policy
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index cfb5ca41e30d..83819d4a5c8a 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -391,6 +391,7 @@
 465	common	listxattrat		sys_listxattrat
 466	common	removexattrat		sys_removexattrat
 467	common	open_tree_attr		sys_open_tree_attr
+468	common	lsm_manage_policy	sys_lsm_manage_policy
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e5603cc91963..f52a0678b1d0 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -989,6 +989,10 @@ asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx __user *
 				      u32 size, u32 flags);
 asmlinkage long sys_lsm_list_modules(u64 __user *ids, u32 __user *size, u32 flags);
 
+asmlinkage long sys_lsm_manage_policy(u32 lsm_id, u32 op, void __user *buf,
+		u32 __user *size, u32 flags);
+
+
 /*
  * Architecture-specific system calls
  */
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 2892a45023af..b94369baded8 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -851,9 +851,11 @@ __SYSCALL(__NR_listxattrat, sys_listxattrat)
 __SYSCALL(__NR_removexattrat, sys_removexattrat)
 #define __NR_open_tree_attr 467
 __SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
+#define __NR_lsm_manage_policy 468
+__SYSCALL(__NR_lsm_manage_policy, lsm_manage_policy)
 
 #undef __NR_syscalls
-#define __NR_syscalls 468
+#define __NR_syscalls 469
 
 /*
  * 32 bit systems traditionally used different
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index c00a86931f8c..e556b07d8716 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -172,6 +172,7 @@ COND_SYSCALL_COMPAT(fadvise64_64);
 COND_SYSCALL(lsm_get_self_attr);
 COND_SYSCALL(lsm_set_self_attr);
 COND_SYSCALL(lsm_list_modules);
+COND_SYSCALL(lsm_manage_policy);
 
 /* CONFIG_MMU only */
 COND_SYSCALL(swapon);
diff --git a/security/lsm_syscalls.c b/security/lsm_syscalls.c
index 8440948a690c..dcaad8818679 100644
--- a/security/lsm_syscalls.c
+++ b/security/lsm_syscalls.c
@@ -118,3 +118,9 @@ SYSCALL_DEFINE3(lsm_list_modules, u64 __user *, ids, u32 __user *, size,
 
 	return lsm_active_cnt;
 }
+
+SYSCALL_DEFINE5(lsm_manage_policy, u32, lsm_id, u32, op, void __user *, buf, u32
+		__user *, size, u32, flags)
+{
+	return 0;
+}
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 2892a45023af..b94369baded8 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -851,9 +851,11 @@ __SYSCALL(__NR_listxattrat, sys_listxattrat)
 __SYSCALL(__NR_removexattrat, sys_removexattrat)
 #define __NR_open_tree_attr 467
 __SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
+#define __NR_lsm_manage_policy 468
+__SYSCALL(__NR_lsm_manage_policy, lsm_manage_policy)
 
 #undef __NR_syscalls
-#define __NR_syscalls 468
+#define __NR_syscalls 469
 
 /*
  * 32 bit systems traditionally used different
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index cfb5ca41e30d..83819d4a5c8a 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -391,6 +391,7 @@
 465	common	listxattrat		sys_listxattrat
 466	common	removexattrat		sys_removexattrat
 467	common	open_tree_attr		sys_open_tree_attr
+468	common	lsm_manage_policy	sys_lsm_manage_policy
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
-- 
2.48.1

Re: [PATCH 1/3] Wire up the lsm_manage_policy syscall
Posted by kernel test robot 7 months, 1 week ago
Hi Maxime,

kernel test robot noticed the following build warnings:

[auto build test WARNING on 9c32cda43eb78f78c73aee4aa344b777714e259b]

url:    https://github.com/intel-lab-lkp/linux/commits/Maxime-B-lair/Wire-up-the-lsm_manage_policy-syscall/20250506-224212
base:   9c32cda43eb78f78c73aee4aa344b777714e259b
patch link:    https://lore.kernel.org/r/20250506143254.718647-2-maxime.belair%40canonical.com
patch subject: [PATCH 1/3] Wire up the lsm_manage_policy syscall
config: s390-allnoconfig (https://download.01.org/0day-ci/archive/20250507/202505072131.ogtsaLPI-lkp@intel.com/config)
compiler: clang version 21.0.0git (https://github.com/llvm/llvm-project f819f46284f2a79790038e1f6649172789734ae8)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250507/202505072131.ogtsaLPI-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202505072131.ogtsaLPI-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> <stdin>:1618:2: warning: syscall lsm_manage_policy not implemented [-W#warnings]
    1618 | #warning syscall lsm_manage_policy not implemented
         |  ^
   1 warning generated.
--
>> <stdin>:1618:2: warning: syscall lsm_manage_policy not implemented [-W#warnings]
    1618 | #warning syscall lsm_manage_policy not implemented
         |  ^
   1 warning generated.

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH 1/3] Wire up the lsm_manage_policy syscall
Posted by Song Liu 7 months, 1 week ago
On Tue, May 6, 2025 at 7:40 AM Maxime Bélair
<maxime.belair@canonical.com> wrote:
>
> Add support for the new lsm_manage_policy syscall, providing a unified
> API for loading and modifying LSM policies without requiring the LSM’s
> pseudo-filesystem.
>
> Benefits:
>   - Works even if the LSM pseudo-filesystem isn’t mounted or available
>     (e.g. in containers)
>   - Offers a logical and unified interface rather than multiple
>     heterogeneous pseudo-filesystems.

These two do not feel like real benefits:
- Not working in containers is often not an issue, but a feature.
- One syscall cannot fit all use cases well...

>   - Avoids overhead of other kernel interfaces for better efficiency

.. and it is probably less efficient, because everything needs to
fit in the same API.

Overall, this set doesn't feel like a good change to me.

Thanks,
Song
Re: [PATCH 1/3] Wire up the lsm_manage_policy syscall
Posted by John Johansen 7 months, 1 week ago
On 5/6/25 23:26, Song Liu wrote:
> On Tue, May 6, 2025 at 7:40 AM Maxime Bélair
> <maxime.belair@canonical.com> wrote:
>>
>> Add support for the new lsm_manage_policy syscall, providing a unified
>> API for loading and modifying LSM policies without requiring the LSM’s
>> pseudo-filesystem.
>>
>> Benefits:
>>    - Works even if the LSM pseudo-filesystem isn’t mounted or available
>>      (e.g. in containers)
>>    - Offers a logical and unified interface rather than multiple
>>      heterogeneous pseudo-filesystems.
> 
> These two do not feel like real benefits:
> - Not working in containers is often not an issue, but a feature.

And the LSM doesn't have to allow the syscall to function in a container
where that isn't appropriate. It's up to the LSM whether the syscall is
supported and what kind of permissions are needed.

However, having the ability to function in a container without having to
mount securityfs or procfs into the container, similar to what landlock
gets with its syscall, can be beneficial.

> - One syscall cannot fit all use cases well...
> 
of course not, and for those other use cases new syscalls can be added.

>>    - Avoids overhead of other kernel interfaces for better efficiency
> 
> .. and it is is probably less efficient, because everything need to
> fit in the same API.
> 
No, not everything, just what fits into the syscall. Nor does an LSM
have to use the syscall; it can still use what works for it.

This could be a little more efficient than the current fs interface
used by apparmor/selinux/smack but I don't think efficiency is going
to be a huge win for this.


> Overall, this set doesn't feel like a good change to me.
> 
> Thanks,
> Song

Re: [PATCH 1/3] Wire up the lsm_manage_policy syscall
Posted by Maxime Bélair 7 months, 1 week ago

On 5/7/25 08:26, Song Liu wrote:
> On Tue, May 6, 2025 at 7:40 AM Maxime Bélair
> <maxime.belair@canonical.com> wrote:
>>
>> Add support for the new lsm_manage_policy syscall, providing a unified
>> API for loading and modifying LSM policies without requiring the LSM’s
>> pseudo-filesystem.
>>
>> Benefits:
>>   - Works even if the LSM pseudo-filesystem isn’t mounted or available
>>     (e.g. in containers)
>>   - Offers a logical and unified interface rather than multiple
>>     heterogeneous pseudo-filesystems.
> 
> These two do not feel like real benefits:
> - One syscall cannot fit all use cases well...

This syscall is not intended to cover every case, nor to replace existing kernel
interfaces.

Each LSM can decide which operations it wants to support (if any). For example, when
loading policies, an LSM may choose to allow only policies that further restrict
privileges.

> - Not working in containers is often not an issue, but a feature.

Indeed, using this syscall requires appropriate capabilities and will not permit
unprivileged containers to manage policies arbitrarily.

With this syscall, capability checks remain the responsibility of each LSM.

For instance, in the AppArmor patch, a profile can be loaded only if
aa_policy_admin_capable() succeeds (which requires CAP_MAC_ADMIN). Moreover, by design,
policies can be loaded only in the current namespace.

I see this syscall as a middle point between exposing the entire sysfs, creating a large
attack surface, and blocking everything.

Landlock’s existing syscalls already improve security by allowing processes to further
restrict their ambient rights while adding only a modest attack surface.

This syscall is a further step in that direction: it lets LSMs add restrictive policies 
without requiring exposing every other interface.

Again, each module decides which operations to expose through this syscall. In many cases
the operation will still require CAP_SYS_ADMIN or a similar capability, so environments
that choose this interface remain secure while gaining its advantages.

>>   - Avoids overhead of other kernel interfaces for better efficiency
> 
> .. and it is is probably less efficient, because everything need to
> fit in the same API.

As shown below, the syscall can significantly improve the performance of policy management.
A more detailed benchmark is available in [1].

The following table presents the time required to load an AppArmor profile.

For every cell, the first value is the total time taken by aa-load, and the value in
parentheses is the time spent to load the policy in the kernel only (total - dry‑run).

Results are in microseconds and are averaged over 10 000 runs to reduce variance. 


| t (µs)    | pseudofs    | syscall     | Speedup       |
|-----------|-------------|-------------|---------------|
| 1password | 4257 (1127) | 3333 (192)  | x1.28 (x5.86) |
| Xorg      | 6099 (2961) | 5167 (2020) | x1.18 (x1.47) |

If an LSM wants to allow several operations for a single LSM_POLICY_XXX, it can multiplex
a sub‑opcode in flags and select the appropriate handler; this incurs negligible overhead.

Thanks,

Maxime

[1] https://gitlab.com/-/snippets/4840792
Re: [PATCH 1/3] Wire up the lsm_manage_policy syscall
Posted by Song Liu 7 months, 1 week ago
On Wed, May 7, 2025 at 8:37 AM Maxime Bélair
<maxime.belair@canonical.com> wrote:
[...]
> >
> > These two do not feel like real benefits:
> > - One syscall cannot fit all use cases well...
>
> This syscall is not intended to cover every case, nor to replace existing kernel
> interfaces.
>
> Each LSM can decide which operations it wants to support (if any). For example, when
> loading policies, an LSM may choose to allow only policies that further restrict
> privileges.
>
> > - Not working in containers is often not an issue, but a feature.
>
> Indeed, using this syscall requires appropriate capabilities and will not permit
> unprivileged containers to manage policies arbitrarily.
>
> With this syscall, capability checks remain the responsibility of each LSM.
>
> For instance, in the AppArmor patch, a profile can be loaded only if
> aa_policy_admin_capable() succeeds (which requires CAP_MAC_ADMIN). Moreover, by design,
> policies can be loaded only in the current namespace.
>
> I see this syscall as a middle point between exposing the entire sysfs, creating a large
> attack surface, and blocking everything.
>
> Landlock’s existing syscalls already improve security by allowing processes to further
> restrict their ambient rights while adding only a modest attack surface.
>
> This syscall is a further step in that direction: it lets LSMs add restrictive policies
> without requiring exposing every other interface.

I don't think a syscall makes the API more secure. If necessary, we can add
permission check to each pseudo file. The downside of the syscall, however,
is that all the permission checks are hard-coded in the kernel (except for
BPF LSM); while the sys admin can configure permissions of the pseudo
files in user space.

> Again, each module decides which operations to expose through this syscall. In many cases
> the operation will still require CAP_SYS_ADMIN or a similar capability, so environments
> that choose this interface remain secure while gaining its advantages.
>
> >>   - Avoids overhead of other kernel interfaces for better efficiency
> >
> > .. and it is is probably less efficient, because everything need to
> > fit in the same API.
>
> As shown below, the syscall can significantly improve the performance of policy management.
> A more detailed benchmark is available in [1].
>
> The following table presents the time required to load an AppArmor profile.
>
> For every cell, the first value is the total time taken by aa-load, and the value in
> parentheses is the time spent to load the policy in the kernel only (total - dry‑run).
>
> Results are in microseconds and are averaged over 10 000 runs to reduce variance.
>
>
> | t (µs)    | pseudofs    | syscall     | Speedup       |
> |-----------|-------------|-------------|---------------|
> | 1password | 4257 (1127) | 3333 (192)  | x1.28 (x5.86) |
> | Xorg      | 6099 (2961) | 5167 (2020) | x1.18 (x1.47) |
>

I am not sure the performance of loading security policies is on any
critical path.
The implementation calls the hook for each LSM, which is why I think the
syscall is not efficient.

Overall, I am still not convinced a syscall for all LSMs is needed. To
justify such a syscall, I think we need to show that it is useful in
multiple LSMs. Also, if we really want to have a single set of APIs for
all LSMs, we may also need get_policy, remove_policy, etc. This set as-is
appears to be an incomplete design. The implementation, with
call_int_hook, is also problematic. It can easily cause some
controversial behaviors.

Thanks,
Song
Re: [PATCH 1/3] Wire up the lsm_manage_policy syscall
Posted by John Johansen 7 months, 1 week ago
On 5/7/25 23:06, Song Liu wrote:
> On Wed, May 7, 2025 at 8:37 AM Maxime Bélair
> <maxime.belair@canonical.com> wrote:
> [...]
>>>
>>> These two do not feel like real benefits:
>>> - One syscall cannot fit all use cases well...
>>
>> This syscall is not intended to cover every case, nor to replace existing kernel
>> interfaces.
>>
>> Each LSM can decide which operations it wants to support (if any). For example, when
>> loading policies, an LSM may choose to allow only policies that further restrict
>> privileges.
>>
>>> - Not working in containers is often not an issue, but a feature.
>>
>> Indeed, using this syscall requires appropriate capabilities and will not permit
>> unprivileged containers to manage policies arbitrarily.
>>
>> With this syscall, capability checks remain the responsibility of each LSM.
>>
>> For instance, in the AppArmor patch, a profile can be loaded only if
>> aa_policy_admin_capable() succeeds (which requires CAP_MAC_ADMIN). Moreover, by design,
>> policies can be loaded only in the current namespace.
>>
>> I see this syscall as a middle point between exposing the entire sysfs, creating a large
>> attack surface, and blocking everything.
>>
>> Landlock’s existing syscalls already improve security by allowing processes to further
>> restrict their ambient rights while adding only a modest attack surface.
>>
>> This syscall is a further step in that direction: it lets LSMs add restrictive policies
>> without requiring exposing every other interface.
> 
> I don't think a syscall makes the API more secure. If necessary, we can add

It exposes a different attack surface. Requiring mounting of the fs to where it is visible
in the container, provides attack surface, and requires additional external configuration.

Then there is the whole issue of getting the various LSMs to allow another LSM in the
stack to be able to manage its own policy.

> permission check to each pseudo file. The downside of the syscall, however,
> is that all the permission checks are hard-coded in the kernel (except for

The permission checks don't have to be hard coded. Each LSM can define how it handles
or manages the syscall. The default is that it isn't supported, but if an LSM decides
to support it, there is no reason that its policy can't determine the use of the
syscall.

> BPF LSM); while the sys admin can configure permissions of the pseudo
> files in user space.
> 
Other LSMs also have policy that can control access to pseudo filesystems and
other resources. Again, the control doesn't have to be hard coded. And seccomp can
be used to block the syscall.



>> Again, each module decides which operations to expose through this syscall. In many cases
>> the operation will still require CAP_SYS_ADMIN or a similar capability, so environments
>> that choose this interface remain secure while gaining its advantages.
>>
>>>>    - Avoids overhead of other kernel interfaces for better efficiency
>>>
>>> .. and it is is probably less efficient, because everything need to
>>> fit in the same API.
>>
>> As shown below, the syscall can significantly improve the performance of policy management.
>> A more detailed benchmark is available in [1].
>>
>> The following table presents the time required to load an AppArmor profile.
>>
>> For every cell, the first value is the total time taken by aa-load, and the value in
>> parentheses is the time spent to load the policy in the kernel only (total - dry‑run).
>>
>> Results are in microseconds and are averaged over 10 000 runs to reduce variance.
>>
>>
>> | t (µs)    | pseudofs    | syscall     | Speedup       |
>> |-----------|-------------|-------------|---------------|
>> | 1password | 4257 (1127) | 3333 (192)  | x1.28 (x5.86) |
>> | Xorg      | 6099 (2961) | 5167 (2020) | x1.18 (x1.47) |
>>
> 
> I am not sure the performance of loading security policies is on any
> critical path.

Generally speaking I agree, but I am also not going to turn down a
performance improvement either. It's a nice-to-have, but not a strong
argument for need.

> The implementation calls the hook for each LSM, which is why I think the
> syscall is not efficient.
> 
it should only call the LSM identified by the lsmid in the call.

> Overall, I am still not convinced a syscall for all LSMs is needed. To
> justify such

It's not needed by all LSMs, just a subset of them, and some nebulous
subset of potentially future LSMs that is entirely undefinable.

If we had had appropriate LSM syscalls, landlock wouldn't have needed
to have landlock-specific syscalls. Having another LSM go that route
feels wrong, especially now that we have some LSM syscalls. If a
syscall is needed by an LSM, it's better to try hashing something out
that might have utility for multiple LSMs or, at the very least,
potentially have utility in the future.


> a syscall, I think we need to show that it is useful in multiple LSMs.
> Also, if we
> really want to have single set of APIs for all LSMs, we may also need
> get_policy,

We are never going to get a single set of APIs for all LSMs. I will
settle for an API that has utility for a subset.

> remove_policy, etc. This set as-is appears to be an incomplete design. The

To have a complete design, there needs to be feedback and discussion
from multiple LSMs. This is a starting point.

> implementation, with call_int_hook, is also problematic. It can easily
> cause some controversial behaviors.
> 
agreed it shouldn't be doing a straight call_int_hook, it should only
call it against the lsm identified by the lsmid

Re: [PATCH 1/3] Wire up the lsm_manage_policy syscall
Posted by Mickaël Salaün 7 months, 1 week ago
On Thu, May 08, 2025 at 01:18:20AM -0700, John Johansen wrote:
> On 5/7/25 23:06, Song Liu wrote:
> > On Wed, May 7, 2025 at 8:37 AM Maxime Bélair
> > <maxime.belair@canonical.com> wrote:
> > [...]
> > > > 
> > > > These two do not feel like real benefits:
> > > > - One syscall cannot fit all use cases well...
> > > 
> > > This syscall is not intended to cover every case, nor to replace existing kernel
> > > interfaces.
> > > 
> > > Each LSM can decide which operations it wants to support (if any). For example, when
> > > loading policies, an LSM may choose to allow only policies that further restrict
> > > privileges.
> > > 
> > > > - Not working in containers is often not an issue, but a feature.
> > > 
> > > Indeed, using this syscall requires appropriate capabilities and will not permit
> > > unprivileged containers to manage policies arbitrarily.
> > > 
> > > With this syscall, capability checks remain the responsibility of each LSM.
> > > 
> > > For instance, in the AppArmor patch, a profile can be loaded only if
> > > aa_policy_admin_capable() succeeds (which requires CAP_MAC_ADMIN). Moreover, by design,
> > > policies can be loaded only in the current namespace.
> > > 
> > > I see this syscall as a middle point between exposing the entire sysfs, creating a large
> > > attack surface, and blocking everything.
> > > 
> > > Landlock’s existing syscalls already improve security by allowing processes to further
> > > restrict their ambient rights while adding only a modest attack surface.
> > > 
> > > This syscall is a further step in that direction: it lets LSMs add restrictive policies
> > > without requiring exposing every other interface.
> > 
> > I don't think a syscall makes the API more secure. If necessary, we can add
> 
> It exposes a different attack surface. Requiring mounting of the fs to where it is visible
> in the container, provides attack surface, and requires additional external configuration.

We should also keep in mind that syscalls could be accessible from
everywhere, by everyone, which may increase the attack surface compared
to a privileged filesystem interface.  Adding a second interface may
also introduce issues.  Anyway, I'm definitely not against syscalls, but
I don't see why the filesystem interface would be "less secure" in this
context.

> 
> Then there is the whole issue of getting the various LSMs to allow another LSM in the
> stack to be able manage its own policy.

Right, and it's a similar issue with seccomp policies wrt syscalls.

> 
> > permission check to each pseudo file. The downside of the syscall, however,
> > is that all the permission checks are hard-coded in the kernel (except for
> 
> The permission checks don't have to be hard coded. Each LSM can define how it handles
> or manages the syscall. The default is that it isn't supported, but if an lsm decides
> to support it, there is now reason that its policy can't determine the use of the
> syscall.

From an interface design point of view, it would be better to clearly
specify the scope of a command (e.g. which components could be impacted
by a command), and make sure the documentation reflects that as well.
Even better, have one syscall per required privilege and impact (e.g.
privileged or unprivileged).  Going down this road, I'm not sure if a
privileged syscall would make sense given the existing filesystem
interface.

> 
> > BPF LSM); while the sys admin can configure permissions of the pseudo
> > files in user space.
> > 
> Other LSMs also have policy that can control access to pseudo filesystems and
> other resources. Again, the control doesn't have to be hard coded. And seccomp can
> be used to block the syscall.
> 
> 
> 
> > > Again, each module decides which operations to expose through this syscall. In many cases
> > > the operation will still require CAP_SYS_ADMIN or a similar capability, so environments
> > > that choose this interface remain secure while gaining its advantages.
> > > 
> > > > >    - Avoids overhead of other kernel interfaces for better efficiency
> > > > 
> > > > .. and it is is probably less efficient, because everything need to
> > > > fit in the same API.
> > > 
> > > As shown below, the syscall can significantly improve the performance of policy management.
> > > A more detailed benchmark is available in [1].
> > > 
> > > The following table presents the time required to load an AppArmor profile.
> > > 
> > > For every cell, the first value is the total time taken by aa-load, and the value in
> > > parentheses is the time spent to load the policy in the kernel only (total - dry‑run).
> > > 
> > > Results are in microseconds and are averaged over 10 000 runs to reduce variance.
> > > 
> > > 
> > > | t (µs)    | pseudofs    | syscall     | Speedup       |
> > > |-----------|-------------|-------------|---------------|
> > > | 1password | 4257 (1127) | 3333 (192)  | x1.28 (x5.86) |
> > > | Xorg      | 6099 (2961) | 5167 (2020) | x1.18 (x1.47) |
> > > 
> > 
> > I am not sure the performance of loading security policies is on any
> > critical path.
> 
> generally speaking I agree, but I am also not going to turn down a
> performance improvement either. Its a nice to have, but not a strong
> argument for need.
> 
> > The implementation calls the hook for each LSM, which is why I think the
> > syscall is not efficient.
> > 
> it should only call the LSM identified by the lsmid in the call.
> 
> > Overall, I am still not convinced a syscall for all LSMs is needed. To
> > justify such
> 
> its not needed by all LSMs, just a subset of them, and some nebulous
> subset of potentially future LSMs that is entirely undefinable.
> 
> If we had had appropriate LSM syscalls landlock wouldn't have needed
> to have landlock specific syscalls. Having another LSM go that route
> feels wrong especially now that we have some LSM syscalls.

I don't agree.  Dedicated syscalls are a good thing.  See my other
reply.

> If a
> syscall is needed by an LSM its better to try hashing something out
> that might have utility for multiple LSMs or at the very least,
> potentially have utility in the future.
> 
> 
> > a syscall, I think we need to show that it is useful in multiple LSMs.
> > Also, if we
> > really want to have single set of APIs for all LSMs, we may also need
> > get_policy,
> 
> We are never going to get a single set of APIs for all LSMs. I will
> settle for an api that has utility for a subset
> 
> > remove_policy, etc. This set as-is appears to be an incomplete design. The
> 
> To have a complete design, there needs to be feedback and discussion
> from multiple LSMs. This is a starting point.
> 
> > implementation, with call_int_hook, is also problematic. It can easily
> > cause some controversial behaviors.
> > 
> agreed it shouldn't be doing a straight call_int_hook, it should only
> call it against the lsm identified by the lsmid

Yes, but then, I don't see the point of a "generic" LSM syscall.
Re: [PATCH 1/3] Wire up the lsm_manage_policy syscall
Posted by John Johansen 7 months, 1 week ago
On 5/9/25 03:26, Mickaël Salaün wrote:
> On Thu, May 08, 2025 at 01:18:20AM -0700, John Johansen wrote:
>> On 5/7/25 23:06, Song Liu wrote:
>>> On Wed, May 7, 2025 at 8:37 AM Maxime Bélair
>>> <maxime.belair@canonical.com> wrote:
>>> [...]
>>>>>
>>>>> These two do not feel like real benefits:
>>>>> - One syscall cannot fit all use cases well...
>>>>
>>>> This syscall is not intended to cover every case, nor to replace existing kernel
>>>> interfaces.
>>>>
>>>> Each LSM can decide which operations it wants to support (if any). For example, when
>>>> loading policies, an LSM may choose to allow only policies that further restrict
>>>> privileges.
>>>>
>>>>> - Not working in containers is often not an issue, but a feature.
>>>>
>>>> Indeed, using this syscall requires appropriate capabilities and will not permit
>>>> unprivileged containers to manage policies arbitrarily.
>>>>
>>>> With this syscall, capability checks remain the responsibility of each LSM.
>>>>
>>>> For instance, in the AppArmor patch, a profile can be loaded only if
>>>> aa_policy_admin_capable() succeeds (which requires CAP_MAC_ADMIN). Moreover, by design,
>>>> policies can be loaded only in the current namespace.
>>>>
>>>> I see this syscall as a middle point between exposing the entire sysfs, creating a large
>>>> attack surface, and blocking everything.
>>>>
>>>> Landlock’s existing syscalls already improve security by allowing processes to further
>>>> restrict their ambient rights while adding only a modest attack surface.
>>>>
>>>> This syscall is a further step in that direction: it lets LSMs add restrictive policies
>>>> without requiring exposing every other interface.
>>>
>>> I don't think a syscall makes the API more secure. If necessary, we can add
>>
>> It exposes a different attack surface. Requiring mounting of the fs to where it is visible
>> in the container, provides attack surface, and requires additional external configuration.
> 
> We should also keep in mind that syscalls could be accessible from
> everywhere, by everyone, which may increase the attack surface compared
> to a privileged filesystem interface.  Adding a second interface may
> also introduce issues.  Anyway, I'm definitely not against syscalls, but
> I don't see why the filesystem interface would be "less secure" in this
> context.
> 

yes syscalls being accessible from everywhere is another form of attack
surface, that needs to be mediated.

the fs can be mediated; what it exposes is multiple LSMs with multiple
different interfaces on the files within it. What really is more
problematic is making the fs available in the container. Yes, a
container manager can do it, but then you are dependent on the
container manager making your interface available.

Otherwise you are looking at making mount available to your app
within the container.

>>
>> Then there is the whole issue of getting the various LSMs to allow another LSM in the
>> stack to be able to manage its own policy.
> 
> Right, and it's a similar issue with seccomp policies wrt syscalls.
> 
yes, though seccomp I have found to be the easier one to deal with

>>
>>> permission check to each pseudo file. The downside of the syscall, however,
>>> is that all the permission checks are hard-coded in the kernel (except for
>>
>> The permission checks don't have to be hard coded. Each LSM can define how it handles
>> or manages the syscall. The default is that it isn't supported, but if an lsm decides
>> to support it, there is no reason that its policy can't determine the use of the
>> syscall.
> 
>  From an interface design point of view, it would be better to clearly
> specify the scope of a command (e.g. which components could be impacted
> by a command), and make sure the documentation reflects that as well.
> Even better, have a syscall per required privilege and impact (e.g.
> privileged or unprivileged).  Going this road, I'm not sure if a
> privileged syscall would make sense given the existing filesystem
> interface.
> 

uhhhmmm, not just privileged. As you well know we are looking to use
this for unprivileged policy. The LSM can limit to privileged if it
wants but it doesn't have to limit it to privileged policy.

>>
>>> BPF LSM); while the sys admin can configure permissions of the pseudo
>>> files in user space.
>>>
>> Other LSMs also have policy that can control access to pseudo filesystems and
>> other resources. Again, the control doesn't have to be hard coded. And seccomp can
>> be used to block the syscall.
>>
>>
>>
>>>> Again, each module decides which operations to expose through this syscall. In many cases
>>>> the operation will still require CAP_SYS_ADMIN or a similar capability, so environments
>>>> that choose this interface remain secure while gaining its advantages.
>>>>
>>>>>>     - Avoids overhead of other kernel interfaces for better efficiency
>>>>>
>>>>> .. and it is probably less efficient, because everything needs to
>>>>> fit in the same API.
>>>>
>>>> As shown below, the syscall can significantly improve the performance of policy management.
>>>> A more detailed benchmark is available in [1].
>>>>
>>>> The following table presents the time required to load an AppArmor profile.
>>>>
>>>> For every cell, the first value is the total time taken by aa-load, and the value in
>>>> parentheses is the time spent to load the policy in the kernel only (total - dry‑run).
>>>>
>>>> Results are in microseconds and are averaged over 10 000 runs to reduce variance.
>>>>
>>>>
>>>> | t (µs)    | pseudofs    | syscall     | Speedup       |
>>>> |-----------|-------------|-------------|---------------|
>>>> | 1password | 4257 (1127) | 3333 (192)  | x1.28 (x5.86) |
>>>> | Xorg      | 6099 (2961) | 5167 (2020) | x1.18 (x1.47) |
>>>>
>>>
>>> I am not sure the performance of loading security policies is on any
>>> critical path.
>>
>> generally speaking I agree, but I am also not going to turn down a
>> performance improvement either. It's a nice-to-have, but not a strong
>> argument for need.
>>
>>> The implementation calls the hook for each LSM, which is why I think the
>>> syscall is not efficient.
>>>
>> it should only call the LSM identified by the lsmid in the call.
>>
>>> Overall, I am still not convinced a syscall for all LSMs is needed. To
>>> justify such
>>
>> its not needed by all LSMs, just a subset of them, and some nebulous
>> subset of potentially future LSMs that is entirely undefinable.
>>
>> If we had had appropriate LSM syscalls landlock wouldn't have needed
>> to have landlock specific syscalls. Having another LSM go that route
>> feels wrong especially now that we have some LSM syscalls.
> 
> I don't agree.  Dedicated syscalls are a good thing.  See my other
> reply.
> 

I think we can just disagree on this point.

>> If a
>> syscall is needed by an LSM its better to try hashing something out
>> that might have utility for multiple LSMs or at the very least,
>> potentially have utility in the future.
>>
>>
>>> a syscall, I think we need to show that it is useful in multiple LSMs.
>>> Also, if we
>>> really want to have single set of APIs for all LSMs, we may also need
>>> get_policy,
>>
>> We are never going to get a single set of APIs for all LSMs. I will
>> settle for an api that has utility for a subset
>>
>>> remove_policy, etc. This set as-is appears to be an incomplete design. The
>>
>> To have a complete design, there needs to be feedback and discussion
>> from multiple LSMs. This is a starting point.
>>
>>> implementation, with call_int_hook, is also problematic. It can easily
>>> cause some controversial behaviors.
>>>
>> agreed it shouldn't be doing a straight call_int_hook, it should only
>> call it against the lsm identified by the lsmid
> 
> Yes, but then, I don't see the point of a "generic" LSM syscall.

its not a generic LSM syscall. Its a syscall or maybe a set of syscalls
for a specific scoped problem of loading/managing policy.

Can we come to something acceptable? I don't know but we are going to
look at it before trying for an apparmor specific syscall.
Re: [PATCH 1/3] Wire up the lsm_manage_policy syscall
Posted by Mickaël Salaün 7 months, 1 week ago
On Sun, May 11, 2025 at 03:47:21AM -0700, John Johansen wrote:
> On 5/9/25 03:26, Mickaël Salaün wrote:
> > On Thu, May 08, 2025 at 01:18:20AM -0700, John Johansen wrote:
> > > On 5/7/25 23:06, Song Liu wrote:
> > > > On Wed, May 7, 2025 at 8:37 AM Maxime Bélair
> > > > <maxime.belair@canonical.com> wrote:
> > > > [...]

> > > > permission check to each pseudo file. The downside of the syscall, however,
> > > > is that all the permission checks are hard-coded in the kernel (except for
> > > 
> > > The permission checks don't have to be hard coded. Each LSM can define how it handles
> > > or manages the syscall. The default is that it isn't supported, but if an lsm decides
> > > to support it, there is no reason that its policy can't determine the use of the
> > > syscall.
> > 
> >  From an interface design point of view, it would be better to clearly
> > specify the scope of a command (e.g. which components could be impacted
> > by a command), and make sure the documentation reflects that as well.
> > Even better, have a syscall per required privilege and impact (e.g.
> > privileged or unprivileged).  Going this road, I'm not sure if a
> > privileged syscall would make sense given the existing filesystem
> > interface.
> > 
> 
> uhhhmmm, not just privileged. As you well know we are looking to use
> this for unprivileged policy. The LSM can limit to privileged if it
> wants but it doesn't have to limit it to privileged policy.

Yes, I meant to say having a syscall for unprivileged actions, and maybe
another one for privileged ones, but this might be a hard sell. :)

To say it another way, for your use case, do you need this syscall(s)
for privileged operations?  Do you plan to drop (or stop extending) the
filesystem interface or do you think it would be good for (AppArmor)
privileged operations too?  I know syscalls might be attractive and
could be used for everything, but it's good to have a well-defined plan
and semantic to avoid using such syscall as another multiplexer with
unrelated operations and required privileges.

If this syscall should also be a way to do privileged operations, should
we also agree on a common set of permissions (e.g. global CAP_MAC_ADMIN
or user namespace one)?

[...]

> > > > Overall, I am still not convinced a syscall for all LSMs is needed. To
> > > > justify such
> > > 
> > > its not needed by all LSMs, just a subset of them, and some nebulous
> > > subset of potentially future LSMs that is entirely undefinable.
> > > 
> > > If we had had appropriate LSM syscalls landlock wouldn't have needed
> > > to have landlock specific syscalls. Having another LSM go that route
> > > feels wrong especially now that we have some LSM syscalls.
> > 
> > I don't agree.  Dedicated syscalls are a good thing.  See my other
> > reply.
> > 
> 
> I think we can just disagree on this point.
> 
> > > If a
> > > syscall is needed by an LSM its better to try hashing something out
> > > that might have utility for multiple LSMs or at the very least,
> > > potentially have utility in the future.
> > > 
> > > 
> > > > a syscall, I think we need to show that it is useful in multiple LSMs.
> > > > Also, if we
> > > > really want to have single set of APIs for all LSMs, we may also need
> > > > get_policy,
> > > 
> > > We are never going to get a single set of APIs for all LSMs. I will
> > > settle for an api that has utility for a subset
> > > 
> > > > remove_policy, etc. This set as-is appears to be an incomplete design. The
> > > 
> > > To have a complete design, there needs to be feedback and discussion
> > > from multiple LSMs. This is a starting point.
> > > 
> > > > implementation, with call_int_hook, is also problematic. It can easily
> > > > cause some controversial behaviors.
> > > > 
> > > agreed it shouldn't be doing a straight call_int_hook, it should only
> > > call it against the lsm identified by the lsmid
> > 
> > Yes, but then, I don't see the point of a "generic" LSM syscall.
> 
> its not a generic LSM syscall. Its a syscall or maybe a set of syscalls
> for a specific scoped problem of loading/managing policy.
> 
> Can we come to something acceptable? I don't know but we are going to
> look at it before trying for an apparmor specific syscall.

I understand and it's good to have this discussion.
Re: [PATCH 1/3] Wire up the lsm_manage_policy syscall
Posted by John Johansen 7 months ago
On 5/12/25 03:20, Mickaël Salaün wrote:
> On Sun, May 11, 2025 at 03:47:21AM -0700, John Johansen wrote:
>> On 5/9/25 03:26, Mickaël Salaün wrote:
>>> On Thu, May 08, 2025 at 01:18:20AM -0700, John Johansen wrote:
>>>> On 5/7/25 23:06, Song Liu wrote:
>>>>> On Wed, May 7, 2025 at 8:37 AM Maxime Bélair
>>>>> <maxime.belair@canonical.com> wrote:
>>>>> [...]
> 
>>>>> permission check to each pseudo file. The downside of the syscall, however,
>>>>> is that all the permission checks are hard-coded in the kernel (except for
>>>>
>>>> The permission checks don't have to be hard coded. Each LSM can define how it handles
>>>> or manages the syscall. The default is that it isn't supported, but if an lsm decides
>>>> to support it, there is no reason that its policy can't determine the use of the
>>>> syscall.
>>>
>>>   From an interface design point of view, it would be better to clearly
>>> specify the scope of a command (e.g. which components could be impacted
>>> by a command), and make sure the documentation reflects that as well.
>>> Even better, have a syscall per required privilege and impact (e.g.
>>> privileged or unprivileged).  Going this road, I'm not sure if a
>>> privileged syscall would make sense given the existing filesystem
>>> interface.
>>>
>>
>> uhhhmmm, not just privileged. As you well know we are looking to use
>> this for unprivileged policy. The LSM can limit to privileged if it
>> wants but it doesn't have to limit it to privileged policy.
> 
> Yes, I meant to say having a syscall for unprivileged actions, and maybe
> another one for privileged ones, but this might be a hard sell. :)
> 
indeed, in the apparmor case context would be important. Just exactly
what is privileged. It may be a privileged operation to load policy to one
namespace, but not to another that you are setting up for a child.

> To say it another way, for your use case, do you need this syscall(s)
> for privileged operations?  Do you plan to drop (or stop extending) the

need, probably. That is to say, loading of policy has varying levels
of privilege. root within the container has privilege to load policy
to its namespace, but it might have authority to set up a child namespace
that does not require privilege for it to load policy into, and it
will determine if the child has privileged or unprivileged policy within
it.

Ideally we won't have to use the fs interface within the "privileged"
container, as there are cases where this is currently not done or
undesirable.

> filesystem interface or do you think it would be good for (AppArmor)
> privileged operations too?  I know syscalls might be attractive and
> could be used for everything, but it's good to have a well-defined plan
> and semantic to avoid using such syscall as another multiplexer with
> unrelated operations and required privileges.
> 
sure. But the privilege level is use case dependent, to which policy
namespace is policy being loaded, replaced, ...  The privilege level
very much will depend on what is in the stack/bounding of policy.

> If this syscall should also be a way to do privileged operations, should
> we also agree on a common set of permissions (e.g. global CAP_MAC_ADMIN
> or user namespace one)?
> 
I think requiring something like CAP_MAC_ADMIN would be a per LSM
decision.


> [...]
> 
>>>>> Overall, I am still not convinced a syscall for all LSMs is needed. To
>>>>> justify such
>>>>
>>>> its not needed by all LSMs, just a subset of them, and some nebulous
>>>> subset of potentially future LSMs that is entirely undefinable.
>>>>
>>>> If we had had appropriate LSM syscalls landlock wouldn't have needed
>>>> to have landlock specific syscalls. Having another LSM go that route
>>>> feels wrong especially now that we have some LSM syscalls.
>>>
>>> I don't agree.  Dedicated syscalls are a good thing.  See my other
>>> reply.
>>>
>>
>> I think we can just disagree on this point.
>>
>>>> If a
>>>> syscall is needed by an LSM its better to try hashing something out
>>>> that might have utility for multiple LSMs or at the very least,
>>>> potentially have utility in the future.
>>>>
>>>>
>>>>> a syscall, I think we need to show that it is useful in multiple LSMs.
>>>>> Also, if we
>>>>> really want to have single set of APIs for all LSMs, we may also need
>>>>> get_policy,
>>>>
>>>> We are never going to get a single set of APIs for all LSMs. I will
>>>> settle for an api that has utility for a subset
>>>>
>>>>> remove_policy, etc. This set as-is appears to be an incomplete design. The
>>>>
>>>> To have a complete design, there needs to be feedback and discussion
>>>> from multiple LSMs. This is a starting point.
>>>>
>>>>> implementation, with call_int_hook, is also problematic. It can easily
>>>>> cause some controversial behaviors.
>>>>>
>>>> agreed it shouldn't be doing a straight call_int_hook, it should only
>>>> call it against the lsm identified by the lsmid
>>>
>>> Yes, but then, I don't see the point of a "generic" LSM syscall.
>>
>> its not a generic LSM syscall. Its a syscall or maybe a set of syscalls
>> for a specific scoped problem of loading/managing policy.
>>
>> Can we come to something acceptable? I don't know but we are going to
>> look at it before trying for an apparmor specific syscall.
> 
> I understand and it's good to have this discussion.

Re: [PATCH 1/3] Wire up the lsm_manage_policy syscall
Posted by Tetsuo Handa 7 months, 1 week ago
On 2025/05/08 0:37, Maxime Bélair wrote:
> Again, each module decides which operations to expose through this syscall. In many cases
> the operation will still require CAP_SYS_ADMIN or a similar capability, so environments
> that choose this interface remain secure while gaining its advantages.

If the interpretation of "flags" argument varies across LSMs, it sounds like ioctl()'s
"cmd" argument. Also, there is prctl() which can already carry string-ish parameters
without involving open(). Why can't we use prctl() instead of lsm_manage_policy() ?

Re: [PATCH 1/3] Wire up the lsm_manage_policy syscall
Posted by John Johansen 7 months, 1 week ago
On 5/7/25 15:04, Tetsuo Handa wrote:
> On 2025/05/08 0:37, Maxime Bélair wrote:
>> Again, each module decides which operations to expose through this syscall. In many cases
>> the operation will still require CAP_SYS_ADMIN or a similar capability, so environments
>> that choose this interface remain secure while gaining its advantages.
> 
> If the interpretation of "flags" argument varies across LSMs, it sounds like ioctl()'s

yes that does feel like ioctls(), on the other hand defining them at the LSM level won't
offer LSMs flexibility making it so the syscall covers fewer use cases. I am not opposed
to either, it's just hashing out what people want, and what is acceptable.

> "cmd" argument. Also, there is prctl() which can already carry string-ish parameters
> without involving open(). Why can't we use prctl() instead of lsm_manage_policy() ?
> 

prctl() can be used, I used it for the unprivileged policy demo. It has its own set of
problems. While LSM policy could be associated with the process doing the load/replacement
or whatever operation, it isn't necessarily tied to it. A lot of LSM policy is not
process specific making prctl() a poor fit.

prctl() requires allocating a global prctl()

prctl() are already being filtered/controlled by LSMs making them a poor fit for
use by an LSM in a stacking situation as it requires updating the policy of other
LSMs on the system. Yes seccomp can filter the syscall but that still is an easier
barrier to overcome than having to have instruction for how to allow your LSMs
prctl() in multiple LSMs.


Mickaël already argued the need for landlock to have syscalls. See
https://lore.kernel.org/lkml/20200511192156.1618284-7-mic@digikod.net/
and the numerous iterations before that.

Ideally those could have been LSM syscalls, with landlock leveraging them. AppArmor
is getting to where it has similar needs to landlock. Yes we can use ioctls, prctls,
netlink, the fs, etc. it doesn't mean that those are the best interfaces to do so,
and ideally any interface we use will be of benefit to some other LSMs in the future.


Re: [PATCH 1/3] Wire up the lsm_manage_policy syscall
Posted by Mickaël Salaün 7 months, 1 week ago
On Thu, May 08, 2025 at 12:52:55AM -0700, John Johansen wrote:
> On 5/7/25 15:04, Tetsuo Handa wrote:
> > On 2025/05/08 0:37, Maxime Bélair wrote:
> > > Again, each module decides which operations to expose through this syscall. In many cases
> > > the operation will still require CAP_SYS_ADMIN or a similar capability, so environments
> > > that choose this interface remain secure while gaining its advantages.
> > 
> > If the interpretation of "flags" argument varies across LSMs, it sounds like ioctl()'s
> 
> yes that does feel like ioctls(), on the other hand defining them at the LSM level won't
> offer LSMs flexibility making it so the syscall covers fewer use cases. I am not opposed
> to either, it's just hashing out what people want, and what is acceptable.
> 
> > "cmd" argument. Also, there is prctl() which can already carry string-ish parameters
> > without involving open(). Why can't we use prctl() instead of lsm_manage_policy() ?
> > 
> 
> prctl() can be used, I used it for the unprivileged policy demo. It has its own set of
> problems. While LSM policy could be associated with the process doing the load/replacement
> or what ever operation, it isn't necessarily tied to it. A lot of LSM policy is not
> process specific making prctl() a poor fit.
> 
> prctl() requires allocating a global prctl()
> 
> prctl() are already being filtered/controlled by LSMs making them a poor fit for
> use by an LSM in a stacking situation as it requires updating the policy of other
> LSMs on the system. Yes seccomp can filter the syscall but that still is an easier
> barrier to overcome than having to have instruction for how to allow your LSMs
> prctl() in multiple LSMs.
> 
> 
> Mickaël already argued the need for landlock to have syscalls. See

Landlock indeed requires syscalls mainly because of its unprivileged
nature.

> https://lore.kernel.org/lkml/20200511192156.1618284-7-mic@digikod.net/
> and the numerous iterations before that.

This link might be misleading though, it points to an initial version of
the syscall proposal (v17) and it was then decided to create one syscall
per operation (v34), which is why we ended with 3 syscalls.  See the
changelog:
https://lore.kernel.org/r/20210422154123.13086-9-mic@digikod.net

> 
> Ideally those could have been LSM syscalls, with landlock leveraging them.

I don't agree.  The Landlock syscalls have a well-defined semantic, with
documented security requirements, and they deal with specific kernel
objects identified with file descriptors, including a dedicated one:
[landlock-ruleset].  For the features provided by these Landlock
syscalls, it would not have been a good idea to reuse existing syscalls,
nor to rely on the syscall proposed in this series because the interface
is too specific to some of the current privileged LSMs (i.e. ingest a
policy blob).  Making this interface more generic would lead to even
less defined semantic though.

> AppArmor
> is getting to where it has similar needs to landlock. Yes we can use ioctls, prctls,
> netlink, the fs, etc. it doesn't mean that those are the best interfaces to do so,

I think it would make sense to propose AppArmor-specific syscalls.

> and ideally any interface we use will be of benefit to some other LSMs in the future.

The LSM syscalls may make sense to deal with LSM blobs managed by the
LSM framework (e.g. get/set properties) when the operations are
common/generic.

Security policies are specific to each LSM and they should implement
their own well-defined interface (e.g. filesystem, netlink, syscall).

The LSM framework doesn't provide nor manage any security policy, it
mainly provides a set of consistent and well-defined kernel hooks with
security blobs to enforce a security policy.  I don't think it makes
sense to add LSM syscalls to manage things not managed by the LSM
framework.
Re: [PATCH 1/3] Wire up the lsm_manage_policy syscall
Posted by John Johansen 7 months, 1 week ago
On 5/9/25 03:25, Mickaël Salaün wrote:
> On Thu, May 08, 2025 at 12:52:55AM -0700, John Johansen wrote:
>> On 5/7/25 15:04, Tetsuo Handa wrote:
>>> On 2025/05/08 0:37, Maxime Bélair wrote:
>>>> Again, each module decides which operations to expose through this syscall. In many cases
>>>> the operation will still require CAP_SYS_ADMIN or a similar capability, so environments
>>>> that choose this interface remain secure while gaining its advantages.
>>>
>>> If the interpretation of "flags" argument varies across LSMs, it sounds like ioctl()'s
>>
>> yes that does feel like ioctls(), on the other hand defining them at the LSM level won't
>> offer LSMs flexibility making it so the syscall covers fewer use cases. I am not opposed
>> to either, it's just hashing out what people want, and what is acceptable.
>>
>>> "cmd" argument. Also, there is prctl() which can already carry string-ish parameters
>>> without involving open(). Why can't we use prctl() instead of lsm_manage_policy() ?
>>>
>>
>> prctl() can be used, I used it for the unprivileged policy demo. It has its own set of
>> problems. While LSM policy could be associated with the process doing the load/replacement
>> or what ever operation, it isn't necessarily tied to it. A lot of LSM policy is not
>> process specific making prctl() a poor fit.
>>
>> prctl() requires allocating a global prctl()
>>
>> prctl() are already being filtered/controlled by LSMs making them a poor fit for
>> use by an LSM in a stacking situation as it requires updating the policy of other
>> LSMs on the system. Yes seccomp can filter the syscall but that still is an easier
>> barrier to overcome than having to have instruction for how to allow your LSMs
>> prctl() in multiple LSMs.
>>
>>
>> Mickaël already argued the need for landlock to have syscalls. See
> 
> Landlock indeed requires syscalls mainly because of its unprivileged
> nature.
> 

yes that is the dominant reason

>> https://lore.kernel.org/lkml/20200511192156.1618284-7-mic@digikod.net/
>> and the numerous iterations before that.
> 
> This link might be misleading though, it points to an initial version of
> the syscall proposal (v17) and it was then decided to create one syscall
> per operation (v34), which is why we ended with 3 syscalls.  See the
> changelog:
> https://lore.kernel.org/r/20210422154123.13086-9-mic@digikod.net
> 

yes and no. I am well aware landlock's syscall got split into three syscalls.

All I was trying to do is reference to the start of the discussion on why
landlock needed a syscall(s). I thought the details of why you have three
etc, really didn't add to the discussion. But yeah not also pointing to
v34 could be considered misleading.


>>
>> Ideally those could have been LSM syscalls, with landlock leveraging them.
> 
> I don't agree.  The Landlock syscalls have a well-defined semantic, with

First I don't begrudge Landlock its syscalls, I think at the time it was
the only way forward.

> documented security requirements, and they deal with specific kernel
> objects identified with file descriptors, including a dedicated one:
> [landlock-ruleset].

I am aware. Those semantics could have been kept and documented, within
a set of LSM syscalls. Yes landlock's syscalls shouldn't have been done
behind a single LSM syscall, I am not advocating for that but maybe
behind several LSM syscalls.

>  For the features provided by these Landlock
> syscalls, it would not have been a good idea to reuse existing syscalls,
> nor to rely on the syscall proposed in this series because the interface
> is too specific to some of the current privileged LSMs (i.e. ingest a
> policy blob).  Making this interface more generic would lead to even
> less defined semantic though.

Right, so again not a generic LSM syscall. But "generic" LSM syscalls
for certain purposes. Let me walk my statement back a little, what I
find unfortunate was that the landlock LSM syscalls didn't get discussed
as a set of generic LSM syscalls with landlock being the first to
implement them.

The question is hashing out where the generic semantics are vs. the
individual LSMs. Having an LSM syscall to deal with specific kernel
objects identified with file descriptors, and allowing each LSM
to deal with that if it needs is possible.

It's a matter of figuring something out. It could be it turns out it is
not worth it, and some individual LSM syscalls like Landlock's are the
way to go; it's just that it wasn't explored. I don't fault you, and think
it really wasn't even an option at the time.

> 
>> AppArmor
>> is getting to where it has similar needs to landlock. Yes we can use ioctls, prctls,
>> netlink, the fs, etc. it doesn't mean that those are the best interfaces to do so,
> 
> I think it would make sense to propose AppArmor-specific syscalls.
> 

that may be the case, but I think we should explore providing a more
LSM generic interface first.

>> and ideally any interface we use will be of benefit to some other LSMs in the future.
> 
> The LSM syscalls may make sense to deal with LSM blobs managed by the
> LSM framework (e.g. get/set properties) when the operations are
> common/generic.
> 
> Security policies are specific to each LSM and they should implement
> their own well-defined interface (e.g. filesystem, netlink, syscall).
> 
policies at some level are just blobs too. It is worth at least
exploring whether there can be a common interface.

> The LSM framework doesn't provide nor manage any security policy, it
> mainly provides a set of consistent and well-defined kernel hooks with
> security blobs to enforce a security policy.  I don't think it makes
> sense to add LSM syscalls to manage things not managed by the LSM
> framework.

we aren't talking about the LSM framework managing security policy,
just whether it makes sense for it to provide a common interface that
an LSM can choose to use to provide it a blob of policy that it
can then manage.

Its just a mechanism. This isn't all that different than using the
filesystem, netlink, or other mechanisms to shuttle the blob
between userspace to the kernel, and then the LSM manages its
policy and data.

The big difference is that using the syscall opens unprivileged
policy up to the LSM more broadly. If we are going to go the syscall
route for apparmor, we might as well see if we can't make that
mechanism more broadly available, and make it easier for other
LSMs in the future.

Again, it might turn out it's a fool's errand, and we have to do
an apparmor specific syscall, but it is worth exploring first.