Add support for the new lsm_manage_policy syscall, providing a unified
API for loading and modifying LSM policies without requiring the LSM’s
pseudo-filesystem.
Benefits:
- Works even if the LSM pseudo-filesystem isn’t mounted or available
(e.g. in containers)
- Offers a logical and unified interface rather than multiple
heterogeneous pseudo-filesystems.
- Avoids overhead of other kernel interfaces for better efficiency
Signed-off-by: Maxime Bélair <maxime.belair@canonical.com>
---
arch/alpha/kernel/syscalls/syscall.tbl | 1 +
arch/arm/tools/syscall.tbl | 1 +
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
include/linux/syscalls.h | 4 ++++
include/uapi/asm-generic/unistd.h | 4 +++-
kernel/sys_ni.c | 1 +
security/lsm_syscalls.c | 6 ++++++
tools/include/uapi/asm-generic/unistd.h | 4 +++-
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 1 +
10 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 2dd6340de6b4..dfe6cd43c584 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -507,3 +507,4 @@
575 common listxattrat sys_listxattrat
576 common removexattrat sys_removexattrat
577 common open_tree_attr sys_open_tree_attr
+578 common lsm_manage_policy sys_lsm_manage_policy
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 27c1d5ebcd91..60abcb3a8a1b 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -482,3 +482,4 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common lsm_manage_policy sys_lsm_manage_policy
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index ac007ea00979..bb91a929757a 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -473,3 +473,4 @@
465 i386 listxattrat sys_listxattrat
466 i386 removexattrat sys_removexattrat
467 i386 open_tree_attr sys_open_tree_attr
+468 i386 lsm_manage_policy sys_lsm_manage_policy
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index cfb5ca41e30d..83819d4a5c8a 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -391,6 +391,7 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common lsm_manage_policy sys_lsm_manage_policy
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e5603cc91963..f52a0678b1d0 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -989,6 +989,10 @@ asmlinkage long sys_lsm_set_self_attr(unsigned int attr, struct lsm_ctx __user *
u32 size, u32 flags);
asmlinkage long sys_lsm_list_modules(u64 __user *ids, u32 __user *size, u32 flags);
+asmlinkage long sys_lsm_manage_policy(u32 lsm_id, u32 op, void __user *buf,
+ u32 __user *size, u32 flags);
+
+
/*
* Architecture-specific system calls
*/
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 2892a45023af..b94369baded8 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -851,9 +851,11 @@ __SYSCALL(__NR_listxattrat, sys_listxattrat)
__SYSCALL(__NR_removexattrat, sys_removexattrat)
#define __NR_open_tree_attr 467
__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
+#define __NR_lsm_manage_policy 468
+__SYSCALL(__NR_lsm_manage_policy, lsm_manage_policy)
#undef __NR_syscalls
-#define __NR_syscalls 468
+#define __NR_syscalls 469
/*
* 32 bit systems traditionally used different
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index c00a86931f8c..e556b07d8716 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -172,6 +172,7 @@ COND_SYSCALL_COMPAT(fadvise64_64);
COND_SYSCALL(lsm_get_self_attr);
COND_SYSCALL(lsm_set_self_attr);
COND_SYSCALL(lsm_list_modules);
+COND_SYSCALL(lsm_manage_policy);
/* CONFIG_MMU only */
COND_SYSCALL(swapon);
diff --git a/security/lsm_syscalls.c b/security/lsm_syscalls.c
index 8440948a690c..dcaad8818679 100644
--- a/security/lsm_syscalls.c
+++ b/security/lsm_syscalls.c
@@ -118,3 +118,9 @@ SYSCALL_DEFINE3(lsm_list_modules, u64 __user *, ids, u32 __user *, size,
return lsm_active_cnt;
}
+
+SYSCALL_DEFINE5(lsm_manage_policy, u32, lsm_id, u32, op, void __user *, buf, u32
+ __user *, size, u32, flags)
+{
+ return 0;
+}
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 2892a45023af..b94369baded8 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -851,9 +851,11 @@ __SYSCALL(__NR_listxattrat, sys_listxattrat)
__SYSCALL(__NR_removexattrat, sys_removexattrat)
#define __NR_open_tree_attr 467
__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
+#define __NR_lsm_manage_policy 468
+__SYSCALL(__NR_lsm_manage_policy, lsm_manage_policy)
#undef __NR_syscalls
-#define __NR_syscalls 468
+#define __NR_syscalls 469
/*
* 32 bit systems traditionally used different
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index cfb5ca41e30d..83819d4a5c8a 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -391,6 +391,7 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common lsm_manage_policy sys_lsm_manage_policy
#
# Due to a historical design error, certain syscalls are numbered differently
--
2.48.1
Hi Maxime,
kernel test robot noticed the following build warnings:
[auto build test WARNING on 9c32cda43eb78f78c73aee4aa344b777714e259b]
url: https://github.com/intel-lab-lkp/linux/commits/Maxime-B-lair/Wire-up-the-lsm_manage_policy-syscall/20250506-224212
base: 9c32cda43eb78f78c73aee4aa344b777714e259b
patch link: https://lore.kernel.org/r/20250506143254.718647-2-maxime.belair%40canonical.com
patch subject: [PATCH 1/3] Wire up the lsm_manage_policy syscall
config: s390-allnoconfig (https://download.01.org/0day-ci/archive/20250507/202505072131.ogtsaLPI-lkp@intel.com/config)
compiler: clang version 21.0.0git (https://github.com/llvm/llvm-project f819f46284f2a79790038e1f6649172789734ae8)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250507/202505072131.ogtsaLPI-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202505072131.ogtsaLPI-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> <stdin>:1618:2: warning: syscall lsm_manage_policy not implemented [-W#warnings]
1618 | #warning syscall lsm_manage_policy not implemented
| ^
1 warning generated.
--
>> <stdin>:1618:2: warning: syscall lsm_manage_policy not implemented [-W#warnings]
1618 | #warning syscall lsm_manage_policy not implemented
| ^
1 warning generated.
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On Tue, May 6, 2025 at 7:40 AM Maxime Bélair <maxime.belair@canonical.com> wrote: > > Add support for the new lsm_manage_policy syscall, providing a unified > API for loading and modifying LSM policies without requiring the LSM’s > pseudo-filesystem. > > Benefits: > - Works even if the LSM pseudo-filesystem isn’t mounted or available > (e.g. in containers) > - Offers a logical and unified interface rather than multiple > heterogeneous pseudo-filesystems. These two do not feel like real benefits: - Not working in containers is often not an issue, but a feature. - One syscall cannot fit all use cases well... > - Avoids overhead of other kernel interfaces for better efficiency ... and it is probably less efficient, because everything needs to fit in the same API. Overall, this set doesn't feel like a good change to me. Thanks, Song
On 5/6/25 23:26, Song Liu wrote: > On Tue, May 6, 2025 at 7:40 AM Maxime Bélair > <maxime.belair@canonical.com> wrote: >> >> Add support for the new lsm_manage_policy syscall, providing a unified >> API for loading and modifying LSM policies without requiring the LSM’s >> pseudo-filesystem. >> >> Benefits: >> - Works even if the LSM pseudo-filesystem isn’t mounted or available >> (e.g. in containers) >> - Offers a logical and unified interface rather than multiple >> heterogeneous pseudo-filesystems. > > These two do not feel like real benefits: > - Not working in containers is often not an issue, but a feature. and the LSM doesn't have to allow the syscall to function in a container where appropriate. It's up to the LSM if the syscall is supported and what kind of permissions are needed. However, having the ability to function in a container without having to mount securityfs or procfs into it, similar to what landlock gets with its syscalls, can be beneficial. > - One syscall cannot fit all use cases well... > of course not, and for those other use cases new syscalls can be added. >> - Avoids overhead of other kernel interfaces for better efficiency > > .. and it is is probably less efficient, because everything need to > fit in the same API. > no, not everything, just what fits into the syscall. Nor does an LSM have to use the syscall; it can still use what works for it. This could be a little more efficient than the current fs interface used by apparmor/selinux/smack but I don't think efficiency is going to be a huge win for this. > Overall, this set doesn't feel like a good change to me. > > Thanks, > Song
On 5/7/25 08:26, Song Liu wrote: > On Tue, May 6, 2025 at 7:40 AM Maxime Bélair > <maxime.belair@canonical.com> wrote: >> >> Add support for the new lsm_manage_policy syscall, providing a unified >> API for loading and modifying LSM policies without requiring the LSM’s >> pseudo-filesystem. >> >> Benefits: >> - Works even if the LSM pseudo-filesystem isn’t mounted or available >> (e.g. in containers) >> - Offers a logical and unified interface rather than multiple >> heterogeneous pseudo-filesystems. > > These two do not feel like real benefits: > - One syscall cannot fit all use cases well... This syscall is not intended to cover every case, nor to replace existing kernel interfaces. Each LSM can decide which operations it wants to support (if any). For example, when loading policies, an LSM may choose to allow only policies that further restrict privileges. > - Not working in containers is often not an issue, but a feature. Indeed, using this syscall requires appropriate capabilities and will not permit unprivileged containers to manage policies arbitrarily. With this syscall, capability checks remain the responsibility of each LSM. For instance, in the AppArmor patch, a profile can be loaded only if aa_policy_admin_capable() succeeds (which requires CAP_MAC_ADMIN). Moreover, by design, policies can be loaded only in the current namespace. I see this syscall as a middle point between exposing the entire sysfs, creating a large attack surface, and blocking everything. Landlock’s existing syscalls already improve security by allowing processes to further restrict their ambient rights while adding only a modest attack surface. This syscall is a further step in that direction: it lets LSMs add restrictive policies without requiring exposing every other interface. Again, each module decides which operations to expose through this syscall. 
In many cases the operation will still require CAP_SYS_ADMIN or a similar capability, so environments that choose this interface remain secure while gaining its advantages. >> - Avoids overhead of other kernel interfaces for better efficiency > > .. and it is is probably less efficient, because everything need to > fit in the same API. As shown below, the syscall can significantly improve the performance of policy management. A more detailed benchmark is available in [1]. The following table presents the time required to load an AppArmor profile. For every cell, the first value is the total time taken by aa-load, and the value in parentheses is the time spent to load the policy in the kernel only (total - dry‑run). Results are in microseconds and are averaged over 10 000 runs to reduce variance. | t (µs) | pseudofs | syscall | Speedup | |-----------|-------------|-------------|---------------| | 1password | 4257 (1127) | 3333 (192) | x1.28 (x5.86) | | Xorg | 6099 (2961) | 5167 (2020) | x1.18 (x1.47) | If an LSM wants to allow several operations for a single LSM_POLICY_XXX, it can multiplex a sub‑opcode in flags and select the appropriate handler; this incurs negligible overhead. Thanks, Maxime [1] https://gitlab.com/-/snippets/4840792
On Wed, May 7, 2025 at 8:37 AM Maxime Bélair <maxime.belair@canonical.com> wrote: [...] > > > > These two do not feel like real benefits: > > - One syscall cannot fit all use cases well... > > This syscall is not intended to cover every case, nor to replace existing kernel > interfaces. > > Each LSM can decide which operations it wants to support (if any). For example, when > loading policies, an LSM may choose to allow only policies that further restrict > privileges. > > > - Not working in containers is often not an issue, but a feature. > > Indeed, using this syscall requires appropriate capabilities and will not permit > unprivileged containers to manage policies arbitrarily. > > With this syscall, capability checks remain the responsibility of each LSM. > > For instance, in the AppArmor patch, a profile can be loaded only if > aa_policy_admin_capable() succeeds (which requires CAP_MAC_ADMIN). Moreover, by design, > policies can be loaded only in the current namespace. > > I see this syscall as a middle point between exposing the entire sysfs, creating a large > attack surface, and blocking everything. > > Landlock’s existing syscalls already improve security by allowing processes to further > restrict their ambient rights while adding only a modest attack surface. > > This syscall is a further step in that direction: it lets LSMs add restrictive policies > without requiring exposing every other interface. I don't think a syscall makes the API more secure. If necessary, we can add permission check to each pseudo file. The downside of the syscall, however, is that all the permission checks are hard-coded in the kernel (except for BPF LSM); while the sys admin can configure permissions of the pseudo files in user space. > Again, each module decides which operations to expose through this syscall. 
In many cases > the operation will still require CAP_SYS_ADMIN or a similar capability, so environments > that choose this interface remain secure while gaining its advantages. > > >> - Avoids overhead of other kernel interfaces for better efficiency > > > > .. and it is is probably less efficient, because everything need to > > fit in the same API. > > As shown below, the syscall can significantly improve the performance of policy management. > A more detailed benchmark is available in [1]. > > The following table presents the time required to load an AppArmor profile. > > For every cell, the first value is the total time taken by aa-load, and the value in > parentheses is the time spent to load the policy in the kernel only (total - dry‑run). > > Results are in microseconds and are averaged over 10 000 runs to reduce variance. > > > | t (µs) | syscall | pseudofs | Speedup | > |-----------|-------------|-------------|---------------| > | 1password | 4257 (1127) | 3333 (192) | x1.28 (x5.86) | > | Xorg | 6099 (2961) | 5167 (2020) | x1.18 (x1.47) | > I am not sure the performance of loading security policies is on any critical path. The implementation calls the hook for each LSM, which is why I think the syscall is not efficient. Overall, I am still not convinced a syscall for all LSMs is needed. To justify such a syscall, I think we need to show that it is useful in multiple LSMs. Also, if we really want to have single set of APIs for all LSMs, we may also need get_policy, remove_policy, etc. This set as-is appears to be an incomplete design. The implementation, with call_int_hook, is also problematic. It can easily cause some controversial behaviors. Thanks, Song
On 5/7/25 23:06, Song Liu wrote: > On Wed, May 7, 2025 at 8:37 AM Maxime Bélair > <maxime.belair@canonical.com> wrote: > [...] >>> >>> These two do not feel like real benefits: >>> - One syscall cannot fit all use cases well... >> >> This syscall is not intended to cover every case, nor to replace existing kernel >> interfaces. >> >> Each LSM can decide which operations it wants to support (if any). For example, when >> loading policies, an LSM may choose to allow only policies that further restrict >> privileges. >> >>> - Not working in containers is often not an issue, but a feature. >> >> Indeed, using this syscall requires appropriate capabilities and will not permit >> unprivileged containers to manage policies arbitrarily. >> >> With this syscall, capability checks remain the responsibility of each LSM. >> >> For instance, in the AppArmor patch, a profile can be loaded only if >> aa_policy_admin_capable() succeeds (which requires CAP_MAC_ADMIN). Moreover, by design, >> policies can be loaded only in the current namespace. >> >> I see this syscall as a middle point between exposing the entire sysfs, creating a large >> attack surface, and blocking everything. >> >> Landlock’s existing syscalls already improve security by allowing processes to further >> restrict their ambient rights while adding only a modest attack surface. >> >> This syscall is a further step in that direction: it lets LSMs add restrictive policies >> without requiring exposing every other interface. > > I don't think a syscall makes the API more secure. If necessary, we can add It exposes a different attack surface. Requiring mounting of the fs to where it is visible in the container, provides attack surface, and requires additional external configuration. Then there is the whole issue of getting the various LSMs to allow another LSM in the stack to be able manage its own policy. > permission check to each pseudo file. 
The downside of the syscall, however, > is that all the permission checks are hard-coded in the kernel (except for The permission checks don't have to be hard coded. Each LSM can define how it handles or manages the syscall. The default is that it isn't supported, but if an LSM decides to support it, there is no reason that its policy can't determine the use of the syscall. > BPF LSM); while the sys admin can configure permissions of the pseudo > files in user space. > Other LSMs also have policy that can control access to pseudo filesystems and other resources. Again, the control doesn't have to be hard coded. And seccomp can be used to block the syscall. >> Again, each module decides which operations to expose through this syscall. In many cases >> the operation will still require CAP_SYS_ADMIN or a similar capability, so environments >> that choose this interface remain secure while gaining its advantages. >> >>>> - Avoids overhead of other kernel interfaces for better efficiency >>> >>> .. and it is is probably less efficient, because everything need to >>> fit in the same API. >> >> As shown below, the syscall can significantly improve the performance of policy management. >> A more detailed benchmark is available in [1]. >> >> The following table presents the time required to load an AppArmor profile. >> >> For every cell, the first value is the total time taken by aa-load, and the value in >> parentheses is the time spent to load the policy in the kernel only (total - dry‑run). >> >> Results are in microseconds and are averaged over 10 000 runs to reduce variance. >> >> >> | t (µs) | syscall | pseudofs | Speedup | >> |-----------|-------------|-------------|---------------| >> | 1password | 4257 (1127) | 3333 (192) | x1.28 (x5.86) | >> | Xorg | 6099 (2961) | 5167 (2020) | x1.18 (x1.47) | >> > > I am not sure the performance of loading security policies is on any > critical path. 
generally speaking I agree, but I am also not going to turn down a performance improvement either. It's a nice-to-have, but not a strong argument for need. > The implementation calls the hook for each LSM, which is why I think the > syscall is not efficient. > it should only call the LSM identified by the lsmid in the call. > Overall, I am still not convinced a syscall for all LSMs is needed. To > justify such it's not needed by all LSMs, just a subset of them, and some nebulous subset of potentially future LSMs that is entirely undefinable. If we had had appropriate LSM syscalls landlock wouldn't have needed to have landlock-specific syscalls. Having another LSM go that route feels wrong especially now that we have some LSM syscalls. If a syscall is needed by an LSM it's better to try hashing something out that might have utility for multiple LSMs or at the very least, potentially have utility in the future. > a syscall, I think we need to show that it is useful in multiple LSMs. > Also, if we > really want to have single set of APIs for all LSMs, we may also need > get_policy, We are never going to get a single set of APIs for all LSMs. I will settle for an API that has utility for a subset. > remove_policy, etc. This set as-is appears to be an incomplete design. The To have a complete design, there needs to be feedback and discussion from multiple LSMs. This is a starting point. > implementation, with call_int_hook, is also problematic. It can easily > cause some controversial behaviors. > agreed it shouldn't be doing a straight call_int_hook, it should only call it against the lsm identified by the lsmid
On Thu, May 08, 2025 at 01:18:20AM -0700, John Johansen wrote: > On 5/7/25 23:06, Song Liu wrote: > > On Wed, May 7, 2025 at 8:37 AM Maxime Bélair > > <maxime.belair@canonical.com> wrote: > > [...] > > > > > > > > These two do not feel like real benefits: > > > > - One syscall cannot fit all use cases well... > > > > > > This syscall is not intended to cover every case, nor to replace existing kernel > > > interfaces. > > > > > > Each LSM can decide which operations it wants to support (if any). For example, when > > > loading policies, an LSM may choose to allow only policies that further restrict > > > privileges. > > > > > > > - Not working in containers is often not an issue, but a feature. > > > > > > Indeed, using this syscall requires appropriate capabilities and will not permit > > > unprivileged containers to manage policies arbitrarily. > > > > > > With this syscall, capability checks remain the responsibility of each LSM. > > > > > > For instance, in the AppArmor patch, a profile can be loaded only if > > > aa_policy_admin_capable() succeeds (which requires CAP_MAC_ADMIN). Moreover, by design, > > > policies can be loaded only in the current namespace. > > > > > > I see this syscall as a middle point between exposing the entire sysfs, creating a large > > > attack surface, and blocking everything. > > > > > > Landlock’s existing syscalls already improve security by allowing processes to further > > > restrict their ambient rights while adding only a modest attack surface. > > > > > > This syscall is a further step in that direction: it lets LSMs add restrictive policies > > > without requiring exposing every other interface. > > > > I don't think a syscall makes the API more secure. If necessary, we can add > > It exposes a different attack surface. Requiring mounting of the fs to where it is visible > in the container, provides attack surface, and requires additional external configuration. 
We should also keep in mind that syscalls could be accessible from everywhere, by everyone, which may increase the attack surface compared to a privileged filesystem interface. Adding a second interface may also introduce issues. Anyway, I'm definitely not against syscalls, but I don't see why the filesystem interface would be "less secure" in this context. > > Then there is the whole issue of getting the various LSMs to allow another LSM in the > stack to be able manage its own policy. Right, and it's a similar issue with seccomp policies wrt syscalls. > > > permission check to each pseudo file. The downside of the syscall, however, > > is that all the permission checks are hard-coded in the kernel (except for > > The permission checks don't have to be hard coded. Each LSM can define how it handles > or manages the syscall. The default is that it isn't supported, but if an lsm decides > to support it, there is now reason that its policy can't determine the use of the > syscall. From an interface design point of view, it would be better to clearly specify the scope of a command (e.g. which components could be impacted by a command), and make sure the documentation reflect that as well. Even better, have a syscalls per required privileges and impact (e.g. privileged or unprivileged). Going this road, I'm not sure if a privileged syscall would make sense given the existing filesystem interface. > > > BPF LSM); while the sys admin can configure permissions of the pseudo > > files in user space. > > > Other LSMs also have policy that can control access to pseudo filesystems and > other resources. Again, the control doesn't have to be hard coded. And seccomp can > be used to block the syscall. > > > > > > Again, each module decides which operations to expose through this syscall. In many cases > > > the operation will still require CAP_SYS_ADMIN or a similar capability, so environments > > > that choose this interface remain secure while gaining its advantages. 
> > > > > > > > - Avoids overhead of other kernel interfaces for better efficiency > > > > > > > > .. and it is is probably less efficient, because everything need to > > > > fit in the same API. > > > > > > As shown below, the syscall can significantly improve the performance of policy management. > > > A more detailed benchmark is available in [1]. > > > > > > The following table presents the time required to load an AppArmor profile. > > > > > > For every cell, the first value is the total time taken by aa-load, and the value in > > > parentheses is the time spent to load the policy in the kernel only (total - dry‑run). > > > > > > Results are in microseconds and are averaged over 10 000 runs to reduce variance. > > > > > > > > > | t (µs) | syscall | pseudofs | Speedup | > > > |-----------|-------------|-------------|---------------| > > > | 1password | 4257 (1127) | 3333 (192) | x1.28 (x5.86) | > > > | Xorg | 6099 (2961) | 5167 (2020) | x1.18 (x1.47) | > > > > > > > I am not sure the performance of loading security policies is on any > > critical path. > > generally speaking I agree, but I am also not going to turn down a > performance improvement either. Its a nice to have, but not a strong > argument for need. > > > The implementation calls the hook for each LSM, which is why I think the > > syscall is not efficient. > > > it should only call the LSM identified by the lsmid in the call. > > > Overall, I am still not convinced a syscall for all LSMs is needed. To > > justify such > > its not needed by all LSMs, just a subset of them, and some nebulous > subset of potentially future LSMs that is entirely undefinable. > > If we had had appropriate LSM syscalls landlock wouldn't have needed > to have landlock specific syscalls. Having another LSM go that route > feels wrong especially now that we have some LSM syscalls. I don't agree. Dedicated syscalls are a good thing. See my other reply. 
> If a > syscall is needed by an LSM its better to try hashing something out > that might have utility for multiple LSMs or at the very least, > potentially have utility in the future. > > > > a syscall, I think we need to show that it is useful in multiple LSMs. > > Also, if we > > really want to have single set of APIs for all LSMs, we may also need > > get_policy, > > We are never going to get a single set of APIs for all LSMs. I will > settle for an api that has utility for a subset > > > remove_policy, etc. This set as-is appears to be an incomplete design. The > > To have a complete design, there needs to be feedback and discussion > from multiple LSMs. This is a starting point. > > > implementation, with call_int_hook, is also problematic. It can easily > > cause some> controversial behaviors. > > > agreed it shouldn't be doing a straight call_int_hook, it should only > call it against the lsm identified by the lsmid Yes, but then, I don't see the point of a "generic" LSM syscall.
On 5/9/25 03:26, Mickaël Salaün wrote: > On Thu, May 08, 2025 at 01:18:20AM -0700, John Johansen wrote: >> On 5/7/25 23:06, Song Liu wrote: >>> On Wed, May 7, 2025 at 8:37 AM Maxime Bélair >>> <maxime.belair@canonical.com> wrote: >>> [...] >>>>> >>>>> These two do not feel like real benefits: >>>>> - One syscall cannot fit all use cases well... >>>> >>>> This syscall is not intended to cover every case, nor to replace existing kernel >>>> interfaces. >>>> >>>> Each LSM can decide which operations it wants to support (if any). For example, when >>>> loading policies, an LSM may choose to allow only policies that further restrict >>>> privileges. >>>> >>>>> - Not working in containers is often not an issue, but a feature. >>>> >>>> Indeed, using this syscall requires appropriate capabilities and will not permit >>>> unprivileged containers to manage policies arbitrarily. >>>> >>>> With this syscall, capability checks remain the responsibility of each LSM. >>>> >>>> For instance, in the AppArmor patch, a profile can be loaded only if >>>> aa_policy_admin_capable() succeeds (which requires CAP_MAC_ADMIN). Moreover, by design, >>>> policies can be loaded only in the current namespace. >>>> >>>> I see this syscall as a middle point between exposing the entire sysfs, creating a large >>>> attack surface, and blocking everything. >>>> >>>> Landlock’s existing syscalls already improve security by allowing processes to further >>>> restrict their ambient rights while adding only a modest attack surface. >>>> >>>> This syscall is a further step in that direction: it lets LSMs add restrictive policies >>>> without requiring exposing every other interface. >>> >>> I don't think a syscall makes the API more secure. If necessary, we can add >> >> It exposes a different attack surface. Requiring mounting of the fs to where it is visible >> in the container, provides attack surface, and requires additional external configuration. 
> > We should also keep in mind that syscalls could be accessible from > everywhere, by everyone, which may increase the attack surface compared > to a privileged filesystem interface. Adding a second interface may > also introduce issues. Anyway, I'm definitely not against syscalls, but > I don't see why the filesystem interface would be "less secure" in this > context. > yes syscalls being accessible from everywhere is another form of attack surface, that needs to be mediated. the fs can be mediated, its expose is a multiple lsms with multiple different interfaces on the files within it. What really is more problematic is makng the fs available in the container. Yes a container manager can do it but then you are dependent on the container manager making your interface available. Other wise you are looking at making mount available to your app within the container. >> >> Then there is the whole issue of getting the various LSMs to allow another LSM in the >> stack to be able manage its own policy. > > Right, and it's a similar issue with seccomp policies wrt syscalls. > yes, though seccomp I have found to be the easier one to deal with >> >>> permission check to each pseudo file. The downside of the syscall, however, >>> is that all the permission checks are hard-coded in the kernel (except for >> >> The permission checks don't have to be hard coded. Each LSM can define how it handles >> or manages the syscall. The default is that it isn't supported, but if an lsm decides >> to support it, there is now reason that its policy can't determine the use of the >> syscall. > > From an interface design point of view, it would be better to clearly > specify the scope of a command (e.g. which components could be impacted > by a command), and make sure the documentation reflect that as well. > Even better, have a syscalls per required privileges and impact (e.g. > privileged or unprivileged). 
Going this road, I'm not sure if a > privileged syscall would make sense given the existing filesystem > interface. > uhhhmmm, not just privileged. As you well know we are looking to use this for unprivileged policy. The LSM can limit to privileged if it wants but it doesn't have to limit it to privileged policy. >> >>> BPF LSM); while the sys admin can configure permissions of the pseudo >>> files in user space. >>> >> Other LSMs also have policy that can control access to pseudo filesystems and >> other resources. Again, the control doesn't have to be hard coded. And seccomp can >> be used to block the syscall. >> >> >> >>>> Again, each module decides which operations to expose through this syscall. In many cases >>>> the operation will still require CAP_SYS_ADMIN or a similar capability, so environments >>>> that choose this interface remain secure while gaining its advantages. >>>> >>>>>> - Avoids overhead of other kernel interfaces for better efficiency >>>>> >>>>> .. and it is is probably less efficient, because everything need to >>>>> fit in the same API. >>>> >>>> As shown below, the syscall can significantly improve the performance of policy management. >>>> A more detailed benchmark is available in [1]. >>>> >>>> The following table presents the time required to load an AppArmor profile. >>>> >>>> For every cell, the first value is the total time taken by aa-load, and the value in >>>> parentheses is the time spent to load the policy in the kernel only (total - dry‑run). >>>> >>>> Results are in microseconds and are averaged over 10 000 runs to reduce variance. >>>> >>>> >>>> | t (µs) | pseudofs | syscall | Speedup | >>>> |-----------|-------------|-------------|---------------| >>>> | 1password | 4257 (1127) | 3333 (192) | x1.28 (x5.86) | >>>> | Xorg | 6099 (2961) | 5167 (2020) | x1.18 (x1.47) | >>>> >>> >>> I am not sure the performance of loading security policies is on any >>> critical path. 
>> >> generally speaking I agree, but I am also not going to turn down a >> performance improvement either. Its a nice to have, but not a strong >> argument for need. >> >>> The implementation calls the hook for each LSM, which is why I think the >>> syscall is not efficient. >>> >> it should only call the LSM identified by the lsmid in the call. >> >>> Overall, I am still not convinced a syscall for all LSMs is needed. To >>> justify such >> >> its not needed by all LSMs, just a subset of them, and some nebulous >> subset of potentially future LSMs that is entirely undefinable. >> >> If we had had appropriate LSM syscalls landlock wouldn't have needed >> to have landlock specific syscalls. Having another LSM go that route >> feels wrong especially now that we have some LSM syscalls. > > I don't agree. Dedicated syscalls are a good thing. See my other > reply. > I think we can just disagree on this point. >> If a >> syscall is needed by an LSM its better to try hashing something out >> that might have utility for multiple LSMs or at the very least, >> potentially have utility in the future. >> >> >>> a syscall, I think we need to show that it is useful in multiple LSMs. >>> Also, if we >>> really want to have single set of APIs for all LSMs, we may also need >>> get_policy, >> >> We are never going to get a single set of APIs for all LSMs. I will >> settle for an api that has utility for a subset >> >>> remove_policy, etc. This set as-is appears to be an incomplete design. The >> >> To have a complete design, there needs to be feedback and discussion >> from multiple LSMs. This is a starting point. >> >>> implementation, with call_int_hook, is also problematic. It can easily >>> cause some> controversial behaviors. >>> >> agreed it shouldn't be doing a straight call_int_hook, it should only >> call it against the lsm identified by the lsmid > > Yes, but then, I don't see the point of a "generic" LSM syscall. its not a generic LSM syscall. 
It's a syscall, or maybe a set of syscalls, for a specific scoped problem of loading/managing policy. Can we come to something acceptable? I don't know, but we are going to look at it before trying for an AppArmor-specific syscall.
On Sun, May 11, 2025 at 03:47:21AM -0700, John Johansen wrote: > On 5/9/25 03:26, Mickaël Salaün wrote: > > On Thu, May 08, 2025 at 01:18:20AM -0700, John Johansen wrote: > > > On 5/7/25 23:06, Song Liu wrote: > > > > On Wed, May 7, 2025 at 8:37 AM Maxime Bélair > > > > <maxime.belair@canonical.com> wrote: > > > > [...] > > > > permission check to each pseudo file. The downside of the syscall, however, > > > > is that all the permission checks are hard-coded in the kernel (except for > > > > > > The permission checks don't have to be hard coded. Each LSM can define how it handles > > > or manages the syscall. The default is that it isn't supported, but if an lsm decides > > > to support it, there is now reason that its policy can't determine the use of the > > > syscall. > > > > From an interface design point of view, it would be better to clearly > > specify the scope of a command (e.g. which components could be impacted > > by a command), and make sure the documentation reflect that as well. > > Even better, have a syscalls per required privileges and impact (e.g. > > privileged or unprivileged). Going this road, I'm not sure if a > > privileged syscall would make sense given the existing filesystem > > interface. > > > > uhhhmmm, not just privileged. As you well know we are looking to use > this for unprivileged policy. The LSM can limit to privileged if it > wants but it doesn't have to limit it to privileged policy. Yes, I meant to say having a syscall for unprivileged actions, and maybe another one for privileged ones, but this might be a hard sell. :) To say it another way, for your use case, do you need this syscall(s) for privileged operations? Do you plan to drop (or stop extending) the filesystem interface or do you think it would be good for (AppArmor) privileged operations too? 
I know syscalls might be attractive and could be used for everything, but it's good to have a well-defined plan and semantic to avoid using such syscall as another multiplexer with unrelated operations and required privileges. If this syscall should also be a way to do privileged operations, should we also agree on a common set of permissions (e.g. global CAP_MAC_ADMIN or user namespace one)? [...] > > > > Overall, I am still not convinced a syscall for all LSMs is needed. To > > > > justify such > > > > > > its not needed by all LSMs, just a subset of them, and some nebulous > > > subset of potentially future LSMs that is entirely undefinable. > > > > > > If we had had appropriate LSM syscalls landlock wouldn't have needed > > > to have landlock specific syscalls. Having another LSM go that route > > > feels wrong especially now that we have some LSM syscalls. > > > > I don't agree. Dedicated syscalls are a good thing. See my other > > reply. > > > > I think we can just disagree on this point. > > > > If a > > > syscall is needed by an LSM its better to try hashing something out > > > that might have utility for multiple LSMs or at the very least, > > > potentially have utility in the future. > > > > > > > > > > a syscall, I think we need to show that it is useful in multiple LSMs. > > > > Also, if we > > > > really want to have single set of APIs for all LSMs, we may also need > > > > get_policy, > > > > > > We are never going to get a single set of APIs for all LSMs. I will > > > settle for an api that has utility for a subset > > > > > > > remove_policy, etc. This set as-is appears to be an incomplete design. The > > > > > > To have a complete design, there needs to be feedback and discussion > > > from multiple LSMs. This is a starting point. > > > > > > > implementation, with call_int_hook, is also problematic. It can easily > > > > cause some> controversial behaviors. 
> > > > > > > agreed it shouldn't be doing a straight call_int_hook, it should only > > > call it against the lsm identified by the lsmid > > > > Yes, but then, I don't see the point of a "generic" LSM syscall. > > its not a generic LSM syscall. Its a syscall or maybe a set of syscalls > for a specific scoped problem of loading/managing policy. > > Can we come to something acceptable? I don't know but we are going to > look at it before trying for an apparmor specific syscall. I understand and it's good to have this discussion.
On 5/12/25 03:20, Mickaël Salaün wrote: > On Sun, May 11, 2025 at 03:47:21AM -0700, John Johansen wrote: >> On 5/9/25 03:26, Mickaël Salaün wrote: >>> On Thu, May 08, 2025 at 01:18:20AM -0700, John Johansen wrote: >>>> On 5/7/25 23:06, Song Liu wrote: >>>>> On Wed, May 7, 2025 at 8:37 AM Maxime Bélair >>>>> <maxime.belair@canonical.com> wrote: >>>>> [...] > >>>>> permission check to each pseudo file. The downside of the syscall, however, >>>>> is that all the permission checks are hard-coded in the kernel (except for >>>> >>>> The permission checks don't have to be hard coded. Each LSM can define how it handles >>>> or manages the syscall. The default is that it isn't supported, but if an lsm decides >>>> to support it, there is now reason that its policy can't determine the use of the >>>> syscall. >>> >>> From an interface design point of view, it would be better to clearly >>> specify the scope of a command (e.g. which components could be impacted >>> by a command), and make sure the documentation reflect that as well. >>> Even better, have a syscalls per required privileges and impact (e.g. >>> privileged or unprivileged). Going this road, I'm not sure if a >>> privileged syscall would make sense given the existing filesystem >>> interface. >>> >> >> uhhhmmm, not just privileged. As you well know we are looking to use >> this for unprivileged policy. The LSM can limit to privileged if it >> wants but it doesn't have to limit it to privileged policy. > > Yes, I meant to say having a syscall for unprivileged actions, and maybe > another one for privileged ones, but this might be a hard sell. :) > indeed, in the apparmor case context would be important. Just exactly what is privileged. It may be a privileged operation to load policy to one namespace, but not to another that you are setting up for a child. > To say it another way, for your use case, do you need this syscall(s) > for privileged operations? Do you plan to drop (or stop extending) the need, probably. 
That is to say, loading of policy has varying levels of privilege. Root within the container has privilege to load policy to its namespace, but it might have authority to set up a child namespace that does not require privilege to load policy into, and it will determine whether the child has privileged or unprivileged policy within it. Ideally we won't have to use the fs interface within the "privileged" container, as there are cases where this is currently not done or undesirable. > filesystem interface or do you think it would be good for (AppArmor) > privileged operations too? I know syscalls might be attractive and > could be used for everything, but it's good to have a well-defined plan > and semantic to avoid using such syscall as another multiplexer with > unrelated operations and required privileges. > Sure. But the privilege level is use-case dependent: it depends on which policy namespace the policy is being loaded into, replaced in, ... The privilege level very much will depend on what is in the stack/bounding of policy. > If this syscall should also be a way to do privileged operations, should > we also agree on a common set of permissions (e.g. global CAP_MAC_ADMIN > or user namespace one)? > I think requiring something like CAP_MAC_ADMIN would be a per-LSM decision. > [...] > >>>>> Overall, I am still not convinced a syscall for all LSMs is needed. To >>>>> justify such >>>> >>>> its not needed by all LSMs, just a subset of them, and some nebulous >>>> subset of potentially future LSMs that is entirely undefinable. >>>> >>>> If we had had appropriate LSM syscalls landlock wouldn't have needed >>>> to have landlock specific syscalls. Having another LSM go that route >>>> feels wrong especially now that we have some LSM syscalls. >>> >>> I don't agree. Dedicated syscalls are a good thing. See my other >>> reply. >>> >> >> I think we can just disagree on this point. 
>> >>>> If a >>>> syscall is needed by an LSM its better to try hashing something out >>>> that might have utility for multiple LSMs or at the very least, >>>> potentially have utility in the future. >>>> >>>> >>>>> a syscall, I think we need to show that it is useful in multiple LSMs. >>>>> Also, if we >>>>> really want to have single set of APIs for all LSMs, we may also need >>>>> get_policy, >>>> >>>> We are never going to get a single set of APIs for all LSMs. I will >>>> settle for an api that has utility for a subset >>>> >>>>> remove_policy, etc. This set as-is appears to be an incomplete design. The >>>> >>>> To have a complete design, there needs to be feedback and discussion >>>> from multiple LSMs. This is a starting point. >>>> >>>>> implementation, with call_int_hook, is also problematic. It can easily >>>>> cause some> controversial behaviors. >>>>> >>>> agreed it shouldn't be doing a straight call_int_hook, it should only >>>> call it against the lsm identified by the lsmid >>> >>> Yes, but then, I don't see the point of a "generic" LSM syscall. >> >> its not a generic LSM syscall. Its a syscall or maybe a set of syscalls >> for a specific scoped problem of loading/managing policy. >> >> Can we come to something acceptable? I don't know but we are going to >> look at it before trying for an apparmor specific syscall. > > I understand and it's good to have this discussion.
On 2025/05/08 0:37, Maxime Bélair wrote: > Again, each module decides which operations to expose through this syscall. In many cases > the operation will still require CAP_SYS_ADMIN or a similar capability, so environments > that choose this interface remain secure while gaining its advantages. If the interpretation of "flags" argument varies across LSMs, it sounds like ioctl()'s "cmd" argument. Also, there is prctl() which can already carry string-ish parameters without involving open(). Why can't we use prctl() instead of lsm_manage_policy() ?
On 5/7/25 15:04, Tetsuo Handa wrote: > On 2025/05/08 0:37, Maxime Bélair wrote: >> Again, each module decides which operations to expose through this syscall. In many cases >> the operation will still require CAP_SYS_ADMIN or a similar capability, so environments >> that choose this interface remain secure while gaining its advantages. > > If the interpretation of "flags" argument varies across LSMs, it sounds like ioctl()'s Yes, that does feel like ioctl(); on the other hand, defining them at the LSM level won't offer LSMs flexibility, making it so the syscall covers fewer use cases. I am not opposed to either, it's just hashing out what people want, and what is acceptable. > "cmd" argument. Also, there is prctl() which can already carry string-ish parameters > without involving open(). Why can't we use prctl() instead of lsm_manage_policy() ? > prctl() can be used; I used it for the unprivileged policy demo. It has its own set of problems. While LSM policy could be associated with the process doing the load/replacement or whatever operation, it isn't necessarily tied to it. A lot of LSM policy is not process-specific, making prctl() a poor fit. prctl() requires allocating a global prctl() option. prctl()s are already being filtered/controlled by LSMs, making them a poor fit for use by an LSM in a stacking situation, as it requires updating the policy of other LSMs on the system. Yes, seccomp can filter the syscall, but that still is an easier barrier to overcome than having to have instructions for how to allow your LSM's prctl() in multiple LSMs. Mickaël already argued the need for Landlock to have syscalls. See https://lore.kernel.org/lkml/20200511192156.1618284-7-mic@digikod.net/ and the numerous iterations before that. Ideally those could have been LSM syscalls, with Landlock leveraging them. AppArmor is getting to where it has similar needs to Landlock. Yes, we can use ioctls, prctls, netlink, the fs, etc., but
it doesn't mean that those are the best interfaces to do so, and ideally any interface we use will be of benefit to some other LSMs in the future.
On Thu, May 08, 2025 at 12:52:55AM -0700, John Johansen wrote: > On 5/7/25 15:04, Tetsuo Handa wrote: > > On 2025/05/08 0:37, Maxime Bélair wrote: > > > Again, each module decides which operations to expose through this syscall. In many cases > > > the operation will still require CAP_SYS_ADMIN or a similar capability, so environments > > > that choose this interface remain secure while gaining its advantages. > > > > If the interpretation of "flags" argument varies across LSMs, it sounds like ioctl()'s > > yes that does feel like ioctls(), on the other hand defining them at the LSM level won't > offer LSMs flexibility making it so the syscall covers fewer use cases. I am not opposed > to either, it just hashing out what people want, and what is acceptable. > > > "cmd" argument. Also, there is prctl() which can already carry string-ish parameters > > without involving open(). Why can't we use prctl() instead of lsm_manage_policy() ? > > > > prctl() can be used, I used it for the unprivileged policy demo. It has its own set of > problems. While LSM policy could be associated with the process doing the load/replacement > or what ever operation, it isn't necessarily tied to it. A lot of LSM policy is not > process specific making prctl() a poor fit. > > prctl() requires allocating a global prctl() > > prctl() are already being filtered/controlled by LSMs making them a poort fit for > use by an LSM in a stacking situation as it requires updating the policy of other > LSMs on the system. Yes seccomp can filter the syscall but that still is an easier > barrier to overcome than having to have instruction for how to allow your LSMs > prctl() in multiple LSMs. > > > Mickaël already argued the need for landlock to have syscalls. See Landlock indeed requires syscalls mainly because of its unprivileged nature. > https://lore.kernel.org/lkml/20200511192156.1618284-7-mic@digikod.net/ > and the numerous iterations before that. 
This link might be misleading though, it points to an initial version of the syscall proposal (v17) and it was then decided to create one syscall per operation (v34), which is why we ended with 3 syscalls. See the changelog: https://lore.kernel.org/r/20210422154123.13086-9-mic@digikod.net > > Ideally those could have been LSM syscalls, with landlock leveraging them. I don't agree. The Landlock syscalls have a well-defined semantic, with documented security requirements, and they deal with specific kernel objects identified with file descriptors, including a dedicated one: [landlock-ruleset]. For the features provided by these Landlock syscalls, it would not have been a good idea to reuse existing syscalls, nor to rely on the syscall proposed in this series because the interface is too specific to some of the current privileged LSMs (i.e. ingest a policy blob). Making this interface more generic would lead to even less defined semantic though. > AppArmor > is getting to where it has similar needs to landlock. Yes we can use ioctls, prctls, > netlink, the fs, etc. it doesn't mean that those are the best interfaces to do so, I think it would make sense to propose AppArmor-specific syscalls. > and ideally any interface we use will be of benefit to some other LSMs in the future. The LSM syscalls may make sense to deal with LSM blobs managed by the LSM framework (e.g. get/set properties) when the operations are common/generic. Security policies are specific to each LSM and they should implement their own well-defined interface (e.g. filesystem, netlink, syscall). The LSM framework doesn't provide nor manage any security policy, it mainly provides a set of consistent and well-defined kernel hooks with security blobs to enforce a security policy. I don't think it makes sense to add LSM syscalls to manage things not managed by the LSM framework.
On 5/9/25 03:25, Mickaël Salaün wrote: > On Thu, May 08, 2025 at 12:52:55AM -0700, John Johansen wrote: >> On 5/7/25 15:04, Tetsuo Handa wrote: >>> On 2025/05/08 0:37, Maxime Bélair wrote: >>>> Again, each module decides which operations to expose through this syscall. In many cases >>>> the operation will still require CAP_SYS_ADMIN or a similar capability, so environments >>>> that choose this interface remain secure while gaining its advantages. >>> >>> If the interpretation of "flags" argument varies across LSMs, it sounds like ioctl()'s >> >> yes that does feel like ioctls(), on the other hand defining them at the LSM level won't >> offer LSMs flexibility making it so the syscall covers fewer use cases. I am not opposed >> to either, it just hashing out what people want, and what is acceptable. >> >>> "cmd" argument. Also, there is prctl() which can already carry string-ish parameters >>> without involving open(). Why can't we use prctl() instead of lsm_manage_policy() ? >>> >> >> prctl() can be used, I used it for the unprivileged policy demo. It has its own set of >> problems. While LSM policy could be associated with the process doing the load/replacement >> or what ever operation, it isn't necessarily tied to it. A lot of LSM policy is not >> process specific making prctl() a poor fit. >> >> prctl() requires allocating a global prctl() >> >> prctl() are already being filtered/controlled by LSMs making them a poort fit for >> use by an LSM in a stacking situation as it requires updating the policy of other >> LSMs on the system. Yes seccomp can filter the syscall but that still is an easier >> barrier to overcome than having to have instruction for how to allow your LSMs >> prctl() in multiple LSMs. >> >> >> Mickaël already argued the need for landlock to have syscalls. See > > Landlock indeed requires syscalls mainly because of its unprivileged > nature. 
> yes that is the dominant reason >> https://lore.kernel.org/lkml/20200511192156.1618284-7-mic@digikod.net/ >> and the numerous iterations before that. > > This link might be misleading though, it points to an initial version of > the syscall proposal (v17) and it was then decided to create one syscall > per operation (v34), which is why we ended with 3 syscalls. See the > changelog: > https://lore.kernel.org/r/20210422154123.13086-9-mic@digikod.net > yes and no. I am well aware landlock's syscall got split into three syscalls. All I was trying to do is reference to the start of the discussion on why landlock needed a syscall(s). I thought the details of why you have three etc, really didn't add to the discussion. But yeah not also pointing to v34 could be considered misleading. >> >> Ideally those could have been LSM syscalls, with landlock leveraging them. > > I don't agree. The Landlock syscalls have a well-defined semantic, with First I don't begrudge Landlock its syscalls, I think at the time it was the only way forward. > documented security requirements, and they deal with specific kernel > objects identified with file descriptors, including a dedicated one: > [landlock-ruleset]. I am aware. Those semantics could have been kept and documented, within a set of LSM syscalls. Yes landlock's syscalls shouldn't have been done behind a single LSM syscall, I am not advocating for that but maybe behind several LSM syscalls. > For the features provided by these Landlock > syscalls, it would not have been a good idea to reuse existing syscalls, > nor to rely on the syscall proposed in this series because the interface > is too specific to some of the current privileged LSMs (i.e. ingest a > policy blob). Making this interface more generic would lead to even > less defined semantic though. Right, so again not a generic LSM syscall. But "generic" LSM syscalls for certain purposes. 
Let me walk my statement back a little: what I find unfortunate is that the Landlock LSM syscalls didn't get discussed as a set of generic LSM syscalls with Landlock being the first to implement them. The question is hashing out where the generic semantics are vs. those of the individual LSMs. Having an LSM syscall to deal with specific kernel objects identified with file descriptors, and allowing each LSM to deal with that if it needs to, is possible. It's a matter of figuring something out. It could be that it turns out not to be worth it, and that individual LSM syscalls like Landlock's are the way to go; it's just that it wasn't explored. I don't fault you, and I think it really wasn't even an option at the time. > >> AppArmor >> is getting to where it has similar needs to landlock. Yes we can use ioctls, prctls, >> netlink, the fs, etc. it doesn't mean that those are the best interfaces to do so, > > I think it would make sense to propose AppArmor-specific syscalls. > That may be the case, but I think we should explore providing a more LSM-generic interface first. >> and ideally any interface we use will be of benefit to some other LSMs in the future. > > The LSM syscalls may make sense to deal with LSM blobs managed by the > LSM framework (e.g. get/set properties) when the operations are > common/generic. > > Security policies are specific to each LSM and they should implement > their own well-defined interface (e.g. filesystem, netlink, syscall). > Policies at some level are just blobs too. It is worth at least exploring whether there can be a common interface. > The LSM framework doesn't provide nor manage any security policy, it > mainly provides a set of consistent and well-defined kernel hooks with > security blobs to enforce a security policy. I don't think it makes > sense to add LSM syscalls to manage things not managed by the LSM > framework. 
We aren't talking about the LSM framework managing security policy, just whether it makes sense for it to provide a common interface that an LSM can choose to use, through which userspace hands it a blob of policy that it can then manage. It's just a mechanism. This isn't all that different from using the filesystem, netlink, or other mechanisms to shuttle the blob from userspace to the kernel, and then the LSM manages its policy and data. The big difference is that using the syscall opens unprivileged policy up to the LSM more broadly. If we are going to go the syscall route for AppArmor, we might as well see if we can't make that mechanism more broadly available, and make it easier for other LSMs in the future. Again, it might turn out it's a fool's errand, and we have to do an AppArmor-specific syscall, but it is worth exploring first.
© 2016 - 2025 Red Hat, Inc.