From: Hongru Zhang <zhanghongru@xiaomi.com>
Under high load on mobile devices, permission checks can happen
more than 90,000 times/s (8-core system). With the default 512-cache-node
configuration, avc cache misses happen more often and occasionally
lead to long (>2ms) irqs-off periods on both big and little cores,
which degrades system real-time capability.
An actual call stack is as follows:
=> avc_compute_av
=> avc_perm_nonode
=> avc_has_perm_noaudit
=> selinux_capable
=> security_capable
=> capable
=> __sched_setscheduler
=> do_sched_setscheduler
=> __arm64_sys_sched_setscheduler
=> invoke_syscall
=> el0_svc_common
=> do_el0_svc
=> el0_svc
=> el0t_64_sync_handler
=> el0t_64_sync
Although we can raise the avc node limit through
/sys/fs/selinux/avc/cache_threshold to mitigate the long irqs-off
periods, hash conflicts make the average bucket chain longer because
the number of cache slots is fixed, increasing avc_search_node latency.
Make the avc cache slot count configurable as well; with fine tuning,
we can mitigate the long irqs-off periods with only a slight
avc_search_node performance regression.
Theoretically, the main overhead is memory consumption.
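As a rough estimate (assuming a 64-bit build without lock debugging,
where struct avc_slot is an 8-byte hlist_head plus a 4-byte spinlock_t,
padded to 16 bytes): 8192 slots cost about 8192 * 16 = 128 KiB, versus
about 8 KiB for the default 512, ignoring the avc_node allocations
themselves, which scale with cache_threshold regardless of this patch.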
avc_search_node average latency test results (about 100,000,000 lookups)
on Qcom SM8750, 6.6.30-android15-8; configurations below are given as
cache nodes/cache slots:
Case 1:
+---------+---------------------+------------------------+
| | no-patch (512/512) | with-patch (512/512) |
+---------+---------------------+------------------------+
| latency | 85 ns | 87 ns |
+---------+---------------------+------------------------+
Case 2:
+---------+---------------------+------------------------+
| | no-patch (8192/512) | with-patch (8192/8192) |
+---------+---------------------+------------------------+
| latency | 277 ns | 106 ns |
+---------+---------------------+------------------------+
Case 1 shows the 512-node configuration has a ~2% performance
regression with the patch.
Case 2 shows the 8192-node configuration has a ~61% latency benefit
with the patch.
Signed-off-by: Hongru Zhang <zhanghongru@xiaomi.com>
---
.../admin-guide/kernel-parameters.txt | 4 ++
security/selinux/avc.c | 68 +++++++++++++------
2 files changed, 50 insertions(+), 22 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 747a55abf494..70dc6d659117 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6620,6 +6620,10 @@
1 -- enable.
Default value is 1.
+ selinux_avc_cache_slots= [SELINUX] Set the avc cache slot size.
+ Format: <int> (must be >0, power of 2)
+ Default: 512
+
serialnumber [BUGS=X86-32]
sev=option[,option...] [X86-64]
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 430b0e23ee00..7a7f88012865 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -34,7 +34,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/avc.h>
-#define AVC_CACHE_SLOTS 512
+static int avc_cache_slots __ro_after_init = 512;
#define AVC_DEF_CACHE_THRESHOLD 512
#define AVC_CACHE_RECLAIM 16
@@ -68,9 +68,13 @@ struct avc_xperms_node {
struct list_head xpd_head; /* list head of extended_perms_decision */
};
+struct avc_slot {
+ struct hlist_head slot; /* head for avc_node->list */
+ spinlock_t slot_lock; /* lock for writes */
+};
+
struct avc_cache {
- struct hlist_head slots[AVC_CACHE_SLOTS]; /* head for avc_node->list */
- spinlock_t slots_lock[AVC_CACHE_SLOTS]; /* lock for writes */
+ struct avc_slot *slots;
atomic_t lru_hint; /* LRU hint for reclaim scan */
atomic_t active_nodes;
u32 latest_notif; /* latest revocation notification */
@@ -93,14 +97,34 @@ struct selinux_avc {
static struct selinux_avc selinux_avc;
+static int __init set_selinux_avc_cache_slots(char *str)
+{
+ int val;
+
+ if ((kstrtoint(str, 0, &val)) || !is_power_of_2(val)) {
+ pr_warn("Unable to set selinux_avc_cache_slots, use default value\n");
+ return 1;
+ }
+
+ avc_cache_slots = val;
+
+ return 1;
+}
+__setup("selinux_avc_cache_slots=", set_selinux_avc_cache_slots);
+
void selinux_avc_init(void)
{
int i;
+ selinux_avc.avc_cache.slots =
+ kmalloc_array(avc_cache_slots, sizeof(struct avc_slot), GFP_KERNEL);
+ if (!selinux_avc.avc_cache.slots)
+ panic("SELinux: No memory to alloc avc cache slots\n");
+
selinux_avc.avc_cache_threshold = AVC_DEF_CACHE_THRESHOLD;
- for (i = 0; i < AVC_CACHE_SLOTS; i++) {
- INIT_HLIST_HEAD(&selinux_avc.avc_cache.slots[i]);
- spin_lock_init(&selinux_avc.avc_cache.slots_lock[i]);
+ for (i = 0; i < avc_cache_slots; i++) {
+ INIT_HLIST_HEAD(&selinux_avc.avc_cache.slots[i].slot);
+ spin_lock_init(&selinux_avc.avc_cache.slots[i].slot_lock);
}
atomic_set(&selinux_avc.avc_cache.active_nodes, 0);
atomic_set(&selinux_avc.avc_cache.lru_hint, 0);
@@ -124,7 +148,7 @@ static struct kmem_cache *avc_xperms_cachep __ro_after_init;
static inline u32 avc_hash(u32 ssid, u32 tsid, u16 tclass)
{
- return (ssid ^ (tsid<<2) ^ (tclass<<4)) & (AVC_CACHE_SLOTS - 1);
+ return (ssid ^ (tsid<<2) ^ (tclass<<4)) & (avc_cache_slots - 1);
}
/**
@@ -150,8 +174,8 @@ int avc_get_hash_stats(char *page)
slots_used = 0;
max_chain_len = 0;
- for (i = 0; i < AVC_CACHE_SLOTS; i++) {
- head = &selinux_avc.avc_cache.slots[i];
+ for (i = 0; i < avc_cache_slots; i++) {
+ head = &selinux_avc.avc_cache.slots[i].slot;
if (!hlist_empty(head)) {
slots_used++;
chain_len = 0;
@@ -167,7 +191,7 @@ int avc_get_hash_stats(char *page)
return scnprintf(page, PAGE_SIZE, "entries: %d\nbuckets used: %d/%d\n"
"longest chain: %d\n",
atomic_read(&selinux_avc.avc_cache.active_nodes),
- slots_used, AVC_CACHE_SLOTS, max_chain_len);
+ slots_used, avc_cache_slots, max_chain_len);
}
/*
@@ -463,11 +487,11 @@ static inline int avc_reclaim_node(void)
struct hlist_head *head;
spinlock_t *lock;
- for (try = 0, ecx = 0; try < AVC_CACHE_SLOTS; try++) {
+ for (try = 0, ecx = 0; try < avc_cache_slots; try++) {
hvalue = atomic_inc_return(&selinux_avc.avc_cache.lru_hint) &
- (AVC_CACHE_SLOTS - 1);
- head = &selinux_avc.avc_cache.slots[hvalue];
- lock = &selinux_avc.avc_cache.slots_lock[hvalue];
+ (avc_cache_slots - 1);
+ head = &selinux_avc.avc_cache.slots[hvalue].slot;
+ lock = &selinux_avc.avc_cache.slots[hvalue].slot_lock;
if (!spin_trylock_irqsave(lock, flags))
continue;
@@ -524,7 +548,7 @@ static inline struct avc_node *avc_search_node(u32 ssid, u32 tsid, u16 tclass)
struct hlist_head *head;
hvalue = avc_hash(ssid, tsid, tclass);
- head = &selinux_avc.avc_cache.slots[hvalue];
+ head = &selinux_avc.avc_cache.slots[hvalue].slot;
hlist_for_each_entry_rcu(node, head, list) {
if (ssid == node->ae.ssid &&
tclass == node->ae.tclass &&
@@ -625,8 +649,8 @@ static void avc_insert(u32 ssid, u32 tsid, u16 tclass,
}
hvalue = avc_hash(ssid, tsid, tclass);
- head = &selinux_avc.avc_cache.slots[hvalue];
- lock = &selinux_avc.avc_cache.slots_lock[hvalue];
+ head = &selinux_avc.avc_cache.slots[hvalue].slot;
+ lock = &selinux_avc.avc_cache.slots[hvalue].slot_lock;
spin_lock_irqsave(lock, flag);
hlist_for_each_entry(pos, head, list) {
if (pos->ae.ssid == ssid &&
@@ -846,8 +870,8 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 base_perm,
/* Lock the target slot */
hvalue = avc_hash(ssid, tsid, tclass);
- head = &selinux_avc.avc_cache.slots[hvalue];
- lock = &selinux_avc.avc_cache.slots_lock[hvalue];
+ head = &selinux_avc.avc_cache.slots[hvalue].slot;
+ lock = &selinux_avc.avc_cache.slots[hvalue].slot_lock;
spin_lock_irqsave(lock, flag);
@@ -929,9 +953,9 @@ static void avc_flush(void)
unsigned long flag;
int i;
- for (i = 0; i < AVC_CACHE_SLOTS; i++) {
- head = &selinux_avc.avc_cache.slots[i];
- lock = &selinux_avc.avc_cache.slots_lock[i];
+ for (i = 0; i < avc_cache_slots; i++) {
+ head = &selinux_avc.avc_cache.slots[i].slot;
+ lock = &selinux_avc.avc_cache.slots[i].slot_lock;
spin_lock_irqsave(lock, flag);
/*
--
2.43.0
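As a usage sketch (illustrative, not part of the patch): on a device
whose bootloader supplies the kernel command line, the new parameter is
appended like any other boot option, and the node limit can then be
raised to match via selinuxfs (subject to policy permission):

  # kernel command line (must be a power of two, else 512 is kept)
  ... selinux_avc_cache_slots=8192

  # after boot, raise the node limit for the benchmarked 8192/8192 setup
  echo 8192 > /sys/fs/selinux/avc/cache_threshold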
On Sep 26, 2025 Hongru Zhang <zhanghongru06@gmail.com> wrote:
> ...

I would expect the number of active AVC nodes, and AVC churn in general,
to be very policy dependent; some policies and use cases simply result in
more AVC nodes than others. With that in mind, I'm wondering if instead
of using a kernel command line parameter to specify the number of AVC
buckets, we should instead include an AVC size "hint" in the policy that
we can use to size the AVC when loading a new policy.

Thoughts?

I think it would be important to consider it strictly as a "hint" as
that would make life easier, e.g. if the previous policy hinted at a
larger AVC we may not want to bother with reducing the number of buckets.
I would suggest starting with an implementation that uses the hint as a
power of two for the number of AVC slots/buckets, with a value of '0'
indicating a default value (512 slots, e.g. '2^9').

--
paul-moore.com
On Thu, Oct 16, 2025 at 5:18 PM Paul Moore <paul@paul-moore.com> wrote:
> On Sep 26, 2025 Hongru Zhang <zhanghongru06@gmail.com> wrote: ...
>
> I would expect the number of active AVC nodes, and AVC churn in general,
> to be very policy dependent ...
So, aside from Hongru's points about this requiring a change to the
binary policy format and compiler and introducing possible
atomicity/locking issues in the AVC code when accessing the number of
buckets, I am also uncertain that this is something that is fully
determinable from policy alone. A small AVC might be fine even with a
large policy depending on the actual workload that is running on the
system in question. Hence, I was fine with this being a kernel parameter.

If we did want to introduce dynamism here despite these considerations,
then two possibilities come to mind:

1. Allow it to be set/modified via a new /sys/fs/selinux/avc node in
   the same manner as other existing tunables in selinuxfs. This avoids
   the need to modify the policy format / compiler and allows tuning on
   an end system based on its workload.

and/or

2. Compute the AVC number of buckets based on the policy avtab size,
   which is already available from existing binary policies. This
   likewise avoids the need to modify the policy format / compiler
   while automatically tuning the number of buckets based on the size
   of the policy.
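A minimal sketch of option 2, assuming the slot count is derived from
the loaded policy's avtab entry count; the load-factor target and
clamping bounds are illustrative, and avc_slots_from_avtab() is a
hypothetical helper, not an existing kernel function:

/* uses roundup_pow_of_two() from <linux/log2.h> and clamp() from
 * <linux/minmax.h>; targets ~16 nodes per bucket, purely for
 * illustration
 */
static unsigned int avc_slots_from_avtab(u32 avtab_nel)
{
	unsigned int slots = roundup_pow_of_two((avtab_nel / 16) ?: 1);

	return clamp(slots, 512U, 16384U);
}

The policy-load path could call this after parsing the avtab and resize
(or simply re-create) the AVC if the result differs from the current
slot count.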
On Fri, Oct 17, 2025 at 7:59 AM Stephen Smalley
<stephen.smalley.work@gmail.com> wrote:
> On Thu, Oct 16, 2025 at 5:18 PM Paul Moore <paul@paul-moore.com> wrote:
> > On Sep 26, 2025 Hongru Zhang <zhanghongru06@gmail.com> wrote: ...
>
> So, aside from Hongru's points about this requiring a change to the
> binary policy format and compiler and introducing possible
> atomicity/locking issues in the AVC code when accessing the number of
> buckets ...

I know you've heard me say this before, but for the sake of those who
haven't, "because it's a lot of work" isn't something that I consider
to be a valid excuse. It's fine, and good (!), to explain the work
needed to successfully make a change, but I have an almost allergic
reaction to those who use the amount of work needed as an argument
against doing The Right Thing.

> I am also uncertain that this is something that is fully
> determinable from policy alone.

Agreed, but if we are going to make this changeable, I'd rather see it
as something that could be changed without requiring a reboot. Not
wanting to add yet another selinuxfs node, and seeing *some*
relationship between AVC size and policy, adding an AVC size hint to
the policy seems reasonable. However, as I mentioned in my reply to
Hongru, we may be able to solve the immediate problem with a Kconfig
tunable.

--
paul-moore.com
On Thu, Oct 16, 2025 at 5:18 PM Paul Moore <paul@paul-moore.com> wrote:
> On Sep 26, 2025 Hongru Zhang <zhanghongru06@gmail.com> wrote:
> > ...
>
> I would expect the number of active AVC nodes, and AVC churn in general,
> to be very policy dependent; some policies and use cases simply result in
> more AVC nodes than others. With that in mind, I'm wondering if instead
> of using a kernel command line parameter to specify the number of AVC
> buckets, we should instead include an AVC size "hint" in the policy that
> we can use to size the AVC when loading a new policy.
>
> Thoughts?
I previously considered supporting dynamic adjustment of slot size during
runtime, but this seems to introduce code complexity and overhead. Every
time avc_lookup() or avc_insert() happens, we would need to check if the
table exists. Adjusting slot size and accessing a specific slot might
occur simultaneously, potentially requiring additional lock protection.
When increasing slot size, we could directly copy the contents from the
old table. When decreasing slot size, nodes exceeding the new slot size
would need to be re-hashed and attached to appropriate positions.
On my Android device, policies are fixed before system image release and
are not changed or loaded dynamically while the system is running. Using
a kernel parameter for adjustment ensures no additional locks or checks
are needed during runtime table access, keeping the lookup code simple
and efficient.
If this is necessary, I would appreciate some assistance and need time
to think and develop it. I'm not a senior developer in this field and
lack sufficient knowledge in this area.
To support this, I think I need to:
1. Update the relevant specifications to add descriptions for this new
field;
2. Modify the compiler to support this new field while ensuring
compatibility in the generated policy file;
3. Modify the kernel to parse this field and adjust the slot size
accordingly based on its value.
I'm unsure if this is correct and comprehensive. I would be very grateful
for any guidance.
> I think it would be important to consider it strictly as a "hint" as
> that would make life easier, e.g. if the previous policy hinted at a
> larger AVC we may not want to bother with reducing the number of buckets.
> I would suggest starting with an implementation that uses the hint as a
> power of two for the number of AVC slots/buckets, with a value of '0'
> indicating a default value (512 slots, e.g. '2^9').
Would this approach lead to slot size monotonically increasing, always
remaining at the maximum size? This could potentially cause some memory
waste, though the total amount might not be very large.
On Fri, Oct 17, 2025 at 4:10 AM Hongru Zhang
<zhanghongru06@gmail.com> wrote:
> > On Sep 26, 2025 Hongru Zhang <zhanghongru06@gmail.com> wrote: ...
> >
> > I would expect the number of active AVC nodes, and AVC churn in
> > general, to be very policy dependent ...
> >
> > Thoughts?
>
> I previously considered supporting dynamic adjustment of slot size during
> runtime, but this seems to introduce code complexity and overhead. Every
> time avc_lookup() or avc_insert() happens, we would need to check if the
> table exists. Adjusting slot size and accessing a specific slot might
> occur simultaneously, potentially requiring additional lock protection.

I would imagine that a very simple implementation would simply convert
the selinux_avc variable from an instance of selinux_avc to a RCU
protected selinux_avc pointer. As the AVC already uses RCU, I think
the number of changes should be relatively minimal:

* Ensure we wrap selinux_avc derefs with rcu_dereference(). This
  should be the only real change needed for lookups and insertions as
  every search through the AVC will start with deref'ing the
  selinux_avc pointer.

* Update avc_init() to allocate the cache slots with a default value,
  fail if unable to allocate the cache memory. If we ensure that the
  selinux_avc pointer will always be valid, we can avoid having to
  check it.

* Policy (re)loads which would change the number of AVC cache slots
  would allocate and initialize a new selinux_avc then swap the global
  selinux_avc pointer under spinlock. The old AVC cache could then be
  free'd according to RCU rules. I haven't thought about it too much,
  but I suspect we could do away with flushing the old AVC in these
  cases; even if we can't, flushing the old AVC is easy enough.

> When increasing slot size, we could directly copy the contents from the
> old table. When decreasing slot size, nodes exceeding the new slot size
> would need to be re-hashed and attached to appropriate positions.

Changing the number of cache slots should happen infrequently enough
that I see no need to migrate the old entries to the new cache
instance. It's a cache, it will fill back up naturally.

> On my Android device, policies are fixed before system image release and
> are not changed or loaded dynamically while the system is running. Using
> a kernel parameter for adjustment ensures no additional locks or checks
> are needed during runtime table access, keeping the lookup code simple
> and efficient.

If your system does not update its policy over the course of a single
boot, and presumably doesn't drastically change its behavior during
that time, there is another, simpler option that we should consider:
setting AVC_CACHE_SLOTS at compile time based on a Kconfig tunable.
The code change would essentially be one line:

  #define AVC_CACHE_SLOTS (2 << CONFIG_SECURITY_SELINUX_AVC_HASH_BITS)

... with a corresponding entry in security/selinux/Kconfig. That
should be a very easy change, and if you set the default value such
that AVC_CACHE_SLOTS remains at 512, there should be no impact on
existing systems.

--
paul-moore.com
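For illustration, a minimal sketch of the RCU pointer swap described
above; the names avc_lookup_example(), avc_resize_example() and
avc_resize_lock are hypothetical, and it assumes struct selinux_avc is
heap-allocated and gains a struct rcu_head member named rcu:

static struct selinux_avc __rcu *selinux_avc_ptr;
static DEFINE_SPINLOCK(avc_resize_lock); /* serializes pointer swaps */

/* Lookup side: caller holds rcu_read_lock(), as AVC lookups already do. */
static struct avc_node *avc_lookup_example(u32 ssid, u32 tsid, u16 tclass)
{
	struct selinux_avc *avc = rcu_dereference(selinux_avc_ptr);

	/* ... hash into avc->avc_cache.slots[] and walk the chain ... */
	return NULL;
}

/* Policy-load side: publish the new cache, free the old one after a
 * grace period; no entry migration, the cache refills naturally.
 */
static void avc_resize_example(struct selinux_avc *new_avc)
{
	struct selinux_avc *old;

	spin_lock(&avc_resize_lock);
	old = rcu_replace_pointer(selinux_avc_ptr, new_avc,
				  lockdep_is_held(&avc_resize_lock));
	spin_unlock(&avc_resize_lock);
	kfree_rcu(old, rcu);
}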
> If your system does not update its policy over the course of a single
> boot, and presumably doesn't drastically change its behavior during
> that time, there is another, simpler option that we should consider:
> setting AVC_CACHE_SLOTS at compile time based on a Kconfig tunable.
> ...

Alright, I will add a CONFIG_SECURITY_SELINUX_AVC_HASH_BITS in
security/selinux/Kconfig, with a range between 9 and 14 (512 : 16384)
and a default value of 9. Then I will send a new patchset version.

I will try to submit the final version in Q1 2026 based on the
discussion (because I have some planned Q4 work that hasn't been
completed yet).
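A minimal sketch of that Kconfig entry and the corresponding define;
note it uses 1 << bits so that the stated 9 -> 512 mapping holds (the
'2 <<' form quoted above would give 1024 at 9 bits):

# security/selinux/Kconfig (sketch)
config SECURITY_SELINUX_AVC_HASH_BITS
	int "Number of hash bits for the AVC cache"
	depends on SECURITY_SELINUX
	range 9 14
	default 9
	help
	  The AVC hash table is sized to 1 << bits slots: 512 at the
	  default of 9, up to 16384 at 14.

/* security/selinux/avc.c (sketch) */
#define AVC_CACHE_SLOTS (1 << CONFIG_SECURITY_SELINUX_AVC_HASH_BITS)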
On Tue, Oct 21, 2025 at 8:38 AM Hongru Zhang <zhanghongru06@gmail.com> wrote:
> > ...
>
> Alright, I will add a CONFIG_SECURITY_SELINUX_AVC_HASH_BITS in
> security/selinux/Kconfig, with a range between 9 and 14 (512 : 16384)
> and a default value of 9. Then I will send a new patchset version.

That seems reasonable. I'm sure you've seen it already, but you'll
likely need to modify AVC_DEF_CACHE_THRESHOLD as well ... or honestly
just remove it in favor of AVC_CACHE_SLOTS, it's only used to set an
initial value for the cache threshold.

> I will try to submit the final version in Q1 2026 based on the
> discussion (because I have some planned Q4 work that hasn't been
> completed yet).

No worries, thank you!

--
paul-moore.com
> That seems reasonable. I'm sure you've seen it already, but you'll
> likely need to modify AVC_DEF_CACHE_THRESHOLD as well ... or honestly
> just remove it in favor of AVC_CACHE_SLOTS, it's only used to set an
> initial value for the cache threshold.

How about this?

  #define AVC_DEF_CACHE_THRESHOLD AVC_CACHE_SLOTS

We can preserve the original semantics this way.

> > I will try to submit the final version in Q1 2026 based on the
> > discussion (because I have some planned Q4 work that hasn't been
> > completed yet).
>
> No worries, thank you!
On Fri, Sep 26, 2025 at 2:23 AM Hongru Zhang <zhanghongru06@gmail.com> wrote:
>
> From: Hongru Zhang <zhanghongru@xiaomi.com>
> ...
Reviewed-by: Stephen Smalley <stephen.smalley.work@gmail.com>