.../admin-guide/kernel-parameters.txt | 4 ++ security/selinux/avc.c | 68 +++++++++++++------ 2 files changed, 50 insertions(+), 22 deletions(-)
From: Hongru Zhang <zhanghongru@xiaomi.com>
On mobile devices under high load, permission checks can happen
more than 90,000 times per second (8-core system). With the default
configuration of 512 cache nodes, avc cache misses happen more often
and occasionally lead to long periods (>2ms) with irqs off on both big
and little cores, which decreases the system's real-time capability.
An actual call stack is as follows:
=> avc_compute_av
=> avc_perm_nonode
=> avc_has_perm_noaudit
=> selinux_capable
=> security_capable
=> capable
=> __sched_setscheduler
=> do_sched_setscheduler
=> __arm64_sys_sched_setscheduler
=> invoke_syscall
=> el0_svc_common
=> do_el0_svc
=> el0_svc
=> el0t_64_sync_handler
=> el0t_64_sync
Although we can expand avc nodes through /sys/fs/selinux/cache_threshold
to mitigate the long irqs-off periods, the number of cache slots is fixed,
so hash conflicts make the average bucket length longer, which increases
avc_search_node latency.
Make the number of avc cache slots configurable as well; with fine tuning,
we can mitigate the long irqs-off periods at the cost of a slight
avc_search_node performance regression.
Theoretically, the main overhead is memory consumption.
avc_search_node avg latency test results (about 100,000,000 times) on
Qcom SM8750, 6.6.30-android15-8:
Case 1:
+---------+---------------------+------------------------+
| | no-patch (512/512) | with-patch (512/512) |
+---------+---------------------+------------------------+
| latency | 85 ns | 87 ns |
+---------+---------------------+------------------------+
Case 2:
+---------+---------------------+------------------------+
| | no-patch (8192/512) | with-patch (8192/8192) |
+---------+---------------------+------------------------+
| latency | 277 ns | 106 ns |
+---------+---------------------+------------------------+
Case 1 shows that the 512-node configuration has a ~2% performance
regression with the patch.
Case 2 shows that the 8192-node configuration has a ~61% latency benefit
with the patch.
Signed-off-by: Hongru Zhang <zhanghongru@xiaomi.com>
---
.../admin-guide/kernel-parameters.txt | 4 ++
security/selinux/avc.c | 68 +++++++++++++------
2 files changed, 50 insertions(+), 22 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 747a55abf494..70dc6d659117 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6620,6 +6620,10 @@
1 -- enable.
Default value is 1.
+ selinux_avc_cache_slots= [SELINUX] Set the number of avc cache slots.
+ Format: <int> (must be >0, power of 2)
+ Default: 512
+
serialnumber [BUGS=X86-32]
sev=option[,option...] [X86-64]
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 430b0e23ee00..35f5436f5da0 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -34,7 +34,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/avc.h>
-#define AVC_CACHE_SLOTS 512
+int avc_cache_slots __ro_after_init = 512;
#define AVC_DEF_CACHE_THRESHOLD 512
#define AVC_CACHE_RECLAIM 16
@@ -68,9 +68,13 @@ struct avc_xperms_node {
struct list_head xpd_head; /* list head of extended_perms_decision */
};
+struct avc_slot {
+ struct hlist_head slot; /* head for avc_node->list */
+ spinlock_t slot_lock; /* lock for writes */
+};
+
struct avc_cache {
- struct hlist_head slots[AVC_CACHE_SLOTS]; /* head for avc_node->list */
- spinlock_t slots_lock[AVC_CACHE_SLOTS]; /* lock for writes */
+ struct avc_slot *slots;
atomic_t lru_hint; /* LRU hint for reclaim scan */
atomic_t active_nodes;
u32 latest_notif; /* latest revocation notification */
@@ -93,14 +97,34 @@ struct selinux_avc {
static struct selinux_avc selinux_avc;
+static int __init set_selinux_avc_cache_slots(char *str)
+{
+ int val;
+
+ if ((kstrtoint(str, 0, &val)) || !is_power_of_2(val)) {
+ pr_warn("Unable to set selinux_avc_cache_slots, use default value\n");
+ return 1;
+ }
+
+ avc_cache_slots = val;
+
+ return 1;
+}
+__setup("selinux_avc_cache_slots=", set_selinux_avc_cache_slots);
+
void selinux_avc_init(void)
{
int i;
+ selinux_avc.avc_cache.slots =
+ kmalloc_array(avc_cache_slots, sizeof(struct avc_slot), GFP_KERNEL);
+ if (!selinux_avc.avc_cache.slots)
+ panic("SELinux: No memory to alloc avc cache slots\n");
+
selinux_avc.avc_cache_threshold = AVC_DEF_CACHE_THRESHOLD;
- for (i = 0; i < AVC_CACHE_SLOTS; i++) {
- INIT_HLIST_HEAD(&selinux_avc.avc_cache.slots[i]);
- spin_lock_init(&selinux_avc.avc_cache.slots_lock[i]);
+ for (i = 0; i < avc_cache_slots; i++) {
+ INIT_HLIST_HEAD(&selinux_avc.avc_cache.slots[i].slot);
+ spin_lock_init(&selinux_avc.avc_cache.slots[i].slot_lock);
}
atomic_set(&selinux_avc.avc_cache.active_nodes, 0);
atomic_set(&selinux_avc.avc_cache.lru_hint, 0);
@@ -124,7 +148,7 @@ static struct kmem_cache *avc_xperms_cachep __ro_after_init;
static inline u32 avc_hash(u32 ssid, u32 tsid, u16 tclass)
{
- return (ssid ^ (tsid<<2) ^ (tclass<<4)) & (AVC_CACHE_SLOTS - 1);
+ return (ssid ^ (tsid<<2) ^ (tclass<<4)) & (avc_cache_slots - 1);
}
/**
@@ -150,8 +174,8 @@ int avc_get_hash_stats(char *page)
slots_used = 0;
max_chain_len = 0;
- for (i = 0; i < AVC_CACHE_SLOTS; i++) {
- head = &selinux_avc.avc_cache.slots[i];
+ for (i = 0; i < avc_cache_slots; i++) {
+ head = &selinux_avc.avc_cache.slots[i].slot;
if (!hlist_empty(head)) {
slots_used++;
chain_len = 0;
@@ -167,7 +191,7 @@ int avc_get_hash_stats(char *page)
return scnprintf(page, PAGE_SIZE, "entries: %d\nbuckets used: %d/%d\n"
"longest chain: %d\n",
atomic_read(&selinux_avc.avc_cache.active_nodes),
- slots_used, AVC_CACHE_SLOTS, max_chain_len);
+ slots_used, avc_cache_slots, max_chain_len);
}
/*
@@ -463,11 +487,11 @@ static inline int avc_reclaim_node(void)
struct hlist_head *head;
spinlock_t *lock;
- for (try = 0, ecx = 0; try < AVC_CACHE_SLOTS; try++) {
+ for (try = 0, ecx = 0; try < avc_cache_slots; try++) {
hvalue = atomic_inc_return(&selinux_avc.avc_cache.lru_hint) &
- (AVC_CACHE_SLOTS - 1);
- head = &selinux_avc.avc_cache.slots[hvalue];
- lock = &selinux_avc.avc_cache.slots_lock[hvalue];
+ (avc_cache_slots - 1);
+ head = &selinux_avc.avc_cache.slots[hvalue].slot;
+ lock = &selinux_avc.avc_cache.slots[hvalue].slot_lock;
if (!spin_trylock_irqsave(lock, flags))
continue;
@@ -524,7 +548,7 @@ static inline struct avc_node *avc_search_node(u32 ssid, u32 tsid, u16 tclass)
struct hlist_head *head;
hvalue = avc_hash(ssid, tsid, tclass);
- head = &selinux_avc.avc_cache.slots[hvalue];
+ head = &selinux_avc.avc_cache.slots[hvalue].slot;
hlist_for_each_entry_rcu(node, head, list) {
if (ssid == node->ae.ssid &&
tclass == node->ae.tclass &&
@@ -625,8 +649,8 @@ static void avc_insert(u32 ssid, u32 tsid, u16 tclass,
}
hvalue = avc_hash(ssid, tsid, tclass);
- head = &selinux_avc.avc_cache.slots[hvalue];
- lock = &selinux_avc.avc_cache.slots_lock[hvalue];
+ head = &selinux_avc.avc_cache.slots[hvalue].slot;
+ lock = &selinux_avc.avc_cache.slots[hvalue].slot_lock;
spin_lock_irqsave(lock, flag);
hlist_for_each_entry(pos, head, list) {
if (pos->ae.ssid == ssid &&
@@ -846,8 +870,8 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 base_perm,
/* Lock the target slot */
hvalue = avc_hash(ssid, tsid, tclass);
- head = &selinux_avc.avc_cache.slots[hvalue];
- lock = &selinux_avc.avc_cache.slots_lock[hvalue];
+ head = &selinux_avc.avc_cache.slots[hvalue].slot;
+ lock = &selinux_avc.avc_cache.slots[hvalue].slot_lock;
spin_lock_irqsave(lock, flag);
@@ -929,9 +953,9 @@ static void avc_flush(void)
unsigned long flag;
int i;
- for (i = 0; i < AVC_CACHE_SLOTS; i++) {
- head = &selinux_avc.avc_cache.slots[i];
- lock = &selinux_avc.avc_cache.slots_lock[i];
+ for (i = 0; i < avc_cache_slots; i++) {
+ head = &selinux_avc.avc_cache.slots[i].slot;
+ lock = &selinux_avc.avc_cache.slots[i].slot_lock;
spin_lock_irqsave(lock, flag);
/*
--
2.43.0
On Fri, Sep 5, 2025 at 6:05 AM Hongru Zhang <zhanghongru06@gmail.com> wrote: > > From: Hongru Zhang <zhanghongru@xiaomi.com> > > On mobile device high-load situations, permission check can happen > more than 90,000/s (8 core system). With default 512 cache nodes > configuration, avc cache miss happens more often and occasionally > leads to long time (>2ms) irqs off on both big and little cores, > which decreases system real-time capability. > > An actual call stack is as follows: > => avc_compute_av > => avc_perm_nonode > => avc_has_perm_noaudit > => selinux_capable > => security_capable > => capable > => __sched_setscheduler > => do_sched_setscheduler > => __arm64_sys_sched_setscheduler > => invoke_syscall > => el0_svc_common > => do_el0_svc > => el0_svc > => el0t_64_sync_handler > => el0t_64_sync > > Although we can expand avc nodes through /sys/fs/selinux/cache_threshold > to mitigate long time irqs off, hash conflicts make the bucket average > length longer because of the fixed size of cache slots, leading to > avc_search_node latency increase. > > Make avc cache slot size also configurable, and with fine tuning, we can > mitigate long time irqs off with slightly avc_search_node performance > regression. > > Theoretically, the main overhead is memory consumption. 
> > avc_search_node avg latency test results (about 100,000,000 times) on > Qcom SM8750, 6.6.30-android15-8: > > Case 1: > +---------+---------------------+------------------------+ > | | no-patch (512/512) | with-patch (512/512) | > +---------+---------------------+------------------------+ > | latency | 85 ns | 87 ns | > +---------+---------------------+------------------------+ > > Case 2: > +---------+---------------------+------------------------+ > | | no-patch (8192/512) | with-patch (8192/8192) | > +---------+---------------------+------------------------+ > | latency | 277 ns | 106 ns | > +---------+---------------------+------------------------+ > > Case 1 shows 512 nodes configuration has ~2% performance regression > with patch. > Case 2 shows 8192 nodes configuration has ~61% latency benifit with > patch. > > Signed-off-by: Hongru Zhang <zhanghongru@xiaomi.com> > --- > .../admin-guide/kernel-parameters.txt | 4 ++ > security/selinux/avc.c | 68 +++++++++++++------ > 2 files changed, 50 insertions(+), 22 deletions(-) > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > index 747a55abf494..70dc6d659117 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -6620,6 +6620,10 @@ > 1 -- enable. > Default value is 1. > > + selinux_avc_cache_slots= [SELINUX] Set the avc cache slot size. > + Format: <int> (must be >0, power of 2) > + Default: 512 > + > serialnumber [BUGS=X86-32] > > sev=option[,option...] 
[X86-64] > diff --git a/security/selinux/avc.c b/security/selinux/avc.c > index 430b0e23ee00..35f5436f5da0 100644 > --- a/security/selinux/avc.c > +++ b/security/selinux/avc.c > @@ -34,7 +34,7 @@ > #define CREATE_TRACE_POINTS > #include <trace/events/avc.h> > > -#define AVC_CACHE_SLOTS 512 > +int avc_cache_slots __ro_after_init = 512; > #define AVC_DEF_CACHE_THRESHOLD 512 > #define AVC_CACHE_RECLAIM 16 > > @@ -68,9 +68,13 @@ struct avc_xperms_node { > struct list_head xpd_head; /* list head of extended_perms_decision */ > }; > > +struct avc_slot { > + struct hlist_head slot; /* head for avc_node->list */ > + spinlock_t slot_lock; /* lock for writes */ > +}; > + > struct avc_cache { > - struct hlist_head slots[AVC_CACHE_SLOTS]; /* head for avc_node->list */ > - spinlock_t slots_lock[AVC_CACHE_SLOTS]; /* lock for writes */ > + struct avc_slot *slots; > atomic_t lru_hint; /* LRU hint for reclaim scan */ > atomic_t active_nodes; > u32 latest_notif; /* latest revocation notification */ > @@ -93,14 +97,34 @@ struct selinux_avc { > > static struct selinux_avc selinux_avc; > > +static int __init set_selinux_avc_cache_slots(char *str) > +{ > + int val; > + > + if ((kstrtoint(str, 0, &val)) || !is_power_of_2(val)) { > + pr_warn("Unable to set selinux_avc_cache_slots, use default value\n"); > + return 1; > + } > + > + avc_cache_slots = val; > + > + return 1; > +} > +__setup("selinux_avc_cache_slots=", set_selinux_avc_cache_slots); > + > void selinux_avc_init(void) > { > int i; > > + selinux_avc.avc_cache.slots = > + kmalloc_array(avc_cache_slots, sizeof(struct avc_slot), GFP_KERNEL); > + if (!selinux_avc.avc_cache.slots) > + panic("SELinux: No memory to alloc avc cache slots\n"); > + > selinux_avc.avc_cache_threshold = AVC_DEF_CACHE_THRESHOLD; > - for (i = 0; i < AVC_CACHE_SLOTS; i++) { > - INIT_HLIST_HEAD(&selinux_avc.avc_cache.slots[i]); > - spin_lock_init(&selinux_avc.avc_cache.slots_lock[i]); > + for (i = 0; i < avc_cache_slots; i++) { > + 
INIT_HLIST_HEAD(&selinux_avc.avc_cache.slots[i].slot); > + spin_lock_init(&selinux_avc.avc_cache.slots[i].slot_lock); > } > atomic_set(&selinux_avc.avc_cache.active_nodes, 0); > atomic_set(&selinux_avc.avc_cache.lru_hint, 0); > @@ -124,7 +148,7 @@ static struct kmem_cache *avc_xperms_cachep __ro_after_init; > > static inline u32 avc_hash(u32 ssid, u32 tsid, u16 tclass) > { > - return (ssid ^ (tsid<<2) ^ (tclass<<4)) & (AVC_CACHE_SLOTS - 1); > + return (ssid ^ (tsid<<2) ^ (tclass<<4)) & (avc_cache_slots - 1); If you are making the number of buckets adjustable, you should also change the hash function to better deal with multiple numbers of slots. > } > > /** > @@ -150,8 +174,8 @@ int avc_get_hash_stats(char *page) > > slots_used = 0; > max_chain_len = 0; > - for (i = 0; i < AVC_CACHE_SLOTS; i++) { > - head = &selinux_avc.avc_cache.slots[i]; > + for (i = 0; i < avc_cache_slots; i++) { > + head = &selinux_avc.avc_cache.slots[i].slot; > if (!hlist_empty(head)) { > slots_used++; > chain_len = 0; > @@ -167,7 +191,7 @@ int avc_get_hash_stats(char *page) > return scnprintf(page, PAGE_SIZE, "entries: %d\nbuckets used: %d/%d\n" > "longest chain: %d\n", > atomic_read(&selinux_avc.avc_cache.active_nodes), > - slots_used, AVC_CACHE_SLOTS, max_chain_len); > + slots_used, avc_cache_slots, max_chain_len); > } > > /* > @@ -463,11 +487,11 @@ static inline int avc_reclaim_node(void) > struct hlist_head *head; > spinlock_t *lock; > > - for (try = 0, ecx = 0; try < AVC_CACHE_SLOTS; try++) { > + for (try = 0, ecx = 0; try < avc_cache_slots; try++) { > hvalue = atomic_inc_return(&selinux_avc.avc_cache.lru_hint) & > - (AVC_CACHE_SLOTS - 1); > - head = &selinux_avc.avc_cache.slots[hvalue]; > - lock = &selinux_avc.avc_cache.slots_lock[hvalue]; > + (avc_cache_slots - 1); > + head = &selinux_avc.avc_cache.slots[hvalue].slot; > + lock = &selinux_avc.avc_cache.slots[hvalue].slot_lock; > > if (!spin_trylock_irqsave(lock, flags)) > continue; > @@ -524,7 +548,7 @@ static inline struct avc_node 
*avc_search_node(u32 ssid, u32 tsid, u16 tclass) > struct hlist_head *head; > > hvalue = avc_hash(ssid, tsid, tclass); > - head = &selinux_avc.avc_cache.slots[hvalue]; > + head = &selinux_avc.avc_cache.slots[hvalue].slot; > hlist_for_each_entry_rcu(node, head, list) { > if (ssid == node->ae.ssid && > tclass == node->ae.tclass && > @@ -625,8 +649,8 @@ static void avc_insert(u32 ssid, u32 tsid, u16 tclass, > } > > hvalue = avc_hash(ssid, tsid, tclass); > - head = &selinux_avc.avc_cache.slots[hvalue]; > - lock = &selinux_avc.avc_cache.slots_lock[hvalue]; > + head = &selinux_avc.avc_cache.slots[hvalue].slot; > + lock = &selinux_avc.avc_cache.slots[hvalue].slot_lock; > spin_lock_irqsave(lock, flag); > hlist_for_each_entry(pos, head, list) { > if (pos->ae.ssid == ssid && > @@ -846,8 +870,8 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 base_perm, > /* Lock the target slot */ > hvalue = avc_hash(ssid, tsid, tclass); > > - head = &selinux_avc.avc_cache.slots[hvalue]; > - lock = &selinux_avc.avc_cache.slots_lock[hvalue]; > + head = &selinux_avc.avc_cache.slots[hvalue].slot; > + lock = &selinux_avc.avc_cache.slots[hvalue].slot_lock; > > spin_lock_irqsave(lock, flag); > > @@ -929,9 +953,9 @@ static void avc_flush(void) > unsigned long flag; > int i; > > - for (i = 0; i < AVC_CACHE_SLOTS; i++) { > - head = &selinux_avc.avc_cache.slots[i]; > - lock = &selinux_avc.avc_cache.slots_lock[i]; > + for (i = 0; i < avc_cache_slots; i++) { > + head = &selinux_avc.avc_cache.slots[i].slot; > + lock = &selinux_avc.avc_cache.slots[i].slot_lock; > > spin_lock_irqsave(lock, flag); > /* > -- > 2.43.0 >
Hi Hongru, kernel test robot noticed the following build warnings: [auto build test WARNING on pcmoore-selinux/next] [also build test WARNING on linus/master v6.17-rc4 next-20250905] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Hongru-Zhang/selinux-Make-avc-cache-slot-size-configurable-during-boot/20250905-180729 base: https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git next patch link: https://lore.kernel.org/r/20250905100454.685866-1-zhanghongru%40xiaomi.com patch subject: [PATCH] selinux: Make avc cache slot size configurable during boot config: i386-randconfig-063-20250907 (https://download.01.org/0day-ci/archive/20250907/202509071211.k5n864Gr-lkp@intel.com/config) compiler: gcc-13 (Debian 13.3.0-16) 13.3.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250907/202509071211.k5n864Gr-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202509071211.k5n864Gr-lkp@intel.com/ sparse warnings: (new ones prefixed by >>) >> security/selinux/avc.c:37:5: sparse: sparse: symbol 'avc_cache_slots' was not declared. Should it be static? vim +/avc_cache_slots +37 security/selinux/avc.c 36 > 37 int avc_cache_slots __ro_after_init = 512; 38 #define AVC_DEF_CACHE_THRESHOLD 512 39 #define AVC_CACHE_RECLAIM 16 40 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Fri, Sep 5, 2025 at 6:05 AM Hongru Zhang <zhanghongru06@gmail.com> wrote: > > From: Hongru Zhang <zhanghongru@xiaomi.com> > > On mobile device high-load situations ... What are you using for a SELinux policy? -- paul-moore.com
Apologies for the late reply, I sent the patch before leaving work on Friday, and didn't check email all weekend. > On Fri, Sep 5, 2025 at 6:05 AM Hongru Zhang <zhanghongru06@gmail.com> wrote: > > > > From: Hongru Zhang <zhanghongru@xiaomi.com> > > > > On mobile device high-load situations ... > > What are you using for a SELinux policy? Android app smoothness test, whose testing metrics can quantify app smoothness from the user's perspective. The problem is also reproducible in cold-start test scenarios. These two are widely used testing models. I generated a flamegraph to record the sources of permission checks: url: https://gist.github.com/zhr250/9f4415bfdefc8ff0d64e78d96351fffb
© 2016 - 2025 Red Hat, Inc.