From: Hongru Zhang <zhanghongru@xiaomi.com>
In high-load situations on mobile devices, permission checks can occur
more than 90,000 times per second (8-core system). With the default
512-cache-node configuration, avc cache misses happen more often and
occasionally lead to long (>2ms) periods with IRQs off on both big and
little cores, which degrades the system's real-time capability.
An actual call stack is as follows:
=> avc_compute_av
=> avc_perm_nonode
=> avc_has_perm_noaudit
=> selinux_capable
=> security_capable
=> capable
=> __sched_setscheduler
=> do_sched_setscheduler
=> __arm64_sys_sched_setscheduler
=> invoke_syscall
=> el0_svc_common
=> do_el0_svc
=> el0_svc
=> el0t_64_sync_handler
=> el0t_64_sync
Although we can raise the avc node limit through
/sys/fs/selinux/avc/cache_threshold to mitigate the long IRQs-off periods,
the number of cache slots stays fixed, so hash conflicts make the average
bucket chain longer, which increases avc_search_node latency.
Make the avc cache slot count configurable as well; with fine tuning, we
can mitigate the long IRQs-off periods at the cost of only a slight
avc_search_node performance regression.
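For example (an illustrative setup, not taken from the patch's own test
environment), the 8192/8192 configuration measured in Case 2 below
corresponds to booting with

    selinux_avc_cache_slots=8192

on the kernel command line and then raising the node limit from userspace:

    echo 8192 > /sys/fs/selinux/avc/cache_threshold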
Theoretically, the main overhead is memory consumption.
avc_search_node average latency test results (about 100,000,000 lookups)
on Qcom SM8750, 6.6.30-android15-8. Parenthesized values below are
cache_threshold/cache slots:
Case 1:
+---------+---------------------+------------------------+
| | no-patch (512/512) | with-patch (512/512) |
+---------+---------------------+------------------------+
| latency | 85 ns | 87 ns |
+---------+---------------------+------------------------+
Case 2:
+---------+---------------------+------------------------+
| | no-patch (8192/512) | with-patch (8192/8192) |
+---------+---------------------+------------------------+
| latency | 277 ns | 106 ns |
+---------+---------------------+------------------------+
Case 1 shows the 512-node configuration has a ~2% performance regression
with the patch.
Case 2 shows the 8192-node configuration has a ~61% latency benefit with
the patch.
Signed-off-by: Hongru Zhang <zhanghongru@xiaomi.com>
---
.../admin-guide/kernel-parameters.txt | 4 ++
security/selinux/avc.c | 68 +++++++++++++------
2 files changed, 50 insertions(+), 22 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 747a55abf494..70dc6d659117 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6620,6 +6620,10 @@
1 -- enable.
Default value is 1.
+ selinux_avc_cache_slots= [SELINUX] Set the avc cache slot size.
+ Format: <int> (must be >0, power of 2)
+ Default: 512
+
serialnumber [BUGS=X86-32]
sev=option[,option...] [X86-64]
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 430b0e23ee00..35f5436f5da0 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -34,7 +34,7 @@
#define CREATE_TRACE_POINTS
#include <trace/events/avc.h>
-#define AVC_CACHE_SLOTS 512
+int avc_cache_slots __ro_after_init = 512;
#define AVC_DEF_CACHE_THRESHOLD 512
#define AVC_CACHE_RECLAIM 16
@@ -68,9 +68,13 @@ struct avc_xperms_node {
struct list_head xpd_head; /* list head of extended_perms_decision */
};
+struct avc_slot {
+ struct hlist_head slot; /* head for avc_node->list */
+ spinlock_t slot_lock; /* lock for writes */
+};
+
struct avc_cache {
- struct hlist_head slots[AVC_CACHE_SLOTS]; /* head for avc_node->list */
- spinlock_t slots_lock[AVC_CACHE_SLOTS]; /* lock for writes */
+ struct avc_slot *slots;
atomic_t lru_hint; /* LRU hint for reclaim scan */
atomic_t active_nodes;
u32 latest_notif; /* latest revocation notification */
@@ -93,14 +97,34 @@ struct selinux_avc {
static struct selinux_avc selinux_avc;
+static int __init set_selinux_avc_cache_slots(char *str)
+{
+ int val;
+
+ if ((kstrtoint(str, 0, &val)) || !is_power_of_2(val)) {
+ pr_warn("Unable to set selinux_avc_cache_slots, use default value\n");
+ return 1;
+ }
+
+ avc_cache_slots = val;
+
+ return 1;
+}
+__setup("selinux_avc_cache_slots=", set_selinux_avc_cache_slots);
+
void selinux_avc_init(void)
{
int i;
+ selinux_avc.avc_cache.slots =
+ kmalloc_array(avc_cache_slots, sizeof(struct avc_slot), GFP_KERNEL);
+ if (!selinux_avc.avc_cache.slots)
+ panic("SELinux: No memory to alloc avc cache slots\n");
+
selinux_avc.avc_cache_threshold = AVC_DEF_CACHE_THRESHOLD;
- for (i = 0; i < AVC_CACHE_SLOTS; i++) {
- INIT_HLIST_HEAD(&selinux_avc.avc_cache.slots[i]);
- spin_lock_init(&selinux_avc.avc_cache.slots_lock[i]);
+ for (i = 0; i < avc_cache_slots; i++) {
+ INIT_HLIST_HEAD(&selinux_avc.avc_cache.slots[i].slot);
+ spin_lock_init(&selinux_avc.avc_cache.slots[i].slot_lock);
}
atomic_set(&selinux_avc.avc_cache.active_nodes, 0);
atomic_set(&selinux_avc.avc_cache.lru_hint, 0);
@@ -124,7 +148,7 @@ static struct kmem_cache *avc_xperms_cachep __ro_after_init;
static inline u32 avc_hash(u32 ssid, u32 tsid, u16 tclass)
{
- return (ssid ^ (tsid<<2) ^ (tclass<<4)) & (AVC_CACHE_SLOTS - 1);
+ return (ssid ^ (tsid<<2) ^ (tclass<<4)) & (avc_cache_slots - 1);
}
/**
@@ -150,8 +174,8 @@ int avc_get_hash_stats(char *page)
slots_used = 0;
max_chain_len = 0;
- for (i = 0; i < AVC_CACHE_SLOTS; i++) {
- head = &selinux_avc.avc_cache.slots[i];
+ for (i = 0; i < avc_cache_slots; i++) {
+ head = &selinux_avc.avc_cache.slots[i].slot;
if (!hlist_empty(head)) {
slots_used++;
chain_len = 0;
@@ -167,7 +191,7 @@ int avc_get_hash_stats(char *page)
return scnprintf(page, PAGE_SIZE, "entries: %d\nbuckets used: %d/%d\n"
"longest chain: %d\n",
atomic_read(&selinux_avc.avc_cache.active_nodes),
- slots_used, AVC_CACHE_SLOTS, max_chain_len);
+ slots_used, avc_cache_slots, max_chain_len);
}
/*
@@ -463,11 +487,11 @@ static inline int avc_reclaim_node(void)
struct hlist_head *head;
spinlock_t *lock;
- for (try = 0, ecx = 0; try < AVC_CACHE_SLOTS; try++) {
+ for (try = 0, ecx = 0; try < avc_cache_slots; try++) {
hvalue = atomic_inc_return(&selinux_avc.avc_cache.lru_hint) &
- (AVC_CACHE_SLOTS - 1);
- head = &selinux_avc.avc_cache.slots[hvalue];
- lock = &selinux_avc.avc_cache.slots_lock[hvalue];
+ (avc_cache_slots - 1);
+ head = &selinux_avc.avc_cache.slots[hvalue].slot;
+ lock = &selinux_avc.avc_cache.slots[hvalue].slot_lock;
if (!spin_trylock_irqsave(lock, flags))
continue;
@@ -524,7 +548,7 @@ static inline struct avc_node *avc_search_node(u32 ssid, u32 tsid, u16 tclass)
struct hlist_head *head;
hvalue = avc_hash(ssid, tsid, tclass);
- head = &selinux_avc.avc_cache.slots[hvalue];
+ head = &selinux_avc.avc_cache.slots[hvalue].slot;
hlist_for_each_entry_rcu(node, head, list) {
if (ssid == node->ae.ssid &&
tclass == node->ae.tclass &&
@@ -625,8 +649,8 @@ static void avc_insert(u32 ssid, u32 tsid, u16 tclass,
}
hvalue = avc_hash(ssid, tsid, tclass);
- head = &selinux_avc.avc_cache.slots[hvalue];
- lock = &selinux_avc.avc_cache.slots_lock[hvalue];
+ head = &selinux_avc.avc_cache.slots[hvalue].slot;
+ lock = &selinux_avc.avc_cache.slots[hvalue].slot_lock;
spin_lock_irqsave(lock, flag);
hlist_for_each_entry(pos, head, list) {
if (pos->ae.ssid == ssid &&
@@ -846,8 +870,8 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 base_perm,
/* Lock the target slot */
hvalue = avc_hash(ssid, tsid, tclass);
- head = &selinux_avc.avc_cache.slots[hvalue];
- lock = &selinux_avc.avc_cache.slots_lock[hvalue];
+ head = &selinux_avc.avc_cache.slots[hvalue].slot;
+ lock = &selinux_avc.avc_cache.slots[hvalue].slot_lock;
spin_lock_irqsave(lock, flag);
@@ -929,9 +953,9 @@ static void avc_flush(void)
unsigned long flag;
int i;
- for (i = 0; i < AVC_CACHE_SLOTS; i++) {
- head = &selinux_avc.avc_cache.slots[i];
- lock = &selinux_avc.avc_cache.slots_lock[i];
+ for (i = 0; i < avc_cache_slots; i++) {
+ head = &selinux_avc.avc_cache.slots[i].slot;
+ lock = &selinux_avc.avc_cache.slots[i].slot_lock;
spin_lock_irqsave(lock, flag);
/*
--
2.43.0
On Fri, Sep 5, 2025 at 6:05 AM Hongru Zhang <zhanghongru06@gmail.com> wrote:
>
> From: Hongru Zhang <zhanghongru@xiaomi.com>
>
> [...]
>
> static inline u32 avc_hash(u32 ssid, u32 tsid, u16 tclass)
> {
> - return (ssid ^ (tsid<<2) ^ (tclass<<4)) & (AVC_CACHE_SLOTS - 1);
> + return (ssid ^ (tsid<<2) ^ (tclass<<4)) & (avc_cache_slots - 1);
If you are making the number of buckets adjustable, you should also
change the hash function to better deal with multiple numbers of
slots.
> > static inline u32 avc_hash(u32 ssid, u32 tsid, u16 tclass)
> > {
> > - return (ssid ^ (tsid<<2) ^ (tclass<<4)) & (AVC_CACHE_SLOTS - 1);
> > + return (ssid ^ (tsid<<2) ^ (tclass<<4)) & (avc_cache_slots - 1);
>
> > If you are making the number of buckets adjustable, you should also
> > change the hash function to better deal with multiple numbers of
> > slots.
Thank you for the advice. When running the test model, I sampled
/sys/fs/selinux/avc/hash_stats once per second for a total of 1800 times
and analyzed the distribution uniformity of the hash algorithm using the
sampled data.
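For reference, the sampling loop can be as simple as the following
userspace sketch (illustrative only; the actual collection script is not
shown here, and only the sysfs path and the once-per-second, 1800-sample
scheme come from the description above):

/* Sample avc hash_stats once per second, 1800 times (~30 minutes). */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[512];

	for (int i = 0; i < 1800; i++) {
		FILE *f = fopen("/sys/fs/selinux/avc/hash_stats", "r");
		size_t n;

		if (!f) {
			perror("hash_stats");
			return 1;
		}
		n = fread(buf, 1, sizeof(buf) - 1, f);
		buf[n] = '\0';
		fclose(f);
		printf("=== sample %d ===\n%s", i, buf);
		sleep(1);
	}
	return 0;
}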
Baseline: 512 nodes, 512 buckets
Comparison: 8192 nodes, 8192 buckets
Metrics (Average value over 1800 samples):
* Bucket utilization rate (higher -> better, same chain length assumed)
* Baseline: 52.5%
* Comparison: 49.5%
* Max chain length (lower -> better, positive correlation with worst-case latency)
* Baseline: 7.5
* Comparison: 11.4
Experimental results show that scaling buckets and nodes from 512 to 8192:
1. The distribution uniformity under the current hash algorithm does not
degrade significantly;
2. The maximum chain length rises significantly, potentially degrading
worst-case performance (ignoring other code in the avc_search_node function).
Details:
url: https://gist.github.com/zhr250/cb7ebca61ff5455098082677d75b1795
I will modify the hash algorithm in the avc_hash function and collect data
again to see if we can achieve performance improvements.
On Thu, Sep 11, 2025 at 9:07 AM Hongru Zhang <zhanghongru06@gmail.com> wrote:
>
> [...]
>
> I will modify the hash algorithm in the avc_hash function and collect data
> again to see if we can achieve performance improvements.
If you look elsewhere in the SELinux code, you'll see that others have
been converting other hash tables to using the jhash functions, so you
may want to try those here too.
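For instance, a minimal (untested) sketch of that direction, assuming
jhash_3words() from <linux/jhash.h> and keeping the existing power-of-two
mask:

#include <linux/jhash.h>

static inline u32 avc_hash(u32 ssid, u32 tsid, u16 tclass)
{
	/* jhash_3words() mixes all bits of each input, so the low bits
	 * kept by the mask depend on every input bit. */
	return jhash_3words(ssid, tsid, tclass, 0) & (avc_cache_slots - 1);
}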
On Thu, Sep 11, 2025 at 9:23 AM Stephen Smalley
<stephen.smalley.work@gmail.com> wrote:
>
> [...]
>
> If you look elsewhere in the SELinux code, you'll see that others have
> been converting other hash tables to using the jhash functions, so you
> may want to try those here too.
Or you could follow the example of ss/avtab.c which was converted to
MurmurHash3.
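For reference, a sketch of avc_hash() modeled on avtab_hash() (same
MurmurHash3-style mixing constants and finalizer as
security/selinux/ss/avtab.c; this adaptation itself is untested):

static inline u32 avc_hash(u32 ssid, u32 tsid, u16 tclass)
{
	static const u32 c1 = 0xcc9e2d51;
	static const u32 c2 = 0x1b873593;
	static const u32 r1 = 15;
	static const u32 r2 = 13;
	static const u32 m = 5;
	static const u32 n = 0xe6546b64;
	u32 hash = 0;

#define mix(input) do { \
		u32 v = (input); \
		v *= c1; \
		v = (v << r1) | (v >> (32 - r1)); \
		v *= c2; \
		hash ^= v; \
		hash = (hash << r2) | (hash >> (32 - r2)); \
		hash = hash * m + n; \
	} while (0)

	mix(ssid);
	mix(tsid);
	mix(tclass);
#undef mix

	/* final avalanche, as in avtab_hash() */
	hash ^= hash >> 16;
	hash *= 0x85ebca6b;
	hash ^= hash >> 13;
	hash *= 0xc2b2ae35;
	hash ^= hash >> 16;
	return hash & (avc_cache_slots - 1);
}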
> > If you look elsewhere in the SELinux code, you'll see that others have
> > been converting other hash tables to using the jhash functions, so you
> > may want to try those here too.
>
> Or you could follow the example of ss/avtab.c which was converted to
> MurmurHash3.
I read the code of jhash and avtab_hash; these algorithms are more
complex, with higher overhead than avc_hash. For cases with a large
number of nodes and a limited number of buckets, the benefits can be
significant. However, for scenarios with a small number of nodes, the
overhead may already outweigh the gains. In my case, 8192 nodes are
sufficient, and I'm not sure whether there are other cases with higher
requirements.
Based on the 'Max chain length' data I presented earlier, if we want the
hash operation and table lookup to yield an overall performance gain, the
overhead of the hash operation should not exceed the cost of traversing
4 additional nodes (11.4 - 7.5 ~= 4). If measured by 'Bucket utilization
rate' (~50%, meaning an average of two nodes per chain), this overhead
should not exceed the cost of traversing one extra node. Otherwise, even
if a more uniform distribution improves lookup performance, the hash
computation overhead could still degrade overall performance.
In scenarios requiring a large number of nodes, optimizing the hash
algorithm seems necessary to avoid allocating excessive buckets just to
maintain performance, which would otherwise increase memory overhead.
I will first refer to the avtab_hash code to improve the avc_hash
function, and then show you the data. I will collect the following
information for several different configurations using the same test
model:
Baseline: 512 nodes, 512 buckets, original avc_hash
A: 512 nodes, 512 buckets
B: 8192 nodes, 8192 buckets
C: 8192 nodes, 8192 buckets, original avc_hash
D: 8192 nodes, 4096 buckets ("large number" of nodes in limited buckets)
1. /sys/fs/selinux/avc/hash_stats
a. assess the effectiveness of the optimized algorithm based on A, B
and Baseline. Expect bucket utilization rate: A ~= B > Baseline.
2. total latency of hash operation and table lookup
a. A vs Baseline: expect no obvious latency increase for A
b. B vs A: expect B to be close to A
c. C vs B: expect B to be no worse than C
d. D vs C: see if we can save some memory with no obvious latency increase
On Fri, Sep 12, 2025 at 7:00 AM Hongru Zhang <zhanghongru06@gmail.com> wrote:
>
> [...]
>
> Baseline: 512 nodes, 512 buckets, original avc_hash
> A: 512 nodes, 512 buckets
> B: 8192 nodes, 8192 buckets
> C: 8192 nodes, 8192 buckets, original avc_hash
> D: 8192 nodes, 4096 buckets ("large number" of nodes in limited buckets)
>
> [...]
Thank you, looking forward to the results. Fun fact: the current
avc_hash() function logic hasn't been changed since SELinux was first
merged into Linux 2.6.0-test3.
> Thank you, looking forward to the results. Fun fact: the current
> avc_hash() function logic hasn't been changed since SELinux was first
> merged into Linux 2.6.0-test3.
Yes, I also noticed that. I tried MurmurHash3 and another algorithm
(referred to as Muladd below); performance appears to rank
Muladd > MurmurHash3 > current algorithm. How about using Muladd?
Implementation of Muladd:
static inline u32 avc_hash(u32 ssid, u32 tsid, u16 tclass)
{
	return (ssid * 0x9E3779B9 + tsid * 0x85EBCA77 +
		tclass * 0xC2B2AE35) & (avc_cache_slots - 1);
}
Note: all test results are based on patch "selinux: Make avc cache slot size configurable during boot"
Here are the results:
1. Bucket utilization rate and length of longest chain
+--------------------------+-----------------------------------------+
| | Bucket utilization rate / longest chain |
| +------------+---------------+------------+
| | original | MurmurHash3 | Muladd |
+--------------------------+------------+---------------+------------+
| 512 nodes, 512 buckets | 52.5%/7.5 | 60.2%/5.7 * | 58.2%/6.2 *|
+--------------------------+------------+---------------+------------+
| 1024 nodes, 512 buckets | 68.9%/12.1 | 80.2%/9.7 | 82.4%/8.9 |
+--------------------------+------------+---------------+------------+
| 2048 nodes, 512 buckets | 83.7%/19.4 | 93.4%/16.3 | 94.8%/15.2 |
+--------------------------+------------+---------------+------------+
| 8192 nodes, 8192 buckets | 49.5%/11.4 | 60.3%/7.4 | 61.9%/6.6 |
+--------------------------+------------+---------------+------------+
2. avc_search_node latency (total latency of hash operation and table lookup)
+--------------------------+-----------------------------------------+
| | latency of function avc_search_node |
| +------------+---------------+------------+
| | original | MurmurHash3 | Muladd |
+--------------------------+------------+---------------+------------+
| 512 nodes, 512 buckets | 87ns | 84ns * | 79ns * |
+--------------------------+------------+---------------+------------+
| 1024 nodes, 512 buckets | 97ns | 96ns | 91ns |
+--------------------------+------------+---------------+------------+
| 2048 nodes, 512 buckets | 118ns | 113ns | 110ns |
+--------------------------+------------+---------------+------------+
| 8192 nodes, 8192 buckets | 106ns | 99ns | 94ns |
+--------------------------+------------+---------------+------------+
The values in the starred cells are likely explained by MurmurHash3 having
greater per-hash computation overhead than Muladd: it distributes slightly
better at 512/512, yet is still slower overall.
Details:
url: https://gist.github.com/zhr250/198717da076a808b5cc78762f27be77e
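For anyone who wants to reproduce the distribution comparison outside the
kernel, a self-contained userspace harness along these lines works
(illustrative only: it feeds both functions synthetic random SID/class
triples, not the real test model's distribution):

#include <stdio.h>
#include <stdlib.h>

#define SLOTS 8192
#define NODES 8192

static unsigned int hash_orig(unsigned int ssid, unsigned int tsid,
			      unsigned short tclass)
{
	return (ssid ^ (tsid << 2) ^ (tclass << 4)) & (SLOTS - 1);
}

static unsigned int hash_muladd(unsigned int ssid, unsigned int tsid,
				unsigned short tclass)
{
	return (ssid * 0x9E3779B9u + tsid * 0x85EBCA77u +
		tclass * 0xC2B2AE35u) & (SLOTS - 1);
}

static void report(const char *name,
		   unsigned int (*h)(unsigned int, unsigned int, unsigned short))
{
	static unsigned int chain[SLOTS];
	unsigned int used = 0, max = 0;
	int i;

	for (i = 0; i < SLOTS; i++)
		chain[i] = 0;
	srand(1); /* reseed so both functions see the same triples */
	for (i = 0; i < NODES; i++)
		chain[h(rand() % 1024, rand() % 1024, rand() % 128)]++;
	for (i = 0; i < SLOTS; i++) {
		if (chain[i]) {
			used++;
			if (chain[i] > max)
				max = chain[i];
		}
	}
	printf("%-8s utilization %.1f%%, longest chain %u\n",
	       name, 100.0 * used / SLOTS, max);
}

int main(void)
{
	report("original", hash_orig);
	report("muladd", hash_muladd);
	return 0;
}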
On Fri, Sep 19, 2025 at 9:58 AM Hongru Zhang <zhanghongru06@gmail.com> wrote:
>
> [...]
>
> Implementation of Muladd:
> static inline u32 avc_hash(u32 ssid, u32 tsid, u16 tclass)
> {
> 	return (ssid * 0x9E3779B9 + tsid * 0x85EBCA77 +
> 		tclass * 0xC2B2AE35) & (avc_cache_slots - 1);
> }
Can you cite the source of this hash function? Is it public domain or
otherwise GPLv2-compatible?
Hi Hongru,
kernel test robot noticed the following build warnings:
[auto build test WARNING on pcmoore-selinux/next]
[also build test WARNING on linus/master v6.17-rc4 next-20250905]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Hongru-Zhang/selinux-Make-avc-cache-slot-size-configurable-during-boot/20250905-180729
base: https://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/selinux.git next
patch link: https://lore.kernel.org/r/20250905100454.685866-1-zhanghongru%40xiaomi.com
patch subject: [PATCH] selinux: Make avc cache slot size configurable during boot
config: i386-randconfig-063-20250907 (https://download.01.org/0day-ci/archive/20250907/202509071211.k5n864Gr-lkp@intel.com/config)
compiler: gcc-13 (Debian 13.3.0-16) 13.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250907/202509071211.k5n864Gr-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202509071211.k5n864Gr-lkp@intel.com/
sparse warnings: (new ones prefixed by >>)
>> security/selinux/avc.c:37:5: sparse: sparse: symbol 'avc_cache_slots' was not declared. Should it be static?
vim +/avc_cache_slots +37 security/selinux/avc.c
36
> 37 int avc_cache_slots __ro_after_init = 512;
38 #define AVC_DEF_CACHE_THRESHOLD 512
39 #define AVC_CACHE_RECLAIM 16
40
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
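The warning points at the new global introduced by the patch; a minimal
fix, assuming no other translation unit needs the symbol, is to give it
static scope:

/* static: the symbol is only used within security/selinux/avc.c */
static int avc_cache_slots __ro_after_init = 512;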
On Fri, Sep 5, 2025 at 6:05 AM Hongru Zhang <zhanghongru06@gmail.com> wrote:
>
> From: Hongru Zhang <zhanghongru@xiaomi.com>
>
> On mobile device high-load situations ...
What are you using for a SELinux policy?
--
paul-moore.com
Apologies for the late reply, I sent the patch before leaving work on
Friday, and didn't check email all weekend.
> On Fri, Sep 5, 2025 at 6:05 AM Hongru Zhang <zhanghongru06@gmail.com> wrote:
> >
> > From: Hongru Zhang <zhanghongru@xiaomi.com>
> >
> > On mobile device high-load situations ...
>
> What are you using for a SELinux policy?
An Android app smoothness test, whose metrics quantify app smoothness from
the user's perspective. The problem is also reproducible in cold-start test
scenarios. These two are widely used testing models. I generated a
flamegraph to record the sources of the permission checks:
url: https://gist.github.com/zhr250/9f4415bfdefc8ff0d64e78d96351fffb