xsm/flask: add AVC pre-allocation boot parameter

[RFC PATCH] xsm/flask: add AVC pre-allocation boot parameter

Posted by Sergiy Kibrik 2 months, 2 weeks ago

In a system configured for safety it may not be desirable to do guest-triggered
dynamic memory allocations at runtime, or to allow changing the memory layout by
adjusting the AVC size after boot.
This patch introduces `avc_prealloc` boot parameter that enables allocation of
AVC nodes in advance so that no Xen heap allocations take place at runtime.

When the pre-allocated cache is exhausted, the reclaim of old nodes is
started, just as before, but no new node is added to the cache while it is full.
The AV is computed and returned to the requester, but not cached during the
reclaim, so there is some performance penalty here.

Signed-off-by: Sergiy Kibrik <Sergiy_Kibrik@epam.com>
---
This RFC presents one possible way to hard-limit the memory the AVC can consume.
With the current implementation of the AV cache threshold, reclaim of memory only
starts after the threshold has been reached, and we only reclaim up to
AVC_CACHE_RECLAIM=16 nodes at a time.
As allocation of new nodes is always allowed, in a hypothetical situation of
great AVC pressure, when more than AVC_CACHE_RECLAIM new nodes are quickly
requested while reclaim is still in progress, it won't be possible to decrease
allocation below the threshold.
Does this scenario make sense? Or is there a better way to address it? Thank you!

  -Sergiy
---
 docs/misc/xen-command-line.pandoc |  9 +++++++
 xen/xsm/flask/avc.c               | 44 ++++++++++++++++++++++++++-----
 xen/xsm/flask/flask_op.c          |  3 +++
 xen/xsm/flask/include/avc.h       |  2 ++
 4 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
index a75b6c9301..9044827e78 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -238,6 +238,15 @@ loops for Queued Invalidation completions.**
 Specify a maximum amount of available memory, to which Xen will clamp
 the e820 table.
 
+### avc_prealloc
+> `= <boolean>`
+
+> Default: `false`
+
+Allocate XSM Access Vector Cache at boot. This forbids runtime dynamic
+allocation of AVC nodes from Xen heap and changing AVC size via
+FLASK_SETAVC_THRESHOLD hypercall.
+
 ### badpage
 > `= List of [ <integer> | <integer>-<integer> ]`
 
diff --git a/xen/xsm/flask/avc.c b/xen/xsm/flask/avc.c
index 3d39e55cae..19e524fe35 100644
--- a/xen/xsm/flask/avc.c
+++ b/xen/xsm/flask/avc.c
@@ -21,6 +21,7 @@
 #include <xen/types.h>
 #include <xen/list.h>
 #include <xen/spinlock.h>
+#include <xen/param.h>
 #include <xen/prefetch.h>
 #include <xen/kernel.h>
 #include <xen/sched.h>
@@ -84,6 +85,7 @@ struct avc_cache {
     atomic_t        lru_hint;    /* LRU hint for reclaim scan */
     atomic_t        active_nodes;
     u32            latest_notif;    /* latest revocation notification */
+    struct hlist_head freelist;
 };
 
 /* Exported via Flask hypercall */
@@ -97,6 +99,9 @@ static struct avc_cache avc_cache;
 
 static DEFINE_RCU_READ_LOCK(avc_rcu_lock);
 
+bool __read_mostly opt_avc_prealloc = false;
+boolean_param("avc_prealloc", opt_avc_prealloc);
+
 static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass)
 {
     return (ssid ^ (tsid<<2) ^ (tclass<<4)) & (AVC_CACHE_SLOTS - 1);
@@ -239,6 +244,16 @@ void __init avc_init(void)
     }
     atomic_set(&avc_cache.active_nodes, 0);
     atomic_set(&avc_cache.lru_hint, 0);
+    INIT_HLIST_HEAD(&avc_cache.freelist);
+    for ( i = 0; opt_avc_prealloc && i < avc_cache_threshold + 1; i++ )
+    {
+        struct avc_node *node = xzalloc(struct avc_node);
+        if ( !node )
+        {
+            break;
+        }
+        hlist_add_head(&node->list, &avc_cache.freelist);
+    }
 }
 
 int avc_get_hash_stats(struct xen_flask_hash_stats *arg)
@@ -280,7 +295,10 @@ int avc_get_hash_stats(struct xen_flask_hash_stats *arg)
 static void cf_check avc_node_free(struct rcu_head *rhead)
 {
     struct avc_node *node = container_of(rhead, struct avc_node, rhead);
-    xfree(node);
+    if ( opt_avc_prealloc )
+        hlist_add_head(&node->list, &avc_cache.freelist);
+    else
+        xfree(node);
     avc_cache_stats_incr(frees);
 }
 
@@ -293,7 +311,10 @@ static void avc_node_delete(struct avc_node *node)
 
 static void avc_node_kill(struct avc_node *node)
 {
-    xfree(node);
+    if ( opt_avc_prealloc )
+        hlist_add_head(&node->list, &avc_cache.freelist);
+    else
+        xfree(node);
     avc_cache_stats_incr(frees);
     atomic_dec(&avc_cache.active_nodes);
 }
@@ -344,11 +365,22 @@ static inline int avc_reclaim_node(void)
 
 static struct avc_node *avc_alloc_node(void)
 {
-    struct avc_node *node;
+    struct avc_node *node = NULL;
 
-    node = xzalloc(struct avc_node);
-    if (!node)
-        goto out;
+    if ( opt_avc_prealloc )
+    {
+        if ( hlist_empty(&avc_cache.freelist) )
+            goto out;
+
+        node = hlist_entry(avc_cache.freelist.first, struct avc_node, list);
+        hlist_del(&node->list);
+    }
+    else
+    {
+        node = xzalloc(struct avc_node);
+        if ( !node )
+            goto out;
+    }
 
     INIT_RCU_HEAD(&node->rhead);
     INIT_HLIST_NODE(&node->list);
diff --git a/xen/xsm/flask/flask_op.c b/xen/xsm/flask/flask_op.c
index ea7dd10dc8..5415a696a0 100644
--- a/xen/xsm/flask/flask_op.c
+++ b/xen/xsm/flask/flask_op.c
@@ -226,6 +226,9 @@ static int flask_security_setavc_threshold(
 {
     int rv = 0;
 
+    if ( opt_avc_prealloc )
+        return -EACCES;
+
     if ( arg->threshold != avc_cache_threshold )
     {
         rv = domain_has_security(current->domain, SECURITY__SETSECPARAM);
diff --git a/xen/xsm/flask/include/avc.h b/xen/xsm/flask/include/avc.h
index e29949f5a8..d6122f3dc9 100644
--- a/xen/xsm/flask/include/avc.h
+++ b/xen/xsm/flask/include/avc.h
@@ -95,6 +95,8 @@ int avc_has_perm(u32 ssid, u32 tsid, u16 tclass, u32 requested,
 struct xen_flask_hash_stats;
 int avc_get_hash_stats(struct xen_flask_hash_stats *arg);
 extern unsigned int avc_cache_threshold;
+/* pre-allocate AVC at boot and forbid SETAVC_THRESHOLD flask_op */
+extern bool opt_avc_prealloc;
 
 #ifdef CONFIG_XSM_FLASK_AVC_STATS
 DECLARE_PER_CPU(struct avc_cache_stats, avc_cache_stats);
-- 
2.25.1

Re: [RFC PATCH] xsm/flask: add AVC pre-allocation boot parameter

Posted by Jan Beulich 2 months, 2 weeks ago

On 15.08.2025 12:23, Sergiy Kibrik wrote:
> --- a/docs/misc/xen-command-line.pandoc
> +++ b/docs/misc/xen-command-line.pandoc
> @@ -238,6 +238,15 @@ loops for Queued Invalidation completions.**
>  Specify a maximum amount of available memory, to which Xen will clamp
>  the e820 table.
>  
> +### avc_prealloc

In addition to what Andrew said, please prefer dashes over underscores in new
options.

> @@ -97,6 +99,9 @@ static struct avc_cache avc_cache;
>  
>  static DEFINE_RCU_READ_LOCK(avc_rcu_lock);
>  
> +bool __read_mostly opt_avc_prealloc = false;

__ro_after_init?

Jan

Re: [RFC PATCH] xsm/flask: add AVC pre-allocation boot parameter

Posted by Sergiy Kibrik 2 months, 1 week ago

15.08.25 15:20, Jan Beulich:
> On 15.08.2025 12:23, Sergiy Kibrik wrote:
>> --- a/docs/misc/xen-command-line.pandoc
>> +++ b/docs/misc/xen-command-line.pandoc
>> @@ -238,6 +238,15 @@ loops for Queued Invalidation completions.**
>>   Specify a maximum amount of available memory, to which Xen will clamp
>>   the e820 table.
>>   
>> +### avc_prealloc
> 
> In addition to what Andrew said, please prefer dashes over underscores in new
> options.
> 
>> @@ -97,6 +99,9 @@ static struct avc_cache avc_cache;
>>   
>>   static DEFINE_RCU_READ_LOCK(avc_rcu_lock);
>>   
>> +bool __read_mostly opt_avc_prealloc = false;
> 
> __ro_after_init?
> 

sure, will do that. Thank you!

   -Sergiy

Re: [RFC PATCH] xsm/flask: add AVC pre-allocation boot parameter

Posted by Andrew Cooper 2 months, 2 weeks ago

On 15/08/2025 11:23 am, Sergiy Kibrik wrote:
> diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
> index a75b6c9301..9044827e78 100644
> --- a/docs/misc/xen-command-line.pandoc
> +++ b/docs/misc/xen-command-line.pandoc
> @@ -238,6 +238,15 @@ loops for Queued Invalidation completions.**
>  Specify a maximum amount of available memory, to which Xen will clamp
>  the e820 table.
>  
> +### avc_prealloc
> +> `= <boolean>`
> +
> +> Default: `false`
> +
> +Allocate XSM Access Vector Cache at boot. This forbids runtime dynamic
> +allocation of AVC nodes from Xen heap and changing AVC size via
> +FLASK_SETAVC_THRESHOLD hypercall.

I don't have any input on memory allocation side of things, but this
needs to be a sub-option under the existing flask=, and it looks like
you're going to need to turn it into a comma separated list.

Also, if you actually want to use Flask in a safety system, Flask needs
to become security supported in Xen.

~Andrew

Re: [RFC PATCH] xsm/flask: add AVC pre-allocation boot parameter

Posted by Andrew Cooper 2 months, 2 weeks ago

On 15/08/2025 12:21 pm, Andrew Cooper wrote:
> On 15/08/2025 11:23 am, Sergiy Kibrik wrote:
>> diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
>> index a75b6c9301..9044827e78 100644
>> --- a/docs/misc/xen-command-line.pandoc
>> +++ b/docs/misc/xen-command-line.pandoc
>> @@ -238,6 +238,15 @@ loops for Queued Invalidation completions.**
>>  Specify a maximum amount of available memory, to which Xen will clamp
>>  the e820 table.
>>  
>> +### avc_prealloc
>> +> `= <boolean>`
>> +
>> +> Default: `false`
>> +
>> +Allocate XSM Access Vector Cache at boot. This forbids runtime dynamic
>> +allocation of AVC nodes from Xen heap and changing AVC size via
>> +FLASK_SETAVC_THRESHOLD hypercall.
> I don't have any input on memory allocation side of things, but this
> needs to be a sub-option under the existing flask=, and it looks like
> you're going to need to turn it into a comma separated list.
>
> Also, if you actually want to use Flask in a safety system, Flask needs
> to become security supported in Xen.

Sorry, sent a little too early.  x86's dom0= is probably the closest good
example to follow, having both comma separated booleans and a choice-of-$N.

~Andrew

Re: [RFC PATCH] xsm/flask: add AVC pre-allocation boot parameter

Posted by Sergiy Kibrik 2 months, 1 week ago


15.08.25 14:43, Andrew Cooper:
> On 15/08/2025 12:21 pm, Andrew Cooper wrote:
>> On 15/08/2025 11:23 am, Sergiy Kibrik wrote:
>>> diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
>>> index a75b6c9301..9044827e78 100644
>>> --- a/docs/misc/xen-command-line.pandoc
>>> +++ b/docs/misc/xen-command-line.pandoc
>>> @@ -238,6 +238,15 @@ loops for Queued Invalidation completions.**
>>>   Specify a maximum amount of available memory, to which Xen will clamp
>>>   the e820 table.
>>>   
>>> +### avc_prealloc
>>> +> `= <boolean>`
>>> +
>>> +> Default: `false`
>>> +
>>> +Allocate XSM Access Vector Cache at boot. This forbids runtime dynamic
>>> +allocation of AVC nodes from Xen heap and changing AVC size via
>>> +FLASK_SETAVC_THRESHOLD hypercall.
>> I don't have any input on memory allocation side of things, but this
>> needs to be a sub-option under the existing flask=, and it looks like
>> you're going to need to turn it into a comma separated list.
>>
>> Also, if you actually want to use Flask in a safety system, Flask needs
>> to become security supported in Xen.
> 
> Sorry, sent a little too early.  x86's dom0= is probably the closes good
> example to follow, having both comma separated booleans and a choice-of-$N.
> 

yes, I'll try to integrate that option into flask=

  -Sergiy