[v5] x86/resctrl: monitored closid+rmid together, separate arch/fs locking

[PATCH v5 06/24] x86/resctrl: Track the number of dirty RMID a CLOSID has

Posted by James Morse 2 years, 6 months ago

MPAM's PMG bits extend its PARTID space, meaning the same PMG value can be
used for different control groups.

This means once a CLOSID is allocated, all its monitoring ids may still be
dirty, and held in limbo.

Keep track of the number of RMID held in limbo each CLOSID has. This will
allow a future helper to find the 'cleanest' CLOSID when allocating.

The array is only needed when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is
defined. This will never be the case on x86.

Signed-off-by: James Morse <james.morse@arm.com>
---
Changes since v4:
 * Moved closid_num_dirty_rmid[] update under entry->busy check
 * Take the mutex in dom_data_init() as the caller doesn't.
---
 arch/x86/kernel/cpu/resctrl/monitor.c | 49 +++++++++++++++++++++++----
 1 file changed, 42 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index de91ca781d9f..44addc0126fc 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -43,6 +43,13 @@ struct rmid_entry {
  */
 static LIST_HEAD(rmid_free_lru);
 
+/**
+ * @closid_num_dirty_rmid    The number of dirty RMID each CLOSID has.
+ * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
+ * Indexed by CLOSID. Protected by rdtgroup_mutex.
+ */
+static int *closid_num_dirty_rmid;
+
 /**
  * @rmid_limbo_count     count of currently unused but (potentially)
  *     dirty RMIDs.
@@ -285,6 +292,17 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
 	return 0;
 }
 
+static void limbo_release_entry(struct rmid_entry *entry)
+{
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	rmid_limbo_count--;
+	list_add_tail(&entry->list, &rmid_free_lru);
+
+	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
+		closid_num_dirty_rmid[entry->closid]--;
+}
+
 /*
  * Check the RMIDs that are marked as busy for this domain. If the
  * reported LLC occupancy is below the threshold clear the busy bit and
@@ -321,10 +339,8 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
 
 		if (force_free || !rmid_dirty) {
 			clear_bit(idx, d->rmid_busy_llc);
-			if (!--entry->busy) {
-				rmid_limbo_count--;
-				list_add_tail(&entry->list, &rmid_free_lru);
-			}
+			if (!--entry->busy)
+				limbo_release_entry(entry);
 		}
 		cur_idx = idx + 1;
 	}
@@ -391,6 +407,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
 	u64 val = 0;
 	u32 idx;
 
+	lockdep_assert_held(&rdtgroup_mutex);
+
 	idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
 
 	entry->busy = 0;
@@ -416,9 +434,11 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
 	}
 	put_cpu();
 
-	if (entry->busy)
+	if (entry->busy) {
 		rmid_limbo_count++;
-	else
+		if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
+			closid_num_dirty_rmid[entry->closid]++;
+	} else
 		list_add_tail(&entry->list, &rmid_free_lru);
 }
 
@@ -782,13 +802,28 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
 static int dom_data_init(struct rdt_resource *r)
 {
 	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
+	u32 num_closid = resctrl_arch_get_num_closid(r);
 	struct rmid_entry *entry = NULL;
 	u32 idx;
 	int i;
 
+	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+		int *tmp;
+
+		tmp = kcalloc(num_closid, sizeof(int), GFP_KERNEL);
+		if (!tmp)
+			return -ENOMEM;
+
+		mutex_lock(&rdtgroup_mutex);
+		closid_num_dirty_rmid = tmp;
+		mutex_unlock(&rdtgroup_mutex);
+	}
+
 	rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
-	if (!rmid_ptrs)
+	if (!rmid_ptrs) {
+		kfree(closid_num_dirty_rmid);
 		return -ENOMEM;
+	}
 
 	for (i = 0; i < idx_limit; i++) {
 		entry = &rmid_ptrs[i];
-- 
2.39.2

Re: [PATCH v5 06/24] x86/resctrl: Track the number of dirty RMID a CLOSID has

Posted by Fenghua Yu 2 years, 5 months ago

Hi, James,

On 7/28/23 09:42, James Morse wrote:
> MPAM's PMG bits extend its PARTID space, meaning the same PMG value can be
> used for different control groups.
> 
> This means once a CLOSID is allocated, all its monitoring ids may still be
> dirty, and held in limbo.
> 
> Keep track of the number of RMID held in limbo each CLOSID has. This will
> allow a future helper to find the 'cleanest' CLOSID when allocating.
> 
> The array is only needed when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is
> defined. This will never be the case on x86.
> 
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> Changes since v4:
>   * Moved closid_num_dirty_rmid[] update under entry->busy check
>   * Take the mutex in dom_data_init() as the caller doesn't.
> ---
>   arch/x86/kernel/cpu/resctrl/monitor.c | 49 +++++++++++++++++++++++----
>   1 file changed, 42 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index de91ca781d9f..44addc0126fc 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -43,6 +43,13 @@ struct rmid_entry {
>    */
>   static LIST_HEAD(rmid_free_lru);
>   

Better to add:

#if CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID
> +/**
> + * @closid_num_dirty_rmid    The number of dirty RMID each CLOSID has.
> + * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
> + * Indexed by CLOSID. Protected by rdtgroup_mutex.
> + */
> +static int *closid_num_dirty_rmid;
#endif

Then the global variable won't exist on x86, which avoids confusion and saves space.

Some code related to the CONFIG also needs to be changed accordingly.

> +
>   /**
>    * @rmid_limbo_count     count of currently unused but (potentially)
>    *     dirty RMIDs.
> @@ -285,6 +292,17 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
>   	return 0;
>   }
>   
> +static void limbo_release_entry(struct rmid_entry *entry)
> +{
> +	lockdep_assert_held(&rdtgroup_mutex);
> +
> +	rmid_limbo_count--;
> +	list_add_tail(&entry->list, &rmid_free_lru);
> +
> +	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
> +		closid_num_dirty_rmid[entry->closid]--;


Maybe define some helpers (along with other similar ones) in resctrl.h 
like this:

#ifdef CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID
static inline void closid_num_dirty_rmid_dec(struct rmid_entry *entry)
{
		closid_num_dirty_rmid[entry->closid]--;
}
...
#else
static inline void closid_num_dirty_rmid_dec(struct rmid_entry *unused)
{
}
...
#endif

Then directly call the helper here:

+		closid_num_dirty_rmid_dec(entry);

On x86 this is a no-op that doesn't occupy any space, and the code is cleaner.

> +}
> +
>   /*
>    * Check the RMIDs that are marked as busy for this domain. If the
>    * reported LLC occupancy is below the threshold clear the busy bit and
> @@ -321,10 +339,8 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
>   
>   		if (force_free || !rmid_dirty) {
>   			clear_bit(idx, d->rmid_busy_llc);
> -			if (!--entry->busy) {
> -				rmid_limbo_count--;
> -				list_add_tail(&entry->list, &rmid_free_lru);
> -			}
> +			if (!--entry->busy)
> +				limbo_release_entry(entry);
>   		}
>   		cur_idx = idx + 1;
>   	}
> @@ -391,6 +407,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
>   	u64 val = 0;
>   	u32 idx;
>   
> +	lockdep_assert_held(&rdtgroup_mutex);
> +
>   	idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
>   
>   	entry->busy = 0;
> @@ -416,9 +434,11 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
>   	}
>   	put_cpu();
>   
> -	if (entry->busy)
> +	if (entry->busy) {
>   		rmid_limbo_count++;
> -	else
> +		if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
> +			closid_num_dirty_rmid[entry->closid]++;

Ditto.

> +	} else
>   		list_add_tail(&entry->list, &rmid_free_lru);
>   }
>   
> @@ -782,13 +802,28 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
>   static int dom_data_init(struct rdt_resource *r)
>   {
>   	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> +	u32 num_closid = resctrl_arch_get_num_closid(r);
>   	struct rmid_entry *entry = NULL;
>   	u32 idx;
>   	int i;
>   
> +	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
> +		int *tmp;
> +
> +		tmp = kcalloc(num_closid, sizeof(int), GFP_KERNEL);
> +		if (!tmp)
> +			return -ENOMEM;
> +
> +		mutex_lock(&rdtgroup_mutex);

dom_data_init() is called in __init. No need to lock here, right?

> +		closid_num_dirty_rmid = tmp;
> +		mutex_unlock(&rdtgroup_mutex);
> +	}
> +

This code can also be defined as a helper in resctrl.h.

>   	rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
> -	if (!rmid_ptrs)
> +	if (!rmid_ptrs) {
> +		kfree(closid_num_dirty_rmid);
>   		return -ENOMEM;
> +	}
>   
>   	for (i = 0; i < idx_limit; i++) {
>   		entry = &rmid_ptrs[i];

Thanks.

-Fenghua

Re: [PATCH v5 06/24] x86/resctrl: Track the number of dirty RMID a CLOSID has

Posted by James Morse 2 years, 5 months ago

Hi Fenghua

On 15/08/2023 03:37, Fenghua Yu wrote:
> On 7/28/23 09:42, James Morse wrote:
>> MPAM's PMG bits extend its PARTID space, meaning the same PMG value can be
>> used for different control groups.
>>
>> This means once a CLOSID is allocated, all its monitoring ids may still be
>> dirty, and held in limbo.
>>
>> Keep track of the number of RMID held in limbo each CLOSID has. This will
>> allow a future helper to find the 'cleanest' CLOSID when allocating.
>>
>> The array is only needed when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is
>> defined. This will never be the case on x86.

>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>> index de91ca781d9f..44addc0126fc 100644
>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>> @@ -43,6 +43,13 @@ struct rmid_entry {
>>    */
>>   static LIST_HEAD(rmid_free_lru);
>>   
> 
> Better to add:
> 
> #if CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID
>> +/**
>> + * @closid_num_dirty_rmid    The number of dirty RMID each CLOSID has.
>> + * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
>> + * Indexed by CLOSID. Protected by rdtgroup_mutex.
>> + */
>> +static int *closid_num_dirty_rmid;
> #endif
> 
> Then the global variable won't exist on x86 to avoid confusion and space.
> 
> Some code related to the CONFIG also needs to be changed accordingly.

Uh-huh, that would force me to put #ifdef warts all over the code that accesses that variable.

Modern compilers are really smart. Because this is static, the compiler is free to remove
it if there are no users. All the users are behind if(IS_ENABLED()), meaning the compiler's
dead-code elimination will cull the lot, and this variable too:
morse@eglon:~/kernel/mpam/build_x86_64/fs/resctrl$ nm -s monitor.o | grep closid_num_dirty
morse@eglon:~/kernel/mpam/build_arm64/fs/resctrl$ nm -s monitor.o | grep closid_num_dirty
0000000000000000 b closid_num_dirty_rmid
morse@eglon:~/kernel/mpam/build_arm64/fs/resctrl$


Using #ifdef is not only ugly - it prevents the compiler from seeing all the code, so the
CI build systems get worse coverage.


>> +
>>   /**
>>    * @rmid_limbo_count     count of currently unused but (potentially)
>>    *     dirty RMIDs.
>> @@ -285,6 +292,17 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct
>> rdt_domain *d,
>>       return 0;
>>   }
>>   +static void limbo_release_entry(struct rmid_entry *entry)
>> +{
>> +    lockdep_assert_held(&rdtgroup_mutex);
>> +
>> +    rmid_limbo_count--;
>> +    list_add_tail(&entry->list, &rmid_free_lru);
>> +
>> +    if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
>> +        closid_num_dirty_rmid[entry->closid]--;
> 
> 
> Maybe define some helpers (along with other similar ones) in resctrl.h like this:
> 
> #ifdef CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID
> static inline void closid_num_dirty_rmid_dec(struct rmid_entry *entry)
> {
>         closid_num_dirty_rmid[entry->closid]--;
> }
> ...
> #else
> static inline void closid_num_dirty_rmid_dec(struct rmid_entry *unused)
> {
> }
> ...
> #endif
> 
> Then directly call the helper here:
> 
> +        closid_num_dirty_rmid_dec(entry);
> 
> On x86 this is noop without

and the compiler knows this.


> occupy any space

Literally more lines of code.


> and cleaner code.

Maybe, this would hide the IS_ENABLED() check - but moving that out as a single-use helper
would require closid_num_dirty_rmid[] to be exported from this file - which would prevent
it being optimised out. You'd get the result you were trying to avoid.


>> +}
>> +
>>   /*
>>    * Check the RMIDs that are marked as busy for this domain. If the
>>    * reported LLC occupancy is below the threshold clear the busy bit and
>> @@ -321,10 +339,8 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
>>             if (force_free || !rmid_dirty) {
>>               clear_bit(idx, d->rmid_busy_llc);
>> -            if (!--entry->busy) {
>> -                rmid_limbo_count--;
>> -                list_add_tail(&entry->list, &rmid_free_lru);
>> -            }
>> +            if (!--entry->busy)
>> +                limbo_release_entry(entry);
>>           }
>>           cur_idx = idx + 1;
>>       }
>> @@ -391,6 +407,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
>>       u64 val = 0;
>>       u32 idx;
>>   +    lockdep_assert_held(&rdtgroup_mutex);
>> +
>>       idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
>>         entry->busy = 0;
>> @@ -416,9 +434,11 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
>>       }
>>       put_cpu();
>>   -    if (entry->busy)
>> +    if (entry->busy) {
>>           rmid_limbo_count++;
>> -    else
>> +        if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
>> +            closid_num_dirty_rmid[entry->closid]++;
> 
> Ditto.
> 
>> +    } else
>>           list_add_tail(&entry->list, &rmid_free_lru);
>>   }
>>   @@ -782,13 +802,28 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned
>> long delay_ms)
>>   static int dom_data_init(struct rdt_resource *r)
>>   {
>>       u32 idx_limit = resctrl_arch_system_num_rmid_idx();
>> +    u32 num_closid = resctrl_arch_get_num_closid(r);
>>       struct rmid_entry *entry = NULL;
>>       u32 idx;
>>       int i;
>>   +    if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
>> +        int *tmp;
>> +
>> +        tmp = kcalloc(num_closid, sizeof(int), GFP_KERNEL);
>> +        if (!tmp)
>> +            return -ENOMEM;
>> +
>> +        mutex_lock(&rdtgroup_mutex);

> data_init() is called in __init. No need to lock here, right?

__init code can still race with other callers - especially as there are
CPUHP_AP_ONLINE_DYN cpuhp callbacks that are expected to sleep.

This is about ensuring all accesses to those global variables are protected by the lock.
This saves me a memory ordering headache.


Thanks,

James

Re: [PATCH v5 06/24] x86/resctrl: Track the number of dirty RMID a CLOSID has

Posted by Fenghua Yu 2 years, 5 months ago

Hi, James,

On 7/28/23 09:42, James Morse wrote:
> MPAM's PMG bits extend its PARTID space, meaning the same PMG value can be
> used for different control groups.
> 
> This means once a CLOSID is allocated, all its monitoring ids may still be
> dirty, and held in limbo.
> 
> Keep track of the number of RMID held in limbo each CLOSID has. This will
> allow a future helper to find the 'cleanest' CLOSID when allocating.
> 
> The array is only needed when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is
> defined. This will never be the case on x86.
> 
> Signed-off-by: James Morse <james.morse@arm.com>
> ---
> Changes since v4:
>   * Moved closid_num_dirty_rmid[] update under entry->busy check
>   * Take the mutex in dom_data_init() as the caller doesn't.
> ---
>   arch/x86/kernel/cpu/resctrl/monitor.c | 49 +++++++++++++++++++++++----
>   1 file changed, 42 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index de91ca781d9f..44addc0126fc 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -43,6 +43,13 @@ struct rmid_entry {
>    */
>   static LIST_HEAD(rmid_free_lru);
>   
> +/**
> + * @closid_num_dirty_rmid    The number of dirty RMID each CLOSID has.
> + * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
> + * Indexed by CLOSID. Protected by rdtgroup_mutex.
> + */
> +static int *closid_num_dirty_rmid;
> +
>   /**
>    * @rmid_limbo_count     count of currently unused but (potentially)
>    *     dirty RMIDs.
> @@ -285,6 +292,17 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
>   	return 0;
>   }
>   
> +static void limbo_release_entry(struct rmid_entry *entry)
> +{
> +	lockdep_assert_held(&rdtgroup_mutex);
> +
> +	rmid_limbo_count--;
> +	list_add_tail(&entry->list, &rmid_free_lru);
> +
> +	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
> +		closid_num_dirty_rmid[entry->closid]--;
> +}
> +
>   /*
>    * Check the RMIDs that are marked as busy for this domain. If the
>    * reported LLC occupancy is below the threshold clear the busy bit and
> @@ -321,10 +339,8 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
>   
>   		if (force_free || !rmid_dirty) {
>   			clear_bit(idx, d->rmid_busy_llc);
> -			if (!--entry->busy) {
> -				rmid_limbo_count--;
> -				list_add_tail(&entry->list, &rmid_free_lru);
> -			}
> +			if (!--entry->busy)
> +				limbo_release_entry(entry);
>   		}
>   		cur_idx = idx + 1;
>   	}
> @@ -391,6 +407,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
>   	u64 val = 0;
>   	u32 idx;
>   
> +	lockdep_assert_held(&rdtgroup_mutex);
> +
>   	idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
>   
>   	entry->busy = 0;
> @@ -416,9 +434,11 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
>   	}
>   	put_cpu();
>   
> -	if (entry->busy)
> +	if (entry->busy) {
>   		rmid_limbo_count++;
> -	else
> +		if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
> +			closid_num_dirty_rmid[entry->closid]++;
> +	} else
>   		list_add_tail(&entry->list, &rmid_free_lru);

Unbalanced braces in if-else. Need to add braces in "else".

>   }
>   
> @@ -782,13 +802,28 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
>   static int dom_data_init(struct rdt_resource *r)
>   {
>   	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> +	u32 num_closid = resctrl_arch_get_num_closid(r);
>   	struct rmid_entry *entry = NULL;
>   	u32 idx;
>   	int i;
>   
> +	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
> +		int *tmp;
> +
> +		tmp = kcalloc(num_closid, sizeof(int), GFP_KERNEL);
> +		if (!tmp)
> +			return -ENOMEM;
> +
> +		mutex_lock(&rdtgroup_mutex);
> +		closid_num_dirty_rmid = tmp;
> +		mutex_unlock(&rdtgroup_mutex);
> +	}
> +
>   	rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
> -	if (!rmid_ptrs)
> +	if (!rmid_ptrs) {
> +		kfree(closid_num_dirty_rmid);
>   		return -ENOMEM;
> +	}
>   
>   	for (i = 0; i < idx_limit; i++) {
>   		entry = &rmid_ptrs[i];

Thanks.

-Fenghua

Re: [PATCH v5 06/24] x86/resctrl: Track the number of dirty RMID a CLOSID has

Posted by Reinette Chatre 2 years, 6 months ago

Hi James,

On 7/28/2023 9:42 AM, James Morse wrote:
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index de91ca781d9f..44addc0126fc 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -43,6 +43,13 @@ struct rmid_entry {
>   */
>  static LIST_HEAD(rmid_free_lru);
>  
> +/**
> + * @closid_num_dirty_rmid    The number of dirty RMID each CLOSID has.
> + * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
> + * Indexed by CLOSID. Protected by rdtgroup_mutex.
> + */
> +static int *closid_num_dirty_rmid;
> +

Will the values ever be negative?

>  /**
>   * @rmid_limbo_count     count of currently unused but (potentially)
>   *     dirty RMIDs.
> @@ -285,6 +292,17 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
>  	return 0;
>  }
>  
> +static void limbo_release_entry(struct rmid_entry *entry)
> +{
> +	lockdep_assert_held(&rdtgroup_mutex);
> +
> +	rmid_limbo_count--;
> +	list_add_tail(&entry->list, &rmid_free_lru);
> +
> +	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
> +		closid_num_dirty_rmid[entry->closid]--;
> +}
> +
>  /*
>   * Check the RMIDs that are marked as busy for this domain. If the
>   * reported LLC occupancy is below the threshold clear the busy bit and
> @@ -321,10 +339,8 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
>  
>  		if (force_free || !rmid_dirty) {
>  			clear_bit(idx, d->rmid_busy_llc);
> -			if (!--entry->busy) {
> -				rmid_limbo_count--;
> -				list_add_tail(&entry->list, &rmid_free_lru);
> -			}
> +			if (!--entry->busy)
> +				limbo_release_entry(entry);
>  		}
>  		cur_idx = idx + 1;
>  	}
> @@ -391,6 +407,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
>  	u64 val = 0;
>  	u32 idx;
>  
> +	lockdep_assert_held(&rdtgroup_mutex);
> +
>  	idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
>  
>  	entry->busy = 0;
> @@ -416,9 +434,11 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
>  	}
>  	put_cpu();
>  
> -	if (entry->busy)
> +	if (entry->busy) {
>  		rmid_limbo_count++;
> -	else
> +		if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
> +			closid_num_dirty_rmid[entry->closid]++;
> +	} else
>  		list_add_tail(&entry->list, &rmid_free_lru);
>  }

This new addition breaks the coding style with the last statement
now also needing a brace.

>  
> @@ -782,13 +802,28 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
>  static int dom_data_init(struct rdt_resource *r)
>  {
>  	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
> +	u32 num_closid = resctrl_arch_get_num_closid(r);
>  	struct rmid_entry *entry = NULL;
>  	u32 idx;
>  	int i;
>  
> +	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
> +		int *tmp;
> +
> +		tmp = kcalloc(num_closid, sizeof(int), GFP_KERNEL);
> +		if (!tmp)
> +			return -ENOMEM;
> +
> +		mutex_lock(&rdtgroup_mutex);
> +		closid_num_dirty_rmid = tmp;
> +		mutex_unlock(&rdtgroup_mutex);
> +	}
> +

It does no harm but I cannot see why the mutex is needed here. 

>  	rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
> -	if (!rmid_ptrs)
> +	if (!rmid_ptrs) {
> +		kfree(closid_num_dirty_rmid);
>  		return -ENOMEM;
> +	}
>  
>  	for (i = 0; i < idx_limit; i++) {
>  		entry = &rmid_ptrs[i];

How will this new memory be freed? Actually I cannot find where
rmid_ptrs is freed either .... is a "dom_data_free()" needed?

Reinette

Re: [PATCH v5 06/24] x86/resctrl: Track the number of dirty RMID a CLOSID has

Posted by James Morse 2 years, 5 months ago

Hi Reinette,

On 09/08/2023 23:33, Reinette Chatre wrote:
> On 7/28/2023 9:42 AM, James Morse wrote:
>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>> index de91ca781d9f..44addc0126fc 100644
>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>> @@ -43,6 +43,13 @@ struct rmid_entry {
>>   */
>>  static LIST_HEAD(rmid_free_lru);
>>  
>> +/**
>> + * @closid_num_dirty_rmid    The number of dirty RMID each CLOSID has.
>> + * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
>> + * Indexed by CLOSID. Protected by rdtgroup_mutex.
>> + */
>> +static int *closid_num_dirty_rmid;
>> +
> 
> Will the values ever be negative?

Nope, int is just fewer keystrokes. I'll change it to unsigned int.

>>  /**
>>   * @rmid_limbo_count     count of currently unused but (potentially)
>>   *     dirty RMIDs.

>> @@ -782,13 +802,28 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
>>  static int dom_data_init(struct rdt_resource *r)
>>  {
>>  	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
>> +	u32 num_closid = resctrl_arch_get_num_closid(r);
>>  	struct rmid_entry *entry = NULL;
>>  	u32 idx;
>>  	int i;
>>  
>> +	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
>> +		int *tmp;
>> +
>> +		tmp = kcalloc(num_closid, sizeof(int), GFP_KERNEL);
>> +		if (!tmp)
>> +			return -ENOMEM;
>> +
>> +		mutex_lock(&rdtgroup_mutex);
>> +		closid_num_dirty_rmid = tmp;
>> +		mutex_unlock(&rdtgroup_mutex);
>> +	}
>> +
> 
> It does no harm but I cannot see why the mutex is needed here. 

It's belt-and-braces to ensure that all accesses to that global variable are protected by
that lock. This avoids giving me a memory ordering headache.
rmid_ptrs and the call to __rmid_entry() that dereferences it should probably get the same
treatment.

I'll move the locking to the caller as the least-churny way of covering both.

>>  	rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
>> -	if (!rmid_ptrs)
>> +	if (!rmid_ptrs) {
>> +		kfree(closid_num_dirty_rmid);
>>  		return -ENOMEM;
>> +	}
>>  
>>  	for (i = 0; i < idx_limit; i++) {
>>  		entry = &rmid_ptrs[i];
> 
> How will this new memory be freed? Actually I cannot find where
> rmid_ptrs is freed either .... is a "dom_data_free()" needed?

Oh that's not deliberate? :P

rmid_ptrs has been immortal since the beginning. The good news is resctrl_exit() goes in
the exitcall section, which is in the DISCARDS section of the linker script as resctrl
can't be built as a module. It isn't possible to tear resctrl down, so no-one will notice
this leak.

Something on my eternal-todo-list is to make the filesystem parts of resctrl a loadable
module (if Tony doesn't get there first!). That would flush this sort of thing out.
Last time I triggered resctrl_exit() manually not all of the files got cleaned up - I
haven't investigated it further.

I agree it should probably have a kfree() call somewhere under rdtgroup_exit(). As it's
only the L3 that needs any of this, I'll add resctrl_exit_mon_l3_config() for
rdtgroup_exit() to call.

Another option is to rip out all the __exit text as it's discarded anyway. But if loadable
modules is the direction of travel, it probably makes more sense to fix it.

Thanks,

James

Re: [PATCH v5 06/24] x86/resctrl: Track the number of dirty RMID a CLOSID has

Posted by Tony Luck 2 years, 5 months ago

On Thu, Aug 24, 2023 at 05:53:03PM +0100, James Morse wrote:
> Something on my eternal-todo-list is to make the filesystem parts of resctrl a loadable
> module (if Tony doesn't get there first!). That would flush this sort of thing out.
> Last time I triggered resctrl_exit() manually not all of the files got cleaned up - I
> haven't investigated it further.

James,

I looked at going to a full loadable module approach for about 3 seconds,
and found none of the kernfs support functions are exported. So I also
put that on the eternal-todo-list :-)

There are possibly a few other functions that need exporting like
get_cpu_cacheinfo(), and two or three others from the "perf"
code for pseudo-lock debugfs support.

-Tony

P.S. Latest version of my re-write is at:

https://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux.git/log/?h=resctrl2_v65rc7

Well, almost latest. I haven't pushed the changes to auto-load all the
modules for basic X86 functions based on X86_FEATURE_* bits.

Re: [PATCH v5 06/24] x86/resctrl: Track the number of dirty RMID a CLOSID has

Posted by Reinette Chatre 2 years, 5 months ago

Hi James,

On 8/24/2023 9:53 AM, James Morse wrote:
> Hi Reinette,
> 
> On 09/08/2023 23:33, Reinette Chatre wrote:
>> On 7/28/2023 9:42 AM, James Morse wrote:
>>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>>> index de91ca781d9f..44addc0126fc 100644
>>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>>> @@ -43,6 +43,13 @@ struct rmid_entry {
>>>   */
>>>  static LIST_HEAD(rmid_free_lru);
>>>  
>>> +/**
>>> + * @closid_num_dirty_rmid    The number of dirty RMID each CLOSID has.
>>> + * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
>>> + * Indexed by CLOSID. Protected by rdtgroup_mutex.
>>> + */
>>> +static int *closid_num_dirty_rmid;
>>> +
>>
>> Will the values ever be negative?
> 
> Nope, int is just fewer keystrokes. I'll change it to unsigned int.
> 
> 
>>>  /**
>>>   * @rmid_limbo_count     count of currently unused but (potentially)
>>>   *     dirty RMIDs.
> 
> 
>>> @@ -782,13 +802,28 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms)
>>>  static int dom_data_init(struct rdt_resource *r)
>>>  {
>>>  	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
>>> +	u32 num_closid = resctrl_arch_get_num_closid(r);
>>>  	struct rmid_entry *entry = NULL;
>>>  	u32 idx;
>>>  	int i;
>>>  
>>> +	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
>>> +		int *tmp;
>>> +
>>> +		tmp = kcalloc(num_closid, sizeof(int), GFP_KERNEL);
>>> +		if (!tmp)
>>> +			return -ENOMEM;
>>> +
>>> +		mutex_lock(&rdtgroup_mutex);
>>> +		closid_num_dirty_rmid = tmp;
>>> +		mutex_unlock(&rdtgroup_mutex);
>>> +	}
>>> +
>>
>> It does no harm but I cannot see why the mutex is needed here. 
> 
> It's belt-and-braces to ensure that all accesses to that global variable are protected by
> that lock. This avoids giving me a memory ordering headache.
> rmid_ptrs and the call to __rmid_entry() that dereferences it should probably get the same
> treatment.

This is fair.

> I'll move the locking to the caller as the least-churny way of covering both.

This is not clear to me. From what I can tell all the sites you mention
are in dom_data_init() so keeping the locking there (but covering the
additional sites) seems appropriate?

> 
>>>  	rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
>>> -	if (!rmid_ptrs)
>>> +	if (!rmid_ptrs) {
>>> +		kfree(closid_num_dirty_rmid);
>>>  		return -ENOMEM;
>>> +	}
>>>  
>>>  	for (i = 0; i < idx_limit; i++) {
>>>  		entry = &rmid_ptrs[i];
>>
>> How will this new memory be freed? Actually I cannot find where
>> rmid_ptrs is freed either .... is a "dom_data_free()" needed?
> 
> Oh that's not deliberate? :P
> 
> rmid_ptrs has been immortal since the beginning. The good news is resctrl_exit() goes in
> the exitcall section, which is in the DISCARDS section of the linker script as resctrl
> can't be built as a module. It isn't possible to tear resctrl down, so no-one will notice
> this leak.
> 
> Something on my eternal-todo-list is to make the filesystem parts of resctrl a loadable
> module (if Tony doesn't get there first!). That would flush this sort of thing out.
> Last time I triggered resctrl_exit() manually not all of the files got cleaned up - I
> haven't investigated it further.
> 
> 
> I agree it should probably have a kfree() call somewhere under rdtgroup_exit(), as its
> only the L3 that needs any of this, I'll add resctrl_exit_mon_l3_config() for
> rdtgroup_exit() to call.

I'd prefer that allocation and free are clearly symmetrical. Doing so helps
to make the code easier to understand. rdtgroup_exit() is intended to clean
up after rdtgroup_init(). Since this allocation does not occur within rdtgroup_init()
I do not think rdtgroup_exit() is the best place for this cleanup. resctrl_exit() looks
more appropriate to me. Having a dom_data_free() to clean up after dom_data_init() also
seems like an addition that will help to make the code easier to understand, but I say
that without a clear understanding of what you have in mind for
resctrl_exit_mon_l3_config().

> 
> Another option is to rip out all the __exit text as its discarded anyway. But if loadable
> modules is the direction of travel, it probably make more sense to fix it.

My preference is to do the cleanup properly.

Reinette