[patch V2 14/37] rseq: Cache CPU ID and MM CID values

Thomas Gleixner posted 37 patches 1 month, 1 week ago
There is a newer version of this series
[patch V2 14/37] rseq: Cache CPU ID and MM CID values
Posted by Thomas Gleixner 1 month, 1 week ago
In preparation for rewriting the RSEQ exit to user space handling, provide
storage to cache the CPU ID and MM CID values which were written to user
space. That prepares for a quick check, which avoids the update when
nothing changed.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/rseq.h        |    3 +++
 include/linux/rseq_types.h  |   19 +++++++++++++++++++
 include/linux/sched.h       |    1 +
 include/trace/events/rseq.h |    4 ++--
 kernel/rseq.c               |    4 ++++
 5 files changed, 29 insertions(+), 2 deletions(-)

--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -64,11 +64,13 @@ static inline void rseq_fork(struct task
 		t->rseq = NULL;
 		t->rseq_len = 0;
 		t->rseq_sig = 0;
+		t->rseq_ids.cpu_cid = ~0ULL;
 		t->rseq_event.all = 0;
 	} else {
 		t->rseq = current->rseq;
 		t->rseq_len = current->rseq_len;
 		t->rseq_sig = current->rseq_sig;
+		t->rseq_ids.cpu_cid = ~0ULL;
 		t->rseq_event = current->rseq_event;
 	}
 }
@@ -78,6 +80,7 @@ static inline void rseq_execve(struct ta
 	t->rseq = NULL;
 	t->rseq_len = 0;
 	t->rseq_sig = 0;
+	t->rseq_ids.cpu_cid = ~0ULL;
 	t->rseq_event.all = 0;
 }
 
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -27,4 +27,23 @@ struct rseq_event {
 	};
 };
 
+/*
+ * struct rseq_ids - Cache for ids, which need to be updated
+ * @cpu_cid:	Compound of @cpu_id and @mm_cid to make the
+ *		compiler emit a single compare on 64-bit
+ * @cpu_id:	The CPU ID which was written last to user space
+ * @mm_cid:	The MM CID which was written last to user space
+ *
+ * @cpu_id and @mm_cid are updated when the data is written to user space.
+ */
+struct rseq_ids {
+	union {
+		u64		cpu_cid;
+		struct {
+			u32	cpu_id;
+			u32	mm_cid;
+		};
+	};
+};
+
 #endif
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1406,6 +1406,7 @@ struct task_struct {
 	u32				rseq_len;
 	u32				rseq_sig;
 	struct rseq_event		rseq_event;
+	struct rseq_ids			rseq_ids;
 # ifdef CONFIG_DEBUG_RSEQ
 	/*
 	 * This is a place holder to save a copy of the rseq fields for
--- a/include/trace/events/rseq.h
+++ b/include/trace/events/rseq.h
@@ -21,9 +21,9 @@ TRACE_EVENT(rseq_update,
 	),
 
 	TP_fast_assign(
-		__entry->cpu_id = raw_smp_processor_id();
+		__entry->cpu_id = t->rseq_ids.cpu_id;
 		__entry->node_id = cpu_to_node(__entry->cpu_id);
-		__entry->mm_cid = task_mm_cid(t);
+		__entry->mm_cid = t->rseq_ids.mm_cid;
 	),
 
 	TP_printk("cpu_id=%d node_id=%d mm_cid=%d", __entry->cpu_id,
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -184,6 +184,10 @@ static int rseq_update_cpu_node_id(struc
 	rseq_unsafe_put_user(t, node_id, node_id, efault_end);
 	rseq_unsafe_put_user(t, mm_cid, mm_cid, efault_end);
 
+	/* Cache the user space values */
+	t->rseq_ids.cpu_id = cpu_id;
+	t->rseq_ids.mm_cid = mm_cid;
+
 	/*
 	 * Additional feature fields added after ORIG_RSEQ_SIZE
 	 * need to be conditionally updated only if
Re: [patch V2 14/37] rseq: Cache CPU ID and MM CID values
Posted by Mathieu Desnoyers 1 month, 1 week ago
On 2025-08-23 12:39, Thomas Gleixner wrote:
> In preparation for rewriting RSEQ exit to user space handling provide
> storage to cache the CPU ID and MM CID values which were written to user
> space. That prepares for a quick check, which avoids the update when
> nothing changed.

What should we do about the NUMA node_id field?

On pretty much all architectures except powerpc (AFAIK), it's invariant for
the topology, so it is derived from cpu_id.

On powerpc, we could perhaps reset the cached cpu_id to ~0U for
each thread to trigger an update? Or should we just not care about this?

Thanks,

Mathieu

> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>   include/linux/rseq.h        |    3 +++
>   include/linux/rseq_types.h  |   19 +++++++++++++++++++
>   include/linux/sched.h       |    1 +
>   include/trace/events/rseq.h |    4 ++--
>   kernel/rseq.c               |    4 ++++
>   5 files changed, 29 insertions(+), 2 deletions(-)
> 
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -64,11 +64,13 @@ static inline void rseq_fork(struct task
>   		t->rseq = NULL;
>   		t->rseq_len = 0;
>   		t->rseq_sig = 0;
> +		t->rseq_ids.cpu_cid = ~0ULL;
>   		t->rseq_event.all = 0;
>   	} else {
>   		t->rseq = current->rseq;
>   		t->rseq_len = current->rseq_len;
>   		t->rseq_sig = current->rseq_sig;
> +		t->rseq_ids.cpu_cid = ~0ULL;
>   		t->rseq_event = current->rseq_event;
>   	}
>   }
> @@ -78,6 +80,7 @@ static inline void rseq_execve(struct ta
>   	t->rseq = NULL;
>   	t->rseq_len = 0;
>   	t->rseq_sig = 0;
> +	t->rseq_ids.cpu_cid = ~0ULL;
>   	t->rseq_event.all = 0;
>   }
>   
> --- a/include/linux/rseq_types.h
> +++ b/include/linux/rseq_types.h
> @@ -27,4 +27,23 @@ struct rseq_event {
>   	};
>   };
>   
> +/*
> + * struct rseq_ids - Cache for ids, which need to be updated
> + * @cpu_cid:	Compound of @cpu_id and @mm_cid to make the
> + *		compiler emit a single compare on 64-bit
> + * @cpu_id:	The CPU ID which was written last to user space
> + * @mm_cid:	The MM CID which was written last to user space
> + *
> + * @cpu_id and @mm_cid are updated when the data is written to user space.
> + */
> +struct rseq_ids {
> +	union {
> +		u64		cpu_cid;
> +		struct {
> +			u32	cpu_id;
> +			u32	mm_cid;
> +		};
> +	};
> +};
> +
>   #endif
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1406,6 +1406,7 @@ struct task_struct {
>   	u32				rseq_len;
>   	u32				rseq_sig;
>   	struct rseq_event		rseq_event;
> +	struct rseq_ids			rseq_ids;
>   # ifdef CONFIG_DEBUG_RSEQ
>   	/*
>   	 * This is a place holder to save a copy of the rseq fields for
> --- a/include/trace/events/rseq.h
> +++ b/include/trace/events/rseq.h
> @@ -21,9 +21,9 @@ TRACE_EVENT(rseq_update,
>   	),
>   
>   	TP_fast_assign(
> -		__entry->cpu_id = raw_smp_processor_id();
> +		__entry->cpu_id = t->rseq_ids.cpu_id;
>   		__entry->node_id = cpu_to_node(__entry->cpu_id);
> -		__entry->mm_cid = task_mm_cid(t);
> +		__entry->mm_cid = t->rseq_ids.mm_cid;
>   	),
>   
>   	TP_printk("cpu_id=%d node_id=%d mm_cid=%d", __entry->cpu_id,
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -184,6 +184,10 @@ static int rseq_update_cpu_node_id(struc
>   	rseq_unsafe_put_user(t, node_id, node_id, efault_end);
>   	rseq_unsafe_put_user(t, mm_cid, mm_cid, efault_end);
>   
> +	/* Cache the user space values */
> +	t->rseq_ids.cpu_id = cpu_id;
> +	t->rseq_ids.mm_cid = mm_cid;
> +
>   	/*
>   	 * Additional feature fields added after ORIG_RSEQ_SIZE
>   	 * need to be conditionally updated only if
> 


-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
Re: [patch V2 14/37] rseq: Cache CPU ID and MM CID values
Posted by Thomas Gleixner 1 month ago
On Mon, Aug 25 2025 at 14:19, Mathieu Desnoyers wrote:
> On 2025-08-23 12:39, Thomas Gleixner wrote:
>> In preparation for rewriting RSEQ exit to user space handling provide
>> storage to cache the CPU ID and MM CID values which were written to user
>> space. That prepares for a quick check, which avoids the update when
>> nothing changed.
>
> What should we do about the numa node_id field ?
>
> On pretty much all arch except powerpc (AFAIK) it's invariant for
> the topology, so derived from cpu_id.
>
> On powerpc, we could perhaps reset the cached cpu_id to ~0U for
> each thread to trigger an update ? Or just don't care about this ?

It's invariant on PowerPC as well once the CPU has been [hot]added to the
kernel.

Otherwise any usage of cpu_to_node() would be broken on powerPC, no?

Thanks,

        tglx
Re: [patch V2 14/37] rseq: Cache CPU ID and MM CID values
Posted by Mathieu Desnoyers 4 weeks, 1 day ago
On 2025-09-02 09:48, Thomas Gleixner wrote:
> On Mon, Aug 25 2025 at 14:19, Mathieu Desnoyers wrote:
>> On 2025-08-23 12:39, Thomas Gleixner wrote:
>>> In preparation for rewriting RSEQ exit to user space handling provide
>>> storage to cache the CPU ID and MM CID values which were written to user
>>> space. That prepares for a quick check, which avoids the update when
>>> nothing changed.
>>
>> What should we do about the numa node_id field ?
>>
>> On pretty much all arch except powerpc (AFAIK) it's invariant for
>> the topology, so derived from cpu_id.
>>
>> On powerpc, we could perhaps reset the cached cpu_id to ~0U for
>> each thread to trigger an update ? Or just don't care about this ?
> 
> It's invariant on powerPC as well after the CPU was [hot]added to the
> kernel.
> 
> Otherwise any usage of cpu_to_node() would be broken on powerPC, no?

Agreed. I've added powerpc maintainers on the other leg of this thread
to validate my understanding.

Thanks,

Mathieu

-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com