struct pt_regs is hard to read because the member or section related
comments are not aligned with the members.
The 'cs' and 'ss' members of pt_regs are of type 'unsigned long' while
in reality they are only 16-bit wide. This works so far as the
remaining space is unused, but FRED will use the remaining bits for
other purposes.
To prepare for FRED:
- Cleanup the formatting
- Convert 'cs' and 'ss' to u16 and embed them into a union
with a u64
- Fixup the related printk() format strings
Originally-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
---
Change since v12:
* Put comments on top, not on the side (Borislav Petkov).
---
arch/x86/entry/vsyscall/vsyscall_64.c | 2 +-
arch/x86/include/asm/ptrace.h | 48 +++++++++++++++++++--------
arch/x86/kernel/process_64.c | 2 +-
3 files changed, 37 insertions(+), 15 deletions(-)
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index e0ca8120aea8..a3c0df11d0e6 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -76,7 +76,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
if (!show_unhandled_signals)
return;
- printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
+ printk_ratelimited("%s%s[%d] %s ip:%lx cs:%x sp:%lx ax:%lx si:%lx di:%lx\n",
level, current->comm, task_pid_nr(current),
message, regs->ip, regs->cs,
regs->sp, regs->ax, regs->si, regs->di);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index f4db78b09c8f..b268cd2a2d01 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -57,17 +57,19 @@ struct pt_regs {
#else /* __i386__ */
struct pt_regs {
-/*
- * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
- * unless syscall needs a complete, fully filled "struct pt_regs".
- */
+ /*
+ * C ABI says these regs are callee-preserved. They aren't saved on
+ * kernel entry unless syscall needs a complete, fully filled
+ * "struct pt_regs".
+ */
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long bp;
unsigned long bx;
-/* These regs are callee-clobbered. Always saved on kernel entry. */
+
+ /* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11;
unsigned long r10;
unsigned long r9;
@@ -77,18 +79,38 @@ struct pt_regs {
unsigned long dx;
unsigned long si;
unsigned long di;
-/*
- * On syscall entry, this is syscall#. On CPU exception, this is error code.
- * On hw interrupt, it's IRQ number:
- */
+
+ /*
+ * orig_ax is used on entry for:
+ * - the syscall number (syscall, sysenter, int80)
+ * - error_code stored by the CPU on traps and exceptions
+ * - the interrupt number for device interrupts
+ */
unsigned long orig_ax;
-/* Return frame for iretq */
+
+ /* The IRETQ return frame starts here */
unsigned long ip;
- unsigned long cs;
+
+ union {
+ /* The full 64-bit data slot containing CS */
+ u64 csx;
+ /* CS selector */
+ u16 cs;
+ };
+
unsigned long flags;
unsigned long sp;
- unsigned long ss;
-/* top of stack page */
+
+ union {
+ /* The full 64-bit data slot containing SS */
+ u64 ssx;
+ /* SS selector */
+ u16 ss;
+ };
+
+ /*
+ * Top of stack on IDT systems.
+ */
};
#endif /* !__i386__ */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 1553e19904e0..b924477c5ba8 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -117,7 +117,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
log_lvl, fs, fsindex, gs, gsindex, shadowgs);
- printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n",
+ printk("%sCS: %04x DS: %04x ES: %04x CR0: %016lx\n",
log_lvl, regs->cs, ds, es, cr0);
printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
log_lvl, cr2, cr3, cr4);
--
2.43.0
The following commit has been merged into the x86/fred branch of tip:
Commit-ID: ee63291aa8287cb7ded767d340155fe8681fc075
Gitweb: https://git.kernel.org/tip/ee63291aa8287cb7ded767d340155fe8681fc075
Author: Xin Li <xin3.li@intel.com>
AuthorDate: Tue, 05 Dec 2023 02:50:02 -08:00
Committer: Borislav Petkov (AMD) <bp@alien8.de>
CommitterDate: Wed, 31 Jan 2024 22:01:13 +01:00
x86/ptrace: Cleanup the definition of the pt_regs structure
struct pt_regs is hard to read because the member or section related
comments are not aligned with the members.
The 'cs' and 'ss' members of pt_regs are of type 'unsigned long' while
in reality they are only 16-bit wide. This works so far as the
remaining space is unused, but FRED will use the remaining bits for
other purposes.
To prepare for FRED:
- Cleanup the formatting
- Convert 'cs' and 'ss' to u16 and embed them into a union
with a u64
- Fixup the related printk() format strings
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Originally-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Tested-by: Shan Kang <shan.kang@intel.com>
Link: https://lore.kernel.org/r/20231205105030.8698-14-xin3.li@intel.com
---
arch/x86/entry/vsyscall/vsyscall_64.c | 2 +-
arch/x86/include/asm/ptrace.h | 48 ++++++++++++++++++--------
arch/x86/kernel/process_64.c | 2 +-
3 files changed, 37 insertions(+), 15 deletions(-)
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index e0ca812..a3c0df1 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -76,7 +76,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
if (!show_unhandled_signals)
return;
- printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
+ printk_ratelimited("%s%s[%d] %s ip:%lx cs:%x sp:%lx ax:%lx si:%lx di:%lx\n",
level, current->comm, task_pid_nr(current),
message, regs->ip, regs->cs,
regs->sp, regs->ax, regs->si, regs->di);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index f4db78b..b268cd2 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -57,17 +57,19 @@ struct pt_regs {
#else /* __i386__ */
struct pt_regs {
-/*
- * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
- * unless syscall needs a complete, fully filled "struct pt_regs".
- */
+ /*
+ * C ABI says these regs are callee-preserved. They aren't saved on
+ * kernel entry unless syscall needs a complete, fully filled
+ * "struct pt_regs".
+ */
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long bp;
unsigned long bx;
-/* These regs are callee-clobbered. Always saved on kernel entry. */
+
+ /* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11;
unsigned long r10;
unsigned long r9;
@@ -77,18 +79,38 @@ struct pt_regs {
unsigned long dx;
unsigned long si;
unsigned long di;
-/*
- * On syscall entry, this is syscall#. On CPU exception, this is error code.
- * On hw interrupt, it's IRQ number:
- */
+
+ /*
+ * orig_ax is used on entry for:
+ * - the syscall number (syscall, sysenter, int80)
+ * - error_code stored by the CPU on traps and exceptions
+ * - the interrupt number for device interrupts
+ */
unsigned long orig_ax;
-/* Return frame for iretq */
+
+ /* The IRETQ return frame starts here */
unsigned long ip;
- unsigned long cs;
+
+ union {
+ /* The full 64-bit data slot containing CS */
+ u64 csx;
+ /* CS selector */
+ u16 cs;
+ };
+
unsigned long flags;
unsigned long sp;
- unsigned long ss;
-/* top of stack page */
+
+ union {
+ /* The full 64-bit data slot containing SS */
+ u64 ssx;
+ /* SS selector */
+ u16 ss;
+ };
+
+ /*
+ * Top of stack on IDT systems.
+ */
};
#endif /* !__i386__ */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 33b2687..0f78b58 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -117,7 +117,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
log_lvl, fs, fsindex, gs, gsindex, shadowgs);
- printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n",
+ printk("%sCS: %04x DS: %04x ES: %04x CR0: %016lx\n",
log_lvl, regs->cs, ds, es, cr0);
printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
log_lvl, cr2, cr3, cr4);
On January 31, 2024 1:14:52 PM PST, tip-bot2 for Xin Li <tip-bot2@linutronix.de> wrote:
>The following commit has been merged into the x86/fred branch of tip:
>
>Commit-ID: ee63291aa8287cb7ded767d340155fe8681fc075
>Gitweb: https://git.kernel.org/tip/ee63291aa8287cb7ded767d340155fe8681fc075
>Author: Xin Li <xin3.li@intel.com>
>AuthorDate: Tue, 05 Dec 2023 02:50:02 -08:00
>Committer: Borislav Petkov (AMD) <bp@alien8.de>
>CommitterDate: Wed, 31 Jan 2024 22:01:13 +01:00
>
>x86/ptrace: Cleanup the definition of the pt_regs structure
>
>struct pt_regs is hard to read because the member or section related
>comments are not aligned with the members.
>
>The 'cs' and 'ss' members of pt_regs are type of 'unsigned long' while
>in reality they are only 16-bit wide. This works so far as the
>remaining space is unused, but FRED will use the remaining bits for
>other purposes.
>
>To prepare for FRED:
>
> - Cleanup the formatting
> - Convert 'cs' and 'ss' to u16 and embed them into an union
> with a u64
> - Fixup the related printk() format strings
>
>Suggested-by: Thomas Gleixner <tglx@linutronix.de>
>Originally-by: H. Peter Anvin (Intel) <hpa@zytor.com>
>Signed-off-by: Xin Li <xin3.li@intel.com>
>Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
>Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
>Tested-by: Shan Kang <shan.kang@intel.com>
>Link: https://lore.kernel.org/r/20231205105030.8698-14-xin3.li@intel.com
>---
> arch/x86/entry/vsyscall/vsyscall_64.c | 2 +-
> arch/x86/include/asm/ptrace.h | 48 ++++++++++++++++++--------
> arch/x86/kernel/process_64.c | 2 +-
> 3 files changed, 37 insertions(+), 15 deletions(-)
>
>diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
>index e0ca812..a3c0df1 100644
>--- a/arch/x86/entry/vsyscall/vsyscall_64.c
>+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
>@@ -76,7 +76,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
> if (!show_unhandled_signals)
> return;
>
>- printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
>+ printk_ratelimited("%s%s[%d] %s ip:%lx cs:%x sp:%lx ax:%lx si:%lx di:%lx\n",
> level, current->comm, task_pid_nr(current),
> message, regs->ip, regs->cs,
> regs->sp, regs->ax, regs->si, regs->di);
>diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
>index f4db78b..b268cd2 100644
>--- a/arch/x86/include/asm/ptrace.h
>+++ b/arch/x86/include/asm/ptrace.h
>@@ -57,17 +57,19 @@ struct pt_regs {
> #else /* __i386__ */
>
> struct pt_regs {
>-/*
>- * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
>- * unless syscall needs a complete, fully filled "struct pt_regs".
>- */
>+ /*
>+ * C ABI says these regs are callee-preserved. They aren't saved on
>+ * kernel entry unless syscall needs a complete, fully filled
>+ * "struct pt_regs".
>+ */
> unsigned long r15;
> unsigned long r14;
> unsigned long r13;
> unsigned long r12;
> unsigned long bp;
> unsigned long bx;
>-/* These regs are callee-clobbered. Always saved on kernel entry. */
>+
>+ /* These regs are callee-clobbered. Always saved on kernel entry. */
> unsigned long r11;
> unsigned long r10;
> unsigned long r9;
>@@ -77,18 +79,38 @@ struct pt_regs {
> unsigned long dx;
> unsigned long si;
> unsigned long di;
>-/*
>- * On syscall entry, this is syscall#. On CPU exception, this is error code.
>- * On hw interrupt, it's IRQ number:
>- */
>+
>+ /*
>+ * orig_ax is used on entry for:
>+ * - the syscall number (syscall, sysenter, int80)
>+ * - error_code stored by the CPU on traps and exceptions
>+ * - the interrupt number for device interrupts
>+ */
> unsigned long orig_ax;
>-/* Return frame for iretq */
>+
>+ /* The IRETQ return frame starts here */
> unsigned long ip;
>- unsigned long cs;
>+
>+ union {
>+ /* The full 64-bit data slot containing CS */
>+ u64 csx;
>+ /* CS selector */
>+ u16 cs;
>+ };
>+
> unsigned long flags;
> unsigned long sp;
>- unsigned long ss;
>-/* top of stack page */
>+
>+ union {
>+ /* The full 64-bit data slot containing SS */
>+ u64 ssx;
>+ /* SS selector */
>+ u16 ss;
>+ };
>+
>+ /*
>+ * Top of stack on IDT systems.
>+ */
> };
>
> #endif /* !__i386__ */
>diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
>index 33b2687..0f78b58 100644
>--- a/arch/x86/kernel/process_64.c
>+++ b/arch/x86/kernel/process_64.c
>@@ -117,7 +117,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
>
> printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
> log_lvl, fs, fsindex, gs, gsindex, shadowgs);
>- printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n",
>+ printk("%sCS: %04x DS: %04x ES: %04x CR0: %016lx\n",
> log_lvl, regs->cs, ds, es, cr0);
> printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
> log_lvl, cr2, cr3, cr4);
Incidentally, the comment about callee-saved registers has long been obsolete and is now outright wrong.
The next version of gcc (14 I think) will have an attribute to turn off saving registers which we can use for top-level C functions.
On 2/3/2024 3:52 PM, H. Peter Anvin wrote:
> On January 31, 2024 1:14:52 PM PST, tip-bot2 for Xin Li <tip-bot2@linutronix.de> wrote:
>> The following commit has been merged into the x86/fred branch of tip:
>>
>> Commit-ID: ee63291aa8287cb7ded767d340155fe8681fc075
>> Gitweb: https://git.kernel.org/tip/ee63291aa8287cb7ded767d340155fe8681fc075
>> Author: Xin Li <xin3.li@intel.com>
>> AuthorDate: Tue, 05 Dec 2023 02:50:02 -08:00
>> Committer: Borislav Petkov (AMD) <bp@alien8.de>
>> CommitterDate: Wed, 31 Jan 2024 22:01:13 +01:00
>>
>> x86/ptrace: Cleanup the definition of the pt_regs structure
>>
>> struct pt_regs is hard to read because the member or section related
>> comments are not aligned with the members.
>>
>> The 'cs' and 'ss' members of pt_regs are type of 'unsigned long' while
>> in reality they are only 16-bit wide. This works so far as the
>> remaining space is unused, but FRED will use the remaining bits for
>> other purposes.
>>
>> To prepare for FRED:
>>
>> - Cleanup the formatting
>> - Convert 'cs' and 'ss' to u16 and embed them into an union
>> with a u64
>> - Fixup the related printk() format strings
>>
>> Suggested-by: Thomas Gleixner <tglx@linutronix.de>
>> Originally-by: H. Peter Anvin (Intel) <hpa@zytor.com>
>> Signed-off-by: Xin Li <xin3.li@intel.com>
>> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
>> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
>> Tested-by: Shan Kang <shan.kang@intel.com>
>> Link: https://lore.kernel.org/r/20231205105030.8698-14-xin3.li@intel.com
[...]
>> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
>> index 33b2687..0f78b58 100644
>> --- a/arch/x86/kernel/process_64.c
>> +++ b/arch/x86/kernel/process_64.c
>> @@ -117,7 +117,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
>>
>> printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
>> log_lvl, fs, fsindex, gs, gsindex, shadowgs);
>> - printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n",
>> + printk("%sCS: %04x DS: %04x ES: %04x CR0: %016lx\n",
>> log_lvl, regs->cs, ds, es, cr0);
>> printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
>> log_lvl, cr2, cr3, cr4);
>
> Incidentally, the comment about callee-saved registers is long since both obsolete and is now outright wrong.
>
> The next version of gcc (14 I think) will have an attribute to turn off saving registers which we can use for top-level C functions.
>
Forgive my ignorance, do we have an official definition for "top-level C
functions"?
Thanks!
Xin
On February 6, 2024 11:04:13 AM PST, Xin Li <xin@zytor.com> wrote:
>On 2/3/2024 3:52 PM, H. Peter Anvin wrote:
>> On January 31, 2024 1:14:52 PM PST, tip-bot2 for Xin Li <tip-bot2@linutronix.de> wrote:
>>> The following commit has been merged into the x86/fred branch of tip:
>>>
>>> Commit-ID: ee63291aa8287cb7ded767d340155fe8681fc075
>>> Gitweb: https://git.kernel.org/tip/ee63291aa8287cb7ded767d340155fe8681fc075
>>> Author: Xin Li <xin3.li@intel.com>
>>> AuthorDate: Tue, 05 Dec 2023 02:50:02 -08:00
>>> Committer: Borislav Petkov (AMD) <bp@alien8.de>
>>> CommitterDate: Wed, 31 Jan 2024 22:01:13 +01:00
>>>
>>> x86/ptrace: Cleanup the definition of the pt_regs structure
>>>
>>> struct pt_regs is hard to read because the member or section related
>>> comments are not aligned with the members.
>>>
>>> The 'cs' and 'ss' members of pt_regs are type of 'unsigned long' while
>>> in reality they are only 16-bit wide. This works so far as the
>>> remaining space is unused, but FRED will use the remaining bits for
>>> other purposes.
>>>
>>> To prepare for FRED:
>>>
>>> - Cleanup the formatting
>>> - Convert 'cs' and 'ss' to u16 and embed them into an union
>>> with a u64
>>> - Fixup the related printk() format strings
>>>
>>> Suggested-by: Thomas Gleixner <tglx@linutronix.de>
>>> Originally-by: H. Peter Anvin (Intel) <hpa@zytor.com>
>>> Signed-off-by: Xin Li <xin3.li@intel.com>
>>> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
>>> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
>>> Tested-by: Shan Kang <shan.kang@intel.com>
>>> Link: https://lore.kernel.org/r/20231205105030.8698-14-xin3.li@intel.com
>
>[...]
>
>>> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
>>> index 33b2687..0f78b58 100644
>>> --- a/arch/x86/kernel/process_64.c
>>> +++ b/arch/x86/kernel/process_64.c
>>> @@ -117,7 +117,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
>>>
>>> printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
>>> log_lvl, fs, fsindex, gs, gsindex, shadowgs);
>>> - printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n",
>>> + printk("%sCS: %04x DS: %04x ES: %04x CR0: %016lx\n",
>>> log_lvl, regs->cs, ds, es, cr0);
>>> printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
>>> log_lvl, cr2, cr3, cr4);
>>
>> Incidentally, the comment about callee-saved registers is long since both obsolete and is now outright wrong.
>>
>> The next version of gcc (14 I think) will have an attribute to turn off saving registers which we can use for top-level C functions.
>>
>
>Forgive my ignorance, do we have an official definition for "top-level C functions"?
>
>Thanks!
> Xin
>
(Adding H.J., who did the gcc implementation of __attribute__((no_callee_saved_registers))).
The top-level C functions are the ones whose stack frames are immediately below the exception/syscall frame, i.e. the C functions called from the entry assembly code and functions tailcalled from those (unless they set up a stack frame for things like memory structures passed to the called function.)
Note that the implementation should properly handle the case when calling these functions from C (accidentally, or because it is a rare case that can be validly pessimized.)
On Tue, Feb 6, 2024 at 12:45 PM H. Peter Anvin <hpa@zytor.com> wrote:
>
> On February 6, 2024 11:04:13 AM PST, Xin Li <xin@zytor.com> wrote:
> >On 2/3/2024 3:52 PM, H. Peter Anvin wrote:
> >> On January 31, 2024 1:14:52 PM PST, tip-bot2 for Xin Li <tip-bot2@linutronix.de> wrote:
> >>> The following commit has been merged into the x86/fred branch of tip:
> >>>
> >>> Commit-ID: ee63291aa8287cb7ded767d340155fe8681fc075
> >>> Gitweb: https://git.kernel.org/tip/ee63291aa8287cb7ded767d340155fe8681fc075
> >>> Author: Xin Li <xin3.li@intel.com>
> >>> AuthorDate: Tue, 05 Dec 2023 02:50:02 -08:00
> >>> Committer: Borislav Petkov (AMD) <bp@alien8.de>
> >>> CommitterDate: Wed, 31 Jan 2024 22:01:13 +01:00
> >>>
> >>> x86/ptrace: Cleanup the definition of the pt_regs structure
> >>>
> >>> struct pt_regs is hard to read because the member or section related
> >>> comments are not aligned with the members.
> >>>
> >>> The 'cs' and 'ss' members of pt_regs are type of 'unsigned long' while
> >>> in reality they are only 16-bit wide. This works so far as the
> >>> remaining space is unused, but FRED will use the remaining bits for
> >>> other purposes.
> >>>
> >>> To prepare for FRED:
> >>>
> >>> - Cleanup the formatting
> >>> - Convert 'cs' and 'ss' to u16 and embed them into an union
> >>> with a u64
> >>> - Fixup the related printk() format strings
> >>>
> >>> Suggested-by: Thomas Gleixner <tglx@linutronix.de>
> >>> Originally-by: H. Peter Anvin (Intel) <hpa@zytor.com>
> >>> Signed-off-by: Xin Li <xin3.li@intel.com>
> >>> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> >>> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
> >>> Tested-by: Shan Kang <shan.kang@intel.com>
> >>> Link: https://lore.kernel.org/r/20231205105030.8698-14-xin3.li@intel.com
> >
> >[...]
> >
> >>> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> >>> index 33b2687..0f78b58 100644
> >>> --- a/arch/x86/kernel/process_64.c
> >>> +++ b/arch/x86/kernel/process_64.c
> >>> @@ -117,7 +117,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
> >>>
> >>> printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
> >>> log_lvl, fs, fsindex, gs, gsindex, shadowgs);
> >>> - printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n",
> >>> + printk("%sCS: %04x DS: %04x ES: %04x CR0: %016lx\n",
> >>> log_lvl, regs->cs, ds, es, cr0);
> >>> printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
> >>> log_lvl, cr2, cr3, cr4);
> >>
> >> Incidentally, the comment about callee-saved registers is long since both obsolete and is now outright wrong.
> >>
> >> The next version of gcc (14 I think) will have an attribute to turn off saving registers which we can use for top-level C functions.
__attribute__((no_callee_saved_registers)) has been added to GCC 14.
> >
> >Forgive my ignorance, do we have an official definition for "top-level C functions"?
> >
> >Thanks!
> > Xin
> >
>
> (Adding H.J., who did the gcc implementation of __attribute__((no_callee_saved_registers))).
>
> The top level C functions are the ones whose stack frame are immediately below the exception/syscall frame, i.e. the C function called from the entry assembly code and functions tailcalled from those (unless they set up a stack frame for things like memory structures passed to the called function.)
>
> Note that the implementation should properly handle the case when calling these functions from C (accidentally, or because it is a rare case that can be validly pessimized.)
GCC 14 should handle it properly. If not, please open a GCC bug.
--
H.J.
The following commit has been merged into the x86/fred branch of tip:
Commit-ID: ed262541af195f452c43cd4f28310a09065039ec
Gitweb: https://git.kernel.org/tip/ed262541af195f452c43cd4f28310a09065039ec
Author: Xin Li <xin3.li@intel.com>
AuthorDate: Tue, 05 Dec 2023 02:50:02 -08:00
Committer: Borislav Petkov (AMD) <bp@alien8.de>
CommitterDate: Tue, 30 Jan 2024 18:20:34 +01:00
x86/ptrace: Cleanup the definition of the pt_regs structure
struct pt_regs is hard to read because the member or section related
comments are not aligned with the members.
The 'cs' and 'ss' members of pt_regs are of type 'unsigned long' while
in reality they are only 16-bit wide. This works so far as the
remaining space is unused, but FRED will use the remaining bits for
other purposes.
To prepare for FRED:
- Cleanup the formatting
- Convert 'cs' and 'ss' to u16 and embed them into a union
with a u64
- Fixup the related printk() format strings
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Originally-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Shan Kang <shan.kang@intel.com>
Link: https://lore.kernel.org/r/20231205105030.8698-14-xin3.li@intel.com
---
arch/x86/entry/vsyscall/vsyscall_64.c | 2 +-
arch/x86/include/asm/ptrace.h | 48 ++++++++++++++++++--------
arch/x86/kernel/process_64.c | 2 +-
3 files changed, 37 insertions(+), 15 deletions(-)
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index e0ca812..a3c0df1 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -76,7 +76,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
if (!show_unhandled_signals)
return;
- printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
+ printk_ratelimited("%s%s[%d] %s ip:%lx cs:%x sp:%lx ax:%lx si:%lx di:%lx\n",
level, current->comm, task_pid_nr(current),
message, regs->ip, regs->cs,
regs->sp, regs->ax, regs->si, regs->di);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index f4db78b..b268cd2 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -57,17 +57,19 @@ struct pt_regs {
#else /* __i386__ */
struct pt_regs {
-/*
- * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
- * unless syscall needs a complete, fully filled "struct pt_regs".
- */
+ /*
+ * C ABI says these regs are callee-preserved. They aren't saved on
+ * kernel entry unless syscall needs a complete, fully filled
+ * "struct pt_regs".
+ */
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long bp;
unsigned long bx;
-/* These regs are callee-clobbered. Always saved on kernel entry. */
+
+ /* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11;
unsigned long r10;
unsigned long r9;
@@ -77,18 +79,38 @@ struct pt_regs {
unsigned long dx;
unsigned long si;
unsigned long di;
-/*
- * On syscall entry, this is syscall#. On CPU exception, this is error code.
- * On hw interrupt, it's IRQ number:
- */
+
+ /*
+ * orig_ax is used on entry for:
+ * - the syscall number (syscall, sysenter, int80)
+ * - error_code stored by the CPU on traps and exceptions
+ * - the interrupt number for device interrupts
+ */
unsigned long orig_ax;
-/* Return frame for iretq */
+
+ /* The IRETQ return frame starts here */
unsigned long ip;
- unsigned long cs;
+
+ union {
+ /* The full 64-bit data slot containing CS */
+ u64 csx;
+ /* CS selector */
+ u16 cs;
+ };
+
unsigned long flags;
unsigned long sp;
- unsigned long ss;
-/* top of stack page */
+
+ union {
+ /* The full 64-bit data slot containing SS */
+ u64 ssx;
+ /* SS selector */
+ u16 ss;
+ };
+
+ /*
+ * Top of stack on IDT systems.
+ */
};
#endif /* !__i386__ */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 33b2687..0f78b58 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -117,7 +117,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
log_lvl, fs, fsindex, gs, gsindex, shadowgs);
- printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n",
+ printk("%sCS: %04x DS: %04x ES: %04x CR0: %016lx\n",
log_lvl, regs->cs, ds, es, cr0);
printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
log_lvl, cr2, cr3, cr4);
The following commit has been merged into the x86/fred branch of tip:
Commit-ID: a75d2eeda289ae87896013df488081c62a50bff6
Gitweb: https://git.kernel.org/tip/a75d2eeda289ae87896013df488081c62a50bff6
Author: Xin Li <xin3.li@intel.com>
AuthorDate: Tue, 05 Dec 2023 02:50:02 -08:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Thu, 25 Jan 2024 19:10:31 +01:00
x86/ptrace: Cleanup the definition of the pt_regs structure
struct pt_regs is hard to read because the member or section related
comments are not aligned with the members.
The 'cs' and 'ss' members of pt_regs are of type 'unsigned long' while
in reality they are only 16-bit wide. This works so far as the
remaining space is unused, but FRED will use the remaining bits for
other purposes.
To prepare for FRED:
- Cleanup the formatting
- Convert 'cs' and 'ss' to u16 and embed them into a union
with a u64
- Fixup the related printk() format strings
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Originally-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Shan Kang <shan.kang@intel.com>
Link: https://lore.kernel.org/r/20231205105030.8698-14-xin3.li@intel.com
---
arch/x86/entry/vsyscall/vsyscall_64.c | 2 +-
arch/x86/include/asm/ptrace.h | 48 ++++++++++++++++++--------
arch/x86/kernel/process_64.c | 2 +-
3 files changed, 37 insertions(+), 15 deletions(-)
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index e0ca812..a3c0df1 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -76,7 +76,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
if (!show_unhandled_signals)
return;
- printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
+ printk_ratelimited("%s%s[%d] %s ip:%lx cs:%x sp:%lx ax:%lx si:%lx di:%lx\n",
level, current->comm, task_pid_nr(current),
message, regs->ip, regs->cs,
regs->sp, regs->ax, regs->si, regs->di);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index f4db78b..b268cd2 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -57,17 +57,19 @@ struct pt_regs {
#else /* __i386__ */
struct pt_regs {
-/*
- * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
- * unless syscall needs a complete, fully filled "struct pt_regs".
- */
+ /*
+ * C ABI says these regs are callee-preserved. They aren't saved on
+ * kernel entry unless syscall needs a complete, fully filled
+ * "struct pt_regs".
+ */
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long bp;
unsigned long bx;
-/* These regs are callee-clobbered. Always saved on kernel entry. */
+
+ /* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11;
unsigned long r10;
unsigned long r9;
@@ -77,18 +79,38 @@ struct pt_regs {
unsigned long dx;
unsigned long si;
unsigned long di;
-/*
- * On syscall entry, this is syscall#. On CPU exception, this is error code.
- * On hw interrupt, it's IRQ number:
- */
+
+ /*
+ * orig_ax is used on entry for:
+ * - the syscall number (syscall, sysenter, int80)
+ * - error_code stored by the CPU on traps and exceptions
+ * - the interrupt number for device interrupts
+ */
unsigned long orig_ax;
-/* Return frame for iretq */
+
+ /* The IRETQ return frame starts here */
unsigned long ip;
- unsigned long cs;
+
+ union {
+ /* The full 64-bit data slot containing CS */
+ u64 csx;
+ /* CS selector */
+ u16 cs;
+ };
+
unsigned long flags;
unsigned long sp;
- unsigned long ss;
-/* top of stack page */
+
+ union {
+ /* The full 64-bit data slot containing SS */
+ u64 ssx;
+ /* SS selector */
+ u16 ss;
+ };
+
+ /*
+ * Top of stack on IDT systems.
+ */
};
#endif /* !__i386__ */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 33b2687..0f78b58 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -117,7 +117,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
log_lvl, fs, fsindex, gs, gsindex, shadowgs);
- printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n",
+ printk("%sCS: %04x DS: %04x ES: %04x CR0: %016lx\n",
log_lvl, regs->cs, ds, es, cr0);
printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
log_lvl, cr2, cr3, cr4);
© 2016 - 2025 Red Hat, Inc.