[PATCH v1 2/3] x86/traps: Initialize DR7 by writing its architectural reset value

Xin Li (Intel) posted 3 patches 3 months, 4 weeks ago
[PATCH v1 2/3] x86/traps: Initialize DR7 by writing its architectural reset value
Posted by Xin Li (Intel) 3 months, 4 weeks ago
Initialize DR7 by writing its architectural reset value to ensure
compliance with the specification.

Since clear_all_debug_regs() no longer zeros all debug registers,
rename it to initialize_debug_regs() to better reflect its current
behavior.

While at it, replace the hardcoded debug register number 7 with the
existing DR_CONTROL macro for clarity.

Reviewed-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
---
 arch/x86/include/asm/debugreg.h | 12 ++++++------
 arch/x86/kernel/cpu/common.c    | 17 +++++++----------
 arch/x86/kernel/hw_breakpoint.c |  6 +++---
 arch/x86/kernel/kgdb.c          |  4 ++--
 arch/x86/kernel/process_32.c    |  4 ++--
 arch/x86/kernel/process_64.c    |  4 ++--
 arch/x86/kvm/vmx/nested.c       |  2 +-
 arch/x86/kvm/x86.c              |  4 ++--
 8 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 363110e6b2e3..bfa8cfcb9732 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -100,8 +100,8 @@ static __always_inline void native_set_debugreg(int regno, unsigned long value)
 
 static inline void hw_breakpoint_disable(void)
 {
-	/* Zero the control register for HW Breakpoint */
-	set_debugreg(0UL, 7);
+	/* Reset the control register for HW Breakpoint */
+	set_debugreg(DR7_RESET_VALUE, DR_CONTROL);
 
 	/* Zero-out the individual HW breakpoint address registers */
 	set_debugreg(0UL, 0);
@@ -124,10 +124,10 @@ static __always_inline unsigned long local_db_save(void)
 	if (static_cpu_has(X86_FEATURE_HYPERVISOR) && !hw_breakpoint_active())
 		return 0;
 
-	get_debugreg(dr7, 7);
-	dr7 &= ~0x400; /* architecturally set bit */
+	get_debugreg(dr7, DR_CONTROL);
+	dr7 &= ~DR7_RESET_VALUE; /* architecturally set bit */
 	if (dr7)
-		set_debugreg(0, 7);
+		set_debugreg(DR7_RESET_VALUE, DR_CONTROL);
 	/*
 	 * Ensure the compiler doesn't lower the above statements into
 	 * the critical section; disabling breakpoints late would not
@@ -147,7 +147,7 @@ static __always_inline void local_db_restore(unsigned long dr7)
 	 */
 	barrier();
 	if (dr7)
-		set_debugreg(dr7, 7);
+		set_debugreg(dr7, DR_CONTROL);
 }
 
 #ifdef CONFIG_CPU_SUP_AMD
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8feb8fd2957a..628aa43acb41 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2243,20 +2243,17 @@ EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
 #endif
 #endif
 
-/*
- * Clear all 6 debug registers:
- */
-static void clear_all_debug_regs(void)
+static void initialize_debug_regs(void)
 {
 	int i;
 
-	for (i = 0; i < 8; i++) {
-		/* Ignore db4, db5 */
-		if ((i == 4) || (i == 5))
-			continue;
+	/* Control register first */
+	set_debugreg(DR7_RESET_VALUE, DR_CONTROL);
+	set_debugreg(0, DR_STATUS);
 
+	/* Ignore db4, db5 */
+	for (i = DR_FIRSTADDR; i <= DR_LASTADDR; i++)
 		set_debugreg(0, i);
-	}
 }
 
 #ifdef CONFIG_KGDB
@@ -2417,7 +2414,7 @@ void cpu_init(void)
 
 	load_mm_ldt(&init_mm);
 
-	clear_all_debug_regs();
+	initialize_debug_regs();
 	dbg_restore_debug_regs();
 
 	doublefault_init_cpu_tss();
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index b01644c949b2..29f4473817a1 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -125,7 +125,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 	 */
 	barrier();
 
-	set_debugreg(*dr7, 7);
+	set_debugreg(*dr7, DR_CONTROL);
 	if (info->mask)
 		amd_set_dr_addr_mask(info->mask, i);
 
@@ -164,7 +164,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 	dr7 = this_cpu_read(cpu_dr7);
 	dr7 &= ~__encode_dr7(i, info->len, info->type);
 
-	set_debugreg(dr7, 7);
+	set_debugreg(dr7, DR_CONTROL);
 	if (info->mask)
 		amd_set_dr_addr_mask(0, i);
 
@@ -487,7 +487,7 @@ void hw_breakpoint_restore(void)
 	set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
 	set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
 	set_debugreg(DR6_RESERVED, 6);
-	set_debugreg(__this_cpu_read(cpu_dr7), 7);
+	set_debugreg(__this_cpu_read(cpu_dr7), DR_CONTROL);
 }
 EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
 
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 102641fd2172..1b7a63f18c6d 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -202,7 +202,7 @@ static void kgdb_correct_hw_break(void)
 			early_dr7 |= encode_dr7(breakno,
 						breakinfo[breakno].len,
 						breakinfo[breakno].type);
-			set_debugreg(early_dr7, 7);
+			set_debugreg(early_dr7, DR_CONTROL);
 			continue;
 		}
 		bp = *per_cpu_ptr(breakinfo[breakno].pev, cpu);
@@ -385,7 +385,7 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs)
 	struct perf_event *bp;
 
 	/* Disable hardware debugging while we are in kgdb: */
-	set_debugreg(0UL, 7);
+	set_debugreg(DR7_RESET_VALUE, DR_CONTROL);
 	for (i = 0; i < HBP_NUM; i++) {
 		if (!breakinfo[i].enabled)
 			continue;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a10e180cbf23..f5f28a8fa44c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -89,11 +89,11 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
 	get_debugreg(d2, 2);
 	get_debugreg(d3, 3);
 	get_debugreg(d6, 6);
-	get_debugreg(d7, 7);
+	get_debugreg(d7, DR_CONTROL);
 
 	/* Only print out debug registers if they are in their non-default state. */
 	if ((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
-	    (d6 == DR6_RESERVED) && (d7 == 0x400))
+	    (d6 == DR6_RESERVED) && (d7 == DR7_RESET_VALUE))
 		return;
 
 	printk("%sDR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 8d6cf25127aa..1eb1ac948878 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -129,11 +129,11 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
 	get_debugreg(d2, 2);
 	get_debugreg(d3, 3);
 	get_debugreg(d6, 6);
-	get_debugreg(d7, 7);
+	get_debugreg(d7, DR_CONTROL);
 
 	/* Only print out debug registers if they are in their non-default state. */
 	if (!((d0 == 0) && (d1 == 0) && (d2 == 0) && (d3 == 0) &&
-	    (d6 == DR6_RESERVED) && (d7 == 0x400))) {
+	    (d6 == DR6_RESERVED) && (d7 == DR7_RESET_VALUE))) {
 		printk("%sDR0: %016lx DR1: %016lx DR2: %016lx\n",
 		       log_lvl, d0, d1, d2);
 		printk("%sDR3: %016lx DR6: %016lx DR7: %016lx\n",
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 7211c71d4241..6bad44db2168 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3270,7 +3270,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
 	 * VMExit clears RFLAGS.IF and DR7, even on a consistency check.
 	 */
 	if (hw_breakpoint_active())
-		set_debugreg(__this_cpu_read(cpu_dr7), 7);
+		set_debugreg(__this_cpu_read(cpu_dr7), DR_CONTROL);
 	local_irq_enable();
 	preempt_enable();
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b58a74c1722d..20fa9733aed1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11035,7 +11035,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	if (unlikely(vcpu->arch.switch_db_regs &&
 		     !(vcpu->arch.switch_db_regs & KVM_DEBUGREG_AUTO_SWITCH))) {
-		set_debugreg(0, 7);
+		set_debugreg(DR7_RESET_VALUE, DR_CONTROL);
 		set_debugreg(vcpu->arch.eff_db[0], 0);
 		set_debugreg(vcpu->arch.eff_db[1], 1);
 		set_debugreg(vcpu->arch.eff_db[2], 2);
@@ -11044,7 +11044,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))
 			kvm_x86_call(set_dr6)(vcpu, vcpu->arch.dr6);
 	} else if (unlikely(hw_breakpoint_active())) {
-		set_debugreg(0, 7);
+		set_debugreg(DR7_RESET_VALUE, DR_CONTROL);
 	}
 
 	vcpu->arch.host_debugctl = get_debugctlmsr();
-- 
2.49.0
Re: [PATCH v1 2/3] x86/traps: Initialize DR7 by writing its architectural reset value
Posted by Peter Zijlstra 3 months, 4 weeks ago
On Fri, Jun 13, 2025 at 12:01:16AM -0700, Xin Li (Intel) wrote:

> While at it, replace the hardcoded debug register number 7 with the
> existing DR_CONTROL macro for clarity.

Yeah, not really a fan of that... IMO that obfuscates the code more than
it helps, consider:

> -	get_debugreg(dr7, 7);
> +	get_debugreg(dr7, DR_CONTROL);

and:

> -	for (i = 0; i < 8; i++) {
> -		/* Ignore db4, db5 */
> -		if ((i == 4) || (i == 5))
> -			continue;
> +	/* Control register first */
> +	set_debugreg(DR7_RESET_VALUE, DR_CONTROL);
> +	set_debugreg(0, DR_STATUS);
>  
> +	/* Ignore db4, db5 */
> +	for (i = DR_FIRSTADDR; i <= DR_LASTADDR; i++)

I had to git-grep DR_{FIRST,LAST}ADDR to double check this was correct :(

Also, you now write them in the order:

  dr7, dr6, /* dr4, dr5 */, dr0, dr1, dr2, dr3

My OCD disagrees with this :-)
Re: [PATCH v1 2/3] x86/traps: Initialize DR7 by writing its architectural reset value
Posted by Sean Christopherson 3 months, 4 weeks ago
On Fri, Jun 13, 2025, Peter Zijlstra wrote:
> On Fri, Jun 13, 2025 at 12:01:16AM -0700, Xin Li (Intel) wrote:
> 
> > While at it, replace the hardcoded debug register number 7 with the
> > existing DR_CONTROL macro for clarity.
> 
> Yeah, not really a fan of that... IMO that obfuscates the code more than
> it helps, consider:

+1, and NAK to the KVM changes.  Pretty much everything in KVM deals with the
"raw" names.  The use of dr6 and dr7 is pervasive throughout the VMX and SVM
architectures:

 vmcs.GUEST_DR7
 vmcb.save.dr6
 vmcb.save.dr7

And is cemented in KVM's uAPI:

 kvm_debug_exit_arch.dr6
 kvm_debug_exit_arch.dr7
 kvm_debugregs.dr6
 kvm_debugregs.dr7

Using DR_STATUS and DR_CONTROL is not an improvement when everything else is using
'6' and '7'.  E.g. I skipped the changelog and was very confused by the '6' =>
DR_STATUS change in the next patch.

And don't even think about renaming the prefixes on these :-)

#define DR6_BUS_LOCK   (1 << 11)
#define DR6_BD		(1 << 13)
#define DR6_BS		(1 << 14)
#define DR6_BT		(1 << 15)
#define DR6_RTM		(1 << 16)
/*
 * DR6_ACTIVE_LOW combines fixed-1 and active-low bits.
 * We can regard all the bits in DR6_FIXED_1 as active_low bits;
 * they will never be 0 for now, but when they are defined
 * in the future it will require no code change.
 *
 * DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
 */
#define DR6_ACTIVE_LOW	0xffff0ff0
#define DR6_VOLATILE	0x0001e80f
#define DR6_FIXED_1	(DR6_ACTIVE_LOW & ~DR6_VOLATILE)

#define DR7_BP_EN_MASK	0x000000ff
#define DR7_GE		(1 << 9)
#define DR7_GD		(1 << 13)
#define DR7_FIXED_1	0x00000400
#define DR7_VOLATILE	0xffff2bff
Re: [PATCH v1 2/3] x86/traps: Initialize DR7 by writing its architectural reset value
Posted by Xin Li 3 months, 4 weeks ago
On 6/13/2025 7:10 AM, Sean Christopherson wrote:
> On Fri, Jun 13, 2025, Peter Zijlstra wrote:
>> On Fri, Jun 13, 2025 at 12:01:16AM -0700, Xin Li (Intel) wrote:
>>
>>> While at it, replace the hardcoded debug register number 7 with the
>>> existing DR_CONTROL macro for clarity.
>>
>> Yeah, not really a fan of that... IMO that obfuscates the code more than
>> it helps, consider:
> 
> +1, and NAK to the KVM changes.

I guess I was too aggressive to make overkill changes in a bug-fixing
patch, which will be back-ported.

I will revise the patch set to focus on bug fixing first.

> Pretty much everything in KVM deals with the
> "raw" names.  The use of dr6 and dr7 is pervasive throughout the VMX and SVM
> architectures:
> 
>   vmcs.GUEST_DR7
>   vmcb.save.dr6
>   vmcb.save.dr7
> 
> And is cemented in KVM's uAPI:
> 
>   kvm_debug_exit_arch.dr6
>   kvm_debug_exit_arch.dr7
>   kvm_debugregs.dr6
>   kvm_debugregs.dr7
> 
> Using DR_STATUS and DR_CONTROL is not an improvement when everything else is using
> '6' and '7'.  E.g. I skipped the changelog and was very confused by the '6' =>
> DR_STATUS change in the next patch.
> 
> And don't even think about renaming the prefixes on these :-)

I did think about changing DR6_ to DR_STATUS_ and DR7_ to DR_CONTROL_ ;)

However it seems that DR7_ and DR6_ are de facto prefixes in the kernel
code, and everyone appears to recognize their intended use.  It's better
for me to leave them as-is.

> 
> #define DR6_BUS_LOCK   (1 << 11)
> #define DR6_BD		(1 << 13)
> #define DR6_BS		(1 << 14)
> #define DR6_BT		(1 << 15)
> #define DR6_RTM		(1 << 16)
> /*
>   * DR6_ACTIVE_LOW combines fixed-1 and active-low bits.
>   * We can regard all the bits in DR6_FIXED_1 as active_low bits;
>   * they will never be 0 for now, but when they are defined
>   * in the future it will require no code change.
>   *
>   * DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
>   */
> #define DR6_ACTIVE_LOW	0xffff0ff0
> #define DR6_VOLATILE	0x0001e80f
> #define DR6_FIXED_1	(DR6_ACTIVE_LOW & ~DR6_VOLATILE)
> 
> #define DR7_BP_EN_MASK	0x000000ff
> #define DR7_GE		(1 << 9)
> #define DR7_GD		(1 << 13)
> #define DR7_FIXED_1	0x00000400
> #define DR7_VOLATILE	0xffff2bff
Re: [PATCH v1 2/3] x86/traps: Initialize DR7 by writing its architectural reset value
Posted by Xin Li 3 months, 4 weeks ago
On 6/13/2025 12:15 AM, Peter Zijlstra wrote:
> On Fri, Jun 13, 2025 at 12:01:16AM -0700, Xin Li (Intel) wrote:
> 
>> While at it, replace the hardcoded debug register number 7 with the
>> existing DR_CONTROL macro for clarity.
> 
> Yeah, not really a fan of that... IMO that obfuscates the code more than
> it helps, consider:
> 
>> -	get_debugreg(dr7, 7);
>> +	get_debugreg(dr7, DR_CONTROL);

Actually I kind of agree with you that it may not help, because I had
thought to rename DR7_RESET_VALUE to DR_CONTROL_RESET_VALUE.

Yes, we should remember DR7 is the control register, however I also hate
to decode it when looking at the code.


> 
> and:
> 
>> -	for (i = 0; i < 8; i++) {
>> -		/* Ignore db4, db5 */
>> -		if ((i == 4) || (i == 5))
>> -			continue;
>> +	/* Control register first */
>> +	set_debugreg(DR7_RESET_VALUE, DR_CONTROL);
>> +	set_debugreg(0, DR_STATUS);
>>   
>> +	/* Ignore db4, db5 */
>> +	for (i = DR_FIRSTADDR; i <= DR_LASTADDR; i++)
> 
> I had to git-grep DR_{FIRST,LAST}ADDR to double check this was correct :(
> 
> Also, you now write them in the order:
> 
>    dr7, dr6, /* dr4, dr5 */, dr0, dr1, dr2, dr3
> 
> My OCD disagrees with this :-)
> 

The order of the other debug registers doesn't seem critical, however
the control debug register should be the first, right?

Here I prefer to use "control register" rather than "dr7" here :)

Thanks!
     Xin
Re: [PATCH v1 2/3] x86/traps: Initialize DR7 by writing its architectural reset value
Posted by H. Peter Anvin 3 months, 4 weeks ago
On June 13, 2025 12:51:31 AM PDT, Xin Li <xin@zytor.com> wrote:
>On 6/13/2025 12:15 AM, Peter Zijlstra wrote:
>> On Fri, Jun 13, 2025 at 12:01:16AM -0700, Xin Li (Intel) wrote:
>> 
>>> While at it, replace the hardcoded debug register number 7 with the
>>> existing DR_CONTROL macro for clarity.
>> 
>> Yeah, not really a fan of that... IMO that obfuscates the code more than
>> it helps, consider:
>> 
>>> -	get_debugreg(dr7, 7);
>>> +	get_debugreg(dr7, DR_CONTROL);
>
>Actually I kind of agree with you that it may not help, because I had
>thought to rename DR7_RESET_VALUE to DR_CONTROL_RESET_VALUE.
>
>Yes, we should remember DR7 is the control register, however I also hate
>to decode it when looking at the code.
>
>
>> 
>> and:
>> 
>>> -	for (i = 0; i < 8; i++) {
>>> -		/* Ignore db4, db5 */
>>> -		if ((i == 4) || (i == 5))
>>> -			continue;
>>> +	/* Control register first */
>>> +	set_debugreg(DR7_RESET_VALUE, DR_CONTROL);
>>> +	set_debugreg(0, DR_STATUS);
>>>   +	/* Ignore db4, db5 */
>>> +	for (i = DR_FIRSTADDR; i <= DR_LASTADDR; i++)
>> 
>> I had to git-grep DR_{FIRST,LAST}ADDR to double check this was correct :(
>> 
>> Also, you now write them in the order:
>> 
>>    dr7, dr6, /* dr4, dr5 */, dr0, dr1, dr2, dr3
>> 
>> My OCD disagrees with this :-)
>> 
>
>The order of the other debug registers doesn't seem critical, however
>the control debug register should be the first, right?
>
>Here I prefer to use "control register" rather than "dr7" here :)
>
>Thanks!
>    Xin

That's the real issue here, 7 is the control register and 6 is the status register; 4-5 and 8-15 don't even exist. 

But we want to reset the control register first.

Incidentally, do you know the following x86 register sequences in the proper order?

ax, cx, dx, bx, sp, bp, si, di, ...
al, cl, dl, bl, ah, ch, dh, bh
es, cs, ss, ds, fs, gs
Re: [PATCH v1 2/3] x86/traps: Initialize DR7 by writing its architectural reset value
Posted by Peter Zijlstra 3 months, 4 weeks ago
On Fri, Jun 13, 2025 at 12:59:04AM -0700, H. Peter Anvin wrote:

> Incidentally, do you know the following x86 register sequences in the proper order?
> 
> ax, cx, dx, bx, sp, bp, si, di, ...

Yes, I get annoyed at that every time I decode regs :-)

> al, cl, dl, bl, ah, ch, dh, bh
> es, cs, ss, ds, fs, gs

These I don't know from the top of my head -- I've not had to use them enough.