[PATCH v3 03/12] x86/irq: Remove bitfields in posted interrupt descriptor

Jacob Pan posted 12 patches 1 year, 9 months ago
[PATCH v3 03/12] x86/irq: Remove bitfields in posted interrupt descriptor
Posted by Jacob Pan 1 year, 9 months ago
Mixture of bitfields and types is weird and really not intuitive, remove
bitfields and use typed data exclusively. Bitfields often result in
inferior machine code.

Link: https://lore.kernel.org/all/20240404101735.402feec8@jacob-builder/T/#mf66e34a82a48f4d8e2926b5581eff59a122de53a
Suggested-by: Sean Christopherson <seanjc@google.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>

---
v3:
	- Fix a bug where SN bit position was used as the mask, reported by
	  Oliver Sang.
	- Add and use non-atomic helpers to manipulate SN bit
	- Use pi_test_sn() instead of open coding
v2:
	- Replace bitfields, no more mix.
---
 arch/x86/include/asm/posted_intr.h | 21 ++++++++++++---------
 arch/x86/kvm/vmx/posted_intr.c     |  4 ++--
 arch/x86/kvm/vmx/vmx.c             |  2 +-
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/posted_intr.h b/arch/x86/include/asm/posted_intr.h
index acf237b2882e..20e31891de15 100644
--- a/arch/x86/include/asm/posted_intr.h
+++ b/arch/x86/include/asm/posted_intr.h
@@ -15,17 +15,9 @@ struct pi_desc {
 	};
 	union {
 		struct {
-				/* bit 256 - Outstanding Notification */
-			u16	on	: 1,
-				/* bit 257 - Suppress Notification */
-				sn	: 1,
-				/* bit 271:258 - Reserved */
-				rsvd_1	: 14;
-				/* bit 279:272 - Notification Vector */
+			u16	notifications; /* Suppress and outstanding bits */
 			u8	nv;
-				/* bit 287:280 - Reserved */
 			u8	rsvd_2;
-				/* bit 319:288 - Notification Destination */
 			u32	ndst;
 		};
 		u64 control;
@@ -88,4 +80,15 @@ static inline bool pi_test_sn(struct pi_desc *pi_desc)
 	return test_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
 }
 
+/* Non-atomic helpers */
+static inline void __pi_set_sn(struct pi_desc *pi_desc)
+{
+	pi_desc->notifications |= BIT(POSTED_INTR_SN);
+}
+
+static inline void __pi_clear_sn(struct pi_desc *pi_desc)
+{
+	pi_desc->notifications &= ~BIT(POSTED_INTR_SN);
+}
+
 #endif /* _X86_POSTED_INTR_H */
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index af662312fd07..ec08fa3caf43 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -107,7 +107,7 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 		 * handle task migration (@cpu != vcpu->cpu).
 		 */
 		new.ndst = dest;
-		new.sn = 0;
+		__pi_clear_sn(&new);
 
 		/*
 		 * Restore the notification vector; in the blocking case, the
@@ -157,7 +157,7 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
 		      &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
 	raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
 
-	WARN(pi_desc->sn, "PI descriptor SN field set before blocking");
+	WARN(pi_test_sn(pi_desc), "PI descriptor SN field set before blocking");
 
 	old.control = READ_ONCE(pi_desc->control);
 	do {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d94bb069bac9..f505745913c8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4843,7 +4843,7 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	 * or POSTED_INTR_WAKEUP_VECTOR.
 	 */
 	vmx->pi_desc.nv = POSTED_INTR_VECTOR;
-	vmx->pi_desc.sn = 1;
+	__pi_set_sn(&vmx->pi_desc);
 }
 
 static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
-- 
2.25.1
Re: [PATCH v3 03/12] x86/irq: Remove bitfields in posted interrupt descriptor
Posted by Oliver Sang 1 year, 9 months ago
hi, Jacob Pan,


On Tue, Apr 23, 2024 at 10:41:05AM -0700, Jacob Pan wrote:
> Mixture of bitfields and types is weird and really not intuitive, remove
> bitfields and use typed data exclusively. Bitfields often result in
> inferior machine code.
> 
> Link: https://lore.kernel.org/all/20240404101735.402feec8@jacob-builder/T/#mf66e34a82a48f4d8e2926b5581eff59a122de53a
> Suggested-by: Sean Christopherson <seanjc@google.com>
> Suggested-by: Thomas Gleixner <tglx@linutronix.de>
> Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
> 
> ---
> v3:
> 	- Fix a bug where SN bit position was used as the mask, reported by
> 	  Oliver Sang.

we tested this verion, and confirmed the issue gone

Tested-by: kernel test robot <oliver.sang@intel.com>


below just for reference.

for previous version, we noticed a
"WARNING:at_arch/x86/kvm/vmx/posted_intr.c:#pi_enable_wakeup_handler[kvm_intel]"
in testcase: kernel-selftests

Call Trace like below:

[  399.225452][ T8098] ------------[ cut here ]------------
[  399.232475][ T8098] PI descriptor SN field set before blocking
[  399.232514][ T8098] WARNING: CPU: 184 PID: 8098 at arch/x86/kvm/vmx/posted_intr.c:160 pi_enable_wakeup_handler+0x421/0x5f0 [kvm_intel]
[  399.254685][ T8098] Modules linked in: openvswitch nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 intel_rapl_msr intel_rapl_common btrfs
+blake2b_generic x86_pkg_temp_thermal xor intel_powerclamp zstd_compress coretemp raid6_pq libcrc32c kvm_intel kvm nvme crct10dif_pclmul crc32_pclmul
+nvme_core crc32c_intel t10_pi ghash_clmulni_intel ast sha512_ssse3 rapl intel_cstate dax_hmem crc64_rocksoft_generic drm_shmem_helper mei_me i2c_i801
+crc64_rocksoft mei drm_kms_helper i2c_smbus crc64 i2c_ismt wmi ipmi_ssif acpi_ipmi ipmi_si ipmi_devintf ipmi_msghandler acpi_pad acpi_power_meter joydev
+binfmt_misc loop fuse drm dm_mod ip_tables
[  399.321529][ T8098] CPU: 184 PID: 8098 Comm: xapic_ipi_test Not tainted 6.9.0-rc1-00008-g037ccaed5dc5 #1
[  399.333325][ T8098] RIP: 0010:pi_enable_wakeup_handler+0x421/0x5f0 [kvm_intel]
[  399.342631][ T8098] Code: e8 a4 5a 7b c0 e9 b2 fc ff ff e8 5a 5c 7b c0 fb eb 93 bf f1 00 00 00 e8 dd 9c 29 c0 eb 82 48 c7 c7 e0 c6 ee c0 e8 0f 2f 37 c0
+<0f> 0b e9 e7 fe ff ff 4c 89 f7 e8 00 6a c8 c0 e9 cd fe ff ff 4c 89
[  399.365742][ T8098] RSP: 0018:ffa000003a527780 EFLAGS: 00010086
[  399.373626][ T8098] RAX: 0000000000000000 RBX: ff1100247c23c740 RCX: 0000000000000027
[  399.383668][ T8098] RDX: 0000000000000027 RSI: 0000000000000004 RDI: ff11003fc3030c08
[  399.393759][ T8098] RBP: ffa000003a527908 R08: 0000000000000001 R09: ffe21c07f8606181
[  399.403834][ T8098] R10: ff11003fc3030c0b R11: 0000000000000000 R12: 1ff40000074a4ef4
[  399.413901][ T8098] R13: 0000000000000000 R14: ff1100247c23e4a0 R15: 00000000000000b8
[  399.423969][ T8098] FS:  00007f10de2006c0(0000) GS:ff11003fc3000000(0000) knlGS:0000000000000000
[  399.435126][ T8098] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  399.443651][ T8098] CR2: 0000000000000000 CR3: 000000210b958004 CR4: 0000000000f73ef0
[  399.453753][ T8098] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  399.463835][ T8098] DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400
[  399.473912][ T8098] PKRU: 55555554
[  399.479005][ T8098] Call Trace:
[  399.483812][ T8098]  <TASK>
[  399.488224][ T8098]  ? __warn+0xcc/0x2d0
[  399.493903][ T8098]  ? pi_enable_wakeup_handler+0x421/0x5f0 [kvm_intel]
[  399.502643][ T8098]  ? report_bug+0x261/0x2c0
[  399.508825][ T8098]  ? handle_bug+0x3a/0x90
[  399.514817][ T8098]  ? exc_invalid_op+0x17/0x40
[  399.521191][ T8098]  ? asm_exc_invalid_op+0x1a/0x20
[  399.527938][ T8098]  ? pi_enable_wakeup_handler+0x421/0x5f0 [kvm_intel]
[  399.536677][ T8098]  ? __pfx_pi_enable_wakeup_handler+0x10/0x10 [kvm_intel]
[  399.545747][ T8098]  ? __pfx_lock_repin_lock+0x10/0x10
[  399.552740][ T8098]  ? newidle_balance+0xc85/0x1300
[  399.559408][ T8098]  ? __pfx_perf_event_context_sched_out+0x10/0x10
[  399.567652][ T8098]  ? lock_acquire+0x432/0x4e0
[  399.573959][ T8098]  ? vmx_get_rflags+0x26/0x2c0 [kvm_intel]
[  399.581527][ T8098]  vmx_vcpu_pi_put+0x1d3/0x230 [kvm_intel]
[  399.589119][ T8098]  vmx_vcpu_put+0x12/0x20 [kvm_intel]
[  399.596205][ T8098]  kvm_arch_vcpu_put+0x49e/0x7b0 [kvm]
[  399.603463][ T8098]  kvm_sched_out+0xb2/0xe0 [kvm]
[  399.610100][ T8098]  prepare_task_switch+0x321/0xc40
[  399.616863][ T8098]  ? lock_release+0x1bf/0x240
[  399.623146][ T8098]  __schedule+0x5a6/0x20b0
[  399.629125][ T8098]  ? __pfx___schedule+0x10/0x10
[  399.635581][ T8098]  ? __pfx_lock_acquire+0x10/0x10
[  399.642228][ T8098]  ? kvm_apic_has_interrupt+0x9c/0x160 [kvm]
[  399.650034][ T8098]  ? lock_acquire+0x432/0x4e0
[  399.656267][ T8098]  ? __pfx_lock_acquire+0x10/0x10
[  399.662905][ T8098]  schedule+0xe2/0x2a0
[  399.668478][ T8098]  kvm_vcpu_block+0xd1/0x1c0 [kvm]
[  399.675308][ T8098]  kvm_vcpu_halt+0xee/0x900 [kvm]
[  399.682018][ T8098]  vcpu_run+0x50a/0x9d0 [kvm]
[  399.688346][ T8098]  kvm_arch_vcpu_ioctl_run+0x377/0x1430 [kvm]
[  399.696254][ T8098]  ? lock_release+0xe5/0x240
[  399.702428][ T8098]  kvm_vcpu_ioctl+0x34c/0xc40 [kvm]
[  399.709341][ T8098]  ? __pfx_kvm_vcpu_ioctl+0x10/0x10 [kvm]
[  399.716821][ T8098]  ? __lock_release+0x103/0x440
[  399.723960][ T8098]  ? __fget_files+0x1c7/0x330
[  399.730219][ T8098]  ? __pfx___lock_release+0x10/0x10
[  399.737750][ T8098]  ? __fget_files+0x1cc/0x330
[  399.744028][ T8098]  ? __fget_files+0x1c7/0x330
[  399.750289][ T8098]  ? lock_release+0xe5/0x240
[  399.756461][ T8098]  ? __fget_files+0x1cc/0x330
[  399.762719][ T8098]  __x64_sys_ioctl+0x134/0x1b0
[  399.769077][ T8098]  do_syscall_64+0x93/0x170
[  399.775146][ T8098]  ? do_user_addr_fault+0x477/0xcb0
[  399.781989][ T8098]  ? lockdep_hardirqs_on_prepare+0x279/0x3e0
[  399.789654][ T8098]  entry_SYSCALL_64_after_hwframe+0x6c/0x74
[  399.797192][ T8098] RIP: 0033:0x7f10de88bc5b
[  399.803079][ T8098] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05
+<89> c2 3d 00 f0 ff ff 77 1c 48 8b 44 24 18 64 48 2b 04 25 28 00 00
[  399.826105][ T8098] RSP: 002b:00007f10de1ff9c0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[  399.836565][ T8098] RAX: ffffffffffffffda RBX: 000000001a5418c0 RCX: 00007f10de88bc5b
[  399.846521][ T8098] R


> 	- Add and use non-atomic helpers to manipulate SN bit
> 	- Use pi_test_sn() instead of open coding
> v2:
> 	- Replace bitfields, no more mix.
> ---
>  arch/x86/include/asm/posted_intr.h | 21 ++++++++++++---------
>  arch/x86/kvm/vmx/posted_intr.c     |  4 ++--
>  arch/x86/kvm/vmx/vmx.c             |  2 +-
>  3 files changed, 15 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/x86/include/asm/posted_intr.h b/arch/x86/include/asm/posted_intr.h
> index acf237b2882e..20e31891de15 100644
> --- a/arch/x86/include/asm/posted_intr.h
> +++ b/arch/x86/include/asm/posted_intr.h
> @@ -15,17 +15,9 @@ struct pi_desc {
>  	};
>  	union {
>  		struct {
> -				/* bit 256 - Outstanding Notification */
> -			u16	on	: 1,
> -				/* bit 257 - Suppress Notification */
> -				sn	: 1,
> -				/* bit 271:258 - Reserved */
> -				rsvd_1	: 14;
> -				/* bit 279:272 - Notification Vector */
> +			u16	notifications; /* Suppress and outstanding bits */
>  			u8	nv;
> -				/* bit 287:280 - Reserved */
>  			u8	rsvd_2;
> -				/* bit 319:288 - Notification Destination */
>  			u32	ndst;
>  		};
>  		u64 control;
> @@ -88,4 +80,15 @@ static inline bool pi_test_sn(struct pi_desc *pi_desc)
>  	return test_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
>  }
>  
> +/* Non-atomic helpers */
> +static inline void __pi_set_sn(struct pi_desc *pi_desc)
> +{
> +	pi_desc->notifications |= BIT(POSTED_INTR_SN);
> +}
> +
> +static inline void __pi_clear_sn(struct pi_desc *pi_desc)
> +{
> +	pi_desc->notifications &= ~BIT(POSTED_INTR_SN);
> +}
> +
>  #endif /* _X86_POSTED_INTR_H */
> diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
> index af662312fd07..ec08fa3caf43 100644
> --- a/arch/x86/kvm/vmx/posted_intr.c
> +++ b/arch/x86/kvm/vmx/posted_intr.c
> @@ -107,7 +107,7 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
>  		 * handle task migration (@cpu != vcpu->cpu).
>  		 */
>  		new.ndst = dest;
> -		new.sn = 0;
> +		__pi_clear_sn(&new);
>  
>  		/*
>  		 * Restore the notification vector; in the blocking case, the
> @@ -157,7 +157,7 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
>  		      &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
>  	raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
>  
> -	WARN(pi_desc->sn, "PI descriptor SN field set before blocking");
> +	WARN(pi_test_sn(pi_desc), "PI descriptor SN field set before blocking");
>  
>  	old.control = READ_ONCE(pi_desc->control);
>  	do {
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index d94bb069bac9..f505745913c8 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -4843,7 +4843,7 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
>  	 * or POSTED_INTR_WAKEUP_VECTOR.
>  	 */
>  	vmx->pi_desc.nv = POSTED_INTR_VECTOR;
> -	vmx->pi_desc.sn = 1;
> +	__pi_set_sn(&vmx->pi_desc);
>  }
>  
>  static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> -- 
> 2.25.1
>
[tip: x86/irq] x86/irq: Remove bitfields in posted interrupt descriptor
Posted by tip-bot2 for Jacob Pan 1 year, 9 months ago
The following commit has been merged into the x86/irq branch of tip:

Commit-ID:     2254808b53d92c9fe7b645b2f43acc55f22cdce6
Gitweb:        https://git.kernel.org/tip/2254808b53d92c9fe7b645b2f43acc55f22cdce6
Author:        Jacob Pan <jacob.jun.pan@linux.intel.com>
AuthorDate:    Tue, 23 Apr 2024 10:41:05 -07:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Tue, 30 Apr 2024 00:54:42 +02:00

x86/irq: Remove bitfields in posted interrupt descriptor

Mixture of bitfields and types is weird and really not intuitive, remove
bitfields and use typed data exclusively. Bitfields often result in
inferior machine code.

Suggested-by: Sean Christopherson <seanjc@google.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240423174114.526704-4-jacob.jun.pan@linux.intel.com
Link: https://lore.kernel.org/all/20240404101735.402feec8@jacob-builder/T/#mf66e34a82a48f4d8e2926b5581eff59a122de53a
---
 arch/x86/include/asm/posted_intr.h | 21 ++++++++++++---------
 arch/x86/kvm/vmx/posted_intr.c     |  4 ++--
 arch/x86/kvm/vmx/vmx.c             |  2 +-
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/posted_intr.h b/arch/x86/include/asm/posted_intr.h
index acf237b..20e3189 100644
--- a/arch/x86/include/asm/posted_intr.h
+++ b/arch/x86/include/asm/posted_intr.h
@@ -15,17 +15,9 @@ struct pi_desc {
 	};
 	union {
 		struct {
-				/* bit 256 - Outstanding Notification */
-			u16	on	: 1,
-				/* bit 257 - Suppress Notification */
-				sn	: 1,
-				/* bit 271:258 - Reserved */
-				rsvd_1	: 14;
-				/* bit 279:272 - Notification Vector */
+			u16	notifications; /* Suppress and outstanding bits */
 			u8	nv;
-				/* bit 287:280 - Reserved */
 			u8	rsvd_2;
-				/* bit 319:288 - Notification Destination */
 			u32	ndst;
 		};
 		u64 control;
@@ -88,4 +80,15 @@ static inline bool pi_test_sn(struct pi_desc *pi_desc)
 	return test_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
 }
 
+/* Non-atomic helpers */
+static inline void __pi_set_sn(struct pi_desc *pi_desc)
+{
+	pi_desc->notifications |= BIT(POSTED_INTR_SN);
+}
+
+static inline void __pi_clear_sn(struct pi_desc *pi_desc)
+{
+	pi_desc->notifications &= ~BIT(POSTED_INTR_SN);
+}
+
 #endif /* _X86_POSTED_INTR_H */
diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
index af66231..ec08fa3 100644
--- a/arch/x86/kvm/vmx/posted_intr.c
+++ b/arch/x86/kvm/vmx/posted_intr.c
@@ -107,7 +107,7 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 		 * handle task migration (@cpu != vcpu->cpu).
 		 */
 		new.ndst = dest;
-		new.sn = 0;
+		__pi_clear_sn(&new);
 
 		/*
 		 * Restore the notification vector; in the blocking case, the
@@ -157,7 +157,7 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
 		      &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
 	raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
 
-	WARN(pi_desc->sn, "PI descriptor SN field set before blocking");
+	WARN(pi_test_sn(pi_desc), "PI descriptor SN field set before blocking");
 
 	old.control = READ_ONCE(pi_desc->control);
 	do {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 273d264..becefaf 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4845,7 +4845,7 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	 * or POSTED_INTR_WAKEUP_VECTOR.
 	 */
 	vmx->pi_desc.nv = POSTED_INTR_VECTOR;
-	vmx->pi_desc.sn = 1;
+	__pi_set_sn(&vmx->pi_desc);
 }
 
 static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)