arch/x86/kernel/apic/io_apic.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-)
arch_dynirq_lower_bound() is invoked by the core interrupt code to
retrieve the lowest possible Linux interrupt number for dynamically
allocated interrupts like MSI.
The x86 implementation uses this to exclude the IO/APIC GSI space.
This works correctly as long as there is an IO/APIC registered, but
returns 0 if not. This has been observed in VMs where the BIOS does
not advertise an IO/APIC.
0 is an invalid interrupt number except for the legacy timer interrupt
on x86. The return value is unchecked in the core code, so it ends up
allocating interrupt number 0, which is subsequently considered invalid
by the caller, e.g. the MSI allocation code.
The function already has a check for 0 in the case that an IO/APIC is
registered, because ioapic_dynirq_base is 0 in case of device tree setups.
Consolidate this into a single zero check covering both ioapic_dynirq_base
and gsi_top, which is used in the case that no IO/APIC is registered.
Fixes: 3e5bedc2c258 ("x86/apic: Fix arch_dynirq_lower_bound() bug for DT enabled machines")
Co-developed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Saurabh Sengar <ssengar@linux.microsoft.com>
Cc: Andy Shevchenko <andriy.shevchenko@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
[V2]
- Edit commit message
- Consolidated the 0 check for ioapic_dynirq_base as well
arch/x86/kernel/apic/io_apic.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1f83b052bb74..f980b38b0227 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2477,17 +2477,21 @@ static int io_apic_get_redir_entries(int ioapic)
unsigned int arch_dynirq_lower_bound(unsigned int from)
{
+ unsigned int ret;
+
/*
* dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
* gsi_top if ioapic_dynirq_base hasn't been initialized yet.
*/
- if (!ioapic_initialized)
- return gsi_top;
+ ret = ioapic_dynirq_base ? : gsi_top;
+
/*
- * For DT enabled machines ioapic_dynirq_base is irrelevant and not
- * updated. So simply return @from if ioapic_dynirq_base == 0.
+ * For DT enabled machines ioapic_dynirq_base is irrelevant and
+ * always 0. gsi_top can be 0 if there is no IO/APIC registered.
+ * 0 is an invalid interrupt number for dynamic allocations. Return
+ * @from instead.
*/
- return ioapic_dynirq_base ? : from;
+ return ret ? : from;
}
#ifdef CONFIG_X86_32
--
2.34.1
On Tue, Mar 28, 2023 at 12:30:04AM -0700, Saurabh Sengar wrote: > arch_dynirq_lower_bound() is invoked by the core interrupt code to > retrieve the lowest possible Linux interrupt number for dynamically > allocated interrupts like MSI. > > The x86 implementation uses this to exclude the IO/APIC GSI space. > This works correctly as long as there is an IO/APIC registered, but > returns 0 if not. This has been observed in VMs where the BIOS does > not advertise an IO/APIC. > > 0 is an invalid interrupt number except for the legacy timer interrupt > on x86. The return value is unchecked in the core code, so it ends up > to allocate interrupt number 0 which is subsequently considered to be > invalid by the caller, e.g. the MSI allocation code. > > The function has already a check for 0 in the case that an IO/APIC is > registered, but ioapic_dynirq_base is 0 in case of device tree setups. > > Consolidate this and zero check for both ioapic_dynirq_base and gsi_top, > which is used in the case that no IO/APIC is registered. 
> > Fixes: 3e5bedc2c258 ("x86/apic: Fix arch_dynirq_lower_bound() bug for DT enabled machines") > Co-developed-by: Thomas Gleixner <tglx@linutronix.de> > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > Signed-off-by: Saurabh Sengar <ssengar@linux.microsoft.com> > Cc: Andy Shevchenko <andriy.shevchenko@intel.com> > Cc: Thomas Gleixner <tglx@linutronix.de> > --- > [V2] > - Edit commit message > - Consolidated the 0 check for ioapic_dynirq_base as well > > arch/x86/kernel/apic/io_apic.c | 14 +++++++++----- > 1 file changed, 9 insertions(+), 5 deletions(-) > > diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c > index 1f83b052bb74..f980b38b0227 100644 > --- a/arch/x86/kernel/apic/io_apic.c > +++ b/arch/x86/kernel/apic/io_apic.c > @@ -2477,17 +2477,21 @@ static int io_apic_get_redir_entries(int ioapic) > > unsigned int arch_dynirq_lower_bound(unsigned int from) > { > + unsigned int ret; > + > /* > * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use > * gsi_top if ioapic_dynirq_base hasn't been initialized yet. > */ > - if (!ioapic_initialized) > - return gsi_top; > + ret = ioapic_dynirq_base ? : gsi_top; > + > /* > - * For DT enabled machines ioapic_dynirq_base is irrelevant and not > - * updated. So simply return @from if ioapic_dynirq_base == 0. > + * For DT enabled machines ioapic_dynirq_base is irrelevant and > + * always 0. gsi_top can be 0 if there is no IO/APIC registered. > + * 0 is an invalid interrupt number for dynamic allocations. Return > + * @from instead. > */ > - return ioapic_dynirq_base ? : from; > + return ret ? : from; > } > > #ifdef CONFIG_X86_32 Is this good to get accepted ? Please let me know if anything pending from my end on this. - Saurabh > -- > 2.34.1
On March 28, 2023 12:30:04 AM PDT, Saurabh Sengar <ssengar@linux.microsoft.com> wrote: >arch_dynirq_lower_bound() is invoked by the core interrupt code to >retrieve the lowest possible Linux interrupt number for dynamically >allocated interrupts like MSI. > >The x86 implementation uses this to exclude the IO/APIC GSI space. >This works correctly as long as there is an IO/APIC registered, but >returns 0 if not. This has been observed in VMs where the BIOS does >not advertise an IO/APIC. > >0 is an invalid interrupt number except for the legacy timer interrupt >on x86. The return value is unchecked in the core code, so it ends up >to allocate interrupt number 0 which is subsequently considered to be >invalid by the caller, e.g. the MSI allocation code. > >The function has already a check for 0 in the case that an IO/APIC is >registered, but ioapic_dynirq_base is 0 in case of device tree setups. > >Consolidate this and zero check for both ioapic_dynirq_base and gsi_top, >which is used in the case that no IO/APIC is registered. 
> >Fixes: 3e5bedc2c258 ("x86/apic: Fix arch_dynirq_lower_bound() bug for DT enabled machines") >Co-developed-by: Thomas Gleixner <tglx@linutronix.de> >Signed-off-by: Thomas Gleixner <tglx@linutronix.de> >Signed-off-by: Saurabh Sengar <ssengar@linux.microsoft.com> >Cc: Andy Shevchenko <andriy.shevchenko@intel.com> >Cc: Thomas Gleixner <tglx@linutronix.de> >--- >[V2] >- Edit commit message >- Consolidated the 0 check for ioapic_dynirq_base as well > > arch/x86/kernel/apic/io_apic.c | 14 +++++++++----- > 1 file changed, 9 insertions(+), 5 deletions(-) > >diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c >index 1f83b052bb74..f980b38b0227 100644 >--- a/arch/x86/kernel/apic/io_apic.c >+++ b/arch/x86/kernel/apic/io_apic.c >@@ -2477,17 +2477,21 @@ static int io_apic_get_redir_entries(int ioapic) > > unsigned int arch_dynirq_lower_bound(unsigned int from) > { >+ unsigned int ret; >+ > /* > * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use > * gsi_top if ioapic_dynirq_base hasn't been initialized yet. > */ >- if (!ioapic_initialized) >- return gsi_top; >+ ret = ioapic_dynirq_base ? : gsi_top; >+ > /* >- * For DT enabled machines ioapic_dynirq_base is irrelevant and not >- * updated. So simply return @from if ioapic_dynirq_base == 0. >+ * For DT enabled machines ioapic_dynirq_base is irrelevant and >+ * always 0. gsi_top can be 0 if there is no IO/APIC registered. >+ * 0 is an invalid interrupt number for dynamic allocations. Return >+ * @from instead. > */ >- return ioapic_dynirq_base ? : from; >+ return ret ? : from; > } > > #ifdef CONFIG_X86_32 Is there any reason why this variable can't be initialized to a fixed nonzero number, like 16?
On Tue, Mar 28, 2023 at 06:59:04AM -0700, H. Peter Anvin wrote: > On March 28, 2023 12:30:04 AM PDT, Saurabh Sengar <ssengar@linux.microsoft.com> wrote: > >arch_dynirq_lower_bound() is invoked by the core interrupt code to > >retrieve the lowest possible Linux interrupt number for dynamically > >allocated interrupts like MSI. > > > >The x86 implementation uses this to exclude the IO/APIC GSI space. > >This works correctly as long as there is an IO/APIC registered, but > >returns 0 if not. This has been observed in VMs where the BIOS does > >not advertise an IO/APIC. > > > >0 is an invalid interrupt number except for the legacy timer interrupt > >on x86. The return value is unchecked in the core code, so it ends up > >to allocate interrupt number 0 which is subsequently considered to be > >invalid by the caller, e.g. the MSI allocation code. > > > >The function has already a check for 0 in the case that an IO/APIC is > >registered, but ioapic_dynirq_base is 0 in case of device tree setups. > > > >Consolidate this and zero check for both ioapic_dynirq_base and gsi_top, > >which is used in the case that no IO/APIC is registered. 
> > > >Fixes: 3e5bedc2c258 ("x86/apic: Fix arch_dynirq_lower_bound() bug for DT enabled machines") > >Co-developed-by: Thomas Gleixner <tglx@linutronix.de> > >Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > >Signed-off-by: Saurabh Sengar <ssengar@linux.microsoft.com> > >Cc: Andy Shevchenko <andriy.shevchenko@intel.com> > >Cc: Thomas Gleixner <tglx@linutronix.de> > >--- > >[V2] > >- Edit commit message > >- Consolidated the 0 check for ioapic_dynirq_base as well > > > > arch/x86/kernel/apic/io_apic.c | 14 +++++++++----- > > 1 file changed, 9 insertions(+), 5 deletions(-) > > > >diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c > >index 1f83b052bb74..f980b38b0227 100644 > >--- a/arch/x86/kernel/apic/io_apic.c > >+++ b/arch/x86/kernel/apic/io_apic.c > >@@ -2477,17 +2477,21 @@ static int io_apic_get_redir_entries(int ioapic) > > > > unsigned int arch_dynirq_lower_bound(unsigned int from) > > { > >+ unsigned int ret; > >+ > > /* > > * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use > > * gsi_top if ioapic_dynirq_base hasn't been initialized yet. > > */ > >- if (!ioapic_initialized) > >- return gsi_top; > >+ ret = ioapic_dynirq_base ? : gsi_top; > >+ > > /* > >- * For DT enabled machines ioapic_dynirq_base is irrelevant and not > >- * updated. So simply return @from if ioapic_dynirq_base == 0. > >+ * For DT enabled machines ioapic_dynirq_base is irrelevant and > >+ * always 0. gsi_top can be 0 if there is no IO/APIC registered. > >+ * 0 is an invalid interrupt number for dynamic allocations. Return > >+ * @from instead. > > */ > >- return ioapic_dynirq_base ? : from; > >+ return ret ? : from; > > } > > > > #ifdef CONFIG_X86_32 > > Is there any reason why this variable can't be initialized to a fixed nonzero number, like 16? Yes, initializing gsi_top to any non-zero value should fix this issue. At first I thought of initializing gsi_top to 1. 
But then I looked at how the ioapic_dynirq_base case is handled and followed a similar approach. Regards, Saurabh
On Tue, Mar 28 2023 at 07:48, Saurabh Singh Sengar wrote: > On Tue, Mar 28, 2023 at 06:59:04AM -0700, H. Peter Anvin wrote: >> >> Is there any reason why this variable can't be initialized to a fixed nonzero number, like 16? > > Yes, initializing gst_top to any non-zero value should fix this issue. > At first I thought to intialize gst_top to 1. That works only in your case. Some boot time registrations of IO_APICs use gsi_top as the base. So initializing gsi_top to N would move IOAPIC[0] interrupts out to irq N... and make the legacy interrupts fail. That whole IOAPIC registration could do with some major cleanup, but that's a different story. Thanks, tglx
The following commit has been merged into the x86/apic branch of tip:
Commit-ID: 5af507bef93c09a94fb8f058213b489178f4cbe5
Gitweb: https://git.kernel.org/tip/5af507bef93c09a94fb8f058213b489178f4cbe5
Author: Saurabh Sengar <ssengar@linux.microsoft.com>
AuthorDate: Tue, 28 Mar 2023 00:30:04 -07:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Wed, 12 Apr 2023 17:45:50 +02:00
x86/ioapic: Don't return 0 from arch_dynirq_lower_bound()
arch_dynirq_lower_bound() is invoked by the core interrupt code to
retrieve the lowest possible Linux interrupt number for dynamically
allocated interrupts like MSI.
The x86 implementation uses this to exclude the IO/APIC GSI space.
This works correctly as long as there is an IO/APIC registered, but
returns 0 if not. This has been observed in VMs where the BIOS does
not advertise an IO/APIC.
0 is an invalid interrupt number except for the legacy timer interrupt
on x86. The return value is unchecked in the core code, so it ends up
allocating interrupt number 0, which is subsequently considered invalid
by the caller, e.g. the MSI allocation code.
The function already has a check for 0 in the case that an IO/APIC is
registered, as ioapic_dynirq_base is 0 in case of device tree setups.
Consolidate this into a single zero check covering both ioapic_dynirq_base
and gsi_top, which is used in the case that no IO/APIC is registered.
Fixes: 3e5bedc2c258 ("x86/apic: Fix arch_dynirq_lower_bound() bug for DT enabled machines")
Signed-off-by: Saurabh Sengar <ssengar@linux.microsoft.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/1679988604-20308-1-git-send-email-ssengar@linux.microsoft.com
---
arch/x86/kernel/apic/io_apic.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1f83b05..f980b38 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2477,17 +2477,21 @@ static int io_apic_get_redir_entries(int ioapic)
unsigned int arch_dynirq_lower_bound(unsigned int from)
{
+ unsigned int ret;
+
/*
* dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
* gsi_top if ioapic_dynirq_base hasn't been initialized yet.
*/
- if (!ioapic_initialized)
- return gsi_top;
+ ret = ioapic_dynirq_base ? : gsi_top;
+
/*
- * For DT enabled machines ioapic_dynirq_base is irrelevant and not
- * updated. So simply return @from if ioapic_dynirq_base == 0.
+ * For DT enabled machines ioapic_dynirq_base is irrelevant and
+ * always 0. gsi_top can be 0 if there is no IO/APIC registered.
+ * 0 is an invalid interrupt number for dynamic allocations. Return
+ * @from instead.
*/
- return ioapic_dynirq_base ? : from;
+ return ret ? : from;
}
#ifdef CONFIG_X86_32
© 2016 - 2024 Red Hat, Inc.