Request a PPI for each vCPU during probe which will be used by the host
to communicate a stall detected event on the vCPU. When the host raises
this interrupt from the virtual machine monitor, the guest is expected to
handle the interrupt and panic.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
drivers/misc/vcpu_stall_detector.c | 41 ++++++++++++++++++++++++++++--
1 file changed, 39 insertions(+), 2 deletions(-)
diff --git a/drivers/misc/vcpu_stall_detector.c b/drivers/misc/vcpu_stall_detector.c
index e2015c87f03f..c580cd7fd225 100644
--- a/drivers/misc/vcpu_stall_detector.c
+++ b/drivers/misc/vcpu_stall_detector.c
@@ -32,6 +32,7 @@
struct vcpu_stall_detect_config {
u32 clock_freq_hz;
u32 stall_timeout_sec;
+ int ppi_irq;
void __iomem *membase;
struct platform_device *dev;
@@ -77,6 +78,12 @@ vcpu_stall_detect_timer_fn(struct hrtimer *hrtimer)
return HRTIMER_RESTART;
}
+static irqreturn_t vcpu_stall_detector_irq(int irq, void *dev)
+{
+ panic("vCPU stall detector");
+ return IRQ_HANDLED;
+}
+
static int start_stall_detector_cpu(unsigned int cpu)
{
u32 ticks, ping_timeout_ms;
@@ -132,7 +139,7 @@ static int stop_stall_detector_cpu(unsigned int cpu)
static int vcpu_stall_detect_probe(struct platform_device *pdev)
{
- int ret;
+ int ret, irq, num_irqs;
struct resource *r;
void __iomem *membase;
u32 clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ;
@@ -169,9 +176,32 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
vcpu_stall_config = (struct vcpu_stall_detect_config) {
.membase = membase,
.clock_freq_hz = clock_freq_hz,
- .stall_timeout_sec = stall_timeout_sec
+ .stall_timeout_sec = stall_timeout_sec,
+ .ppi_irq = -1,
};
+ num_irqs = platform_irq_count(pdev);
+ if (num_irqs < 0) {
+ dev_err(&pdev->dev, "Failed to get irqs\n");
+ ret = num_irqs;
+ goto err;
+ } else if (num_irqs > 1) {
+ dev_err(&pdev->dev, "Multipple irqs detected\n");
+ ret = -EINVAL;
+ goto err;
+ } else if (num_irqs == 1) {
+ irq = platform_get_irq(pdev, 0);
+ if ((irq > 0) && irq_is_percpu_devid(irq)) {
+ ret = request_percpu_irq(irq,
+ vcpu_stall_detector_irq,
+ "vcpu_stall_detector",
+ vcpu_stall_detectors);
+ if (!ret)
+ vcpu_stall_config.ppi_irq = irq;
+
+ }
+ }
+
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
"virt/vcpu_stall_detector:online",
start_stall_detector_cpu,
@@ -184,6 +214,9 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
vcpu_stall_config.hp_online = ret;
return 0;
err:
+ if (vcpu_stall_config.ppi_irq > 0)
+ free_percpu_irq(vcpu_stall_config.ppi_irq,
+ vcpu_stall_detectors);
return ret;
}
@@ -193,6 +226,10 @@ static void vcpu_stall_detect_remove(struct platform_device *pdev)
cpuhp_remove_state(vcpu_stall_config.hp_online);
+ if (vcpu_stall_config.ppi_irq > 0)
+ free_percpu_irq(vcpu_stall_config.ppi_irq,
+ vcpu_stall_detectors);
+
for_each_possible_cpu(cpu)
stop_stall_detector_cpu(cpu);
}
--
2.45.1.288.g0e0cd299f1-goog
On Thu, May 23, 2024 at 04:04:13PM +0000, Sebastian Ene wrote:
> Request a PPI for each vCPU during probe which will be used by the host
> to communicate a stall detected event on the vCPU. When the host raises
> this interrupt from the virtual machine monitor, the guest is expected to
> handle the interrupt and panic.
>
> Signed-off-by: Sebastian Ene <sebastianene@google.com>
> ---
> drivers/misc/vcpu_stall_detector.c | 41 ++++++++++++++++++++++++++++--
> 1 file changed, 39 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/misc/vcpu_stall_detector.c b/drivers/misc/vcpu_stall_detector.c
> index e2015c87f03f..c580cd7fd225 100644
> --- a/drivers/misc/vcpu_stall_detector.c
> +++ b/drivers/misc/vcpu_stall_detector.c
> @@ -32,6 +32,7 @@
> struct vcpu_stall_detect_config {
> u32 clock_freq_hz;
> u32 stall_timeout_sec;
> + int ppi_irq;
>
> void __iomem *membase;
> struct platform_device *dev;
> @@ -77,6 +78,12 @@ vcpu_stall_detect_timer_fn(struct hrtimer *hrtimer)
> return HRTIMER_RESTART;
> }
>
> +static irqreturn_t vcpu_stall_detector_irq(int irq, void *dev)
> +{
> + panic("vCPU stall detector");
> + return IRQ_HANDLED;
> +}
> +
> static int start_stall_detector_cpu(unsigned int cpu)
> {
> u32 ticks, ping_timeout_ms;
> @@ -132,7 +139,7 @@ static int stop_stall_detector_cpu(unsigned int cpu)
>
> static int vcpu_stall_detect_probe(struct platform_device *pdev)
> {
> - int ret;
> + int ret, irq, num_irqs;
> struct resource *r;
> void __iomem *membase;
> u32 clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ;
> @@ -169,9 +176,32 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
> vcpu_stall_config = (struct vcpu_stall_detect_config) {
> .membase = membase,
> .clock_freq_hz = clock_freq_hz,
> - .stall_timeout_sec = stall_timeout_sec
> + .stall_timeout_sec = stall_timeout_sec,
> + .ppi_irq = -1,
> };
>
> + num_irqs = platform_irq_count(pdev);
> + if (num_irqs < 0) {
> + dev_err(&pdev->dev, "Failed to get irqs\n");
platform_irq_count() returns either the number of IRQs or -EPROBE_DEFER.
I don't think emitting an error on deferred probe is the correct thing
to do here.
> + ret = num_irqs;
> + goto err;
> + } else if (num_irqs > 1) {
> + dev_err(&pdev->dev, "Multipple irqs detected\n");
Typo ("Multipple"). I don't really see why you're going to this level of
complexity though; why aren't you just doing a single
platform_get_irq_optional()?
> + ret = -EINVAL;
> + goto err;
> + } else if (num_irqs == 1) {
> + irq = platform_get_irq(pdev, 0);
> + if ((irq > 0) && irq_is_percpu_devid(irq)) {
> + ret = request_percpu_irq(irq,
> + vcpu_stall_detector_irq,
> + "vcpu_stall_detector",
> + vcpu_stall_detectors);
> + if (!ret)
> + vcpu_stall_config.ppi_irq = irq;
> +
> + }
> + }
> +
> ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
> "virt/vcpu_stall_detector:online",
> start_stall_detector_cpu,
> @@ -184,6 +214,9 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
> vcpu_stall_config.hp_online = ret;
> return 0;
> err:
> + if (vcpu_stall_config.ppi_irq > 0)
> + free_percpu_irq(vcpu_stall_config.ppi_irq,
> + vcpu_stall_detectors);
> return ret;
> }
>
> @@ -193,6 +226,10 @@ static void vcpu_stall_detect_remove(struct platform_device *pdev)
>
> cpuhp_remove_state(vcpu_stall_config.hp_online);
>
> + if (vcpu_stall_config.ppi_irq > 0)
> + free_percpu_irq(vcpu_stall_config.ppi_irq,
> + vcpu_stall_detectors);
> +
> for_each_possible_cpu(cpu)
> stop_stall_detector_cpu(cpu);
> }
> --
> 2.45.1.288.g0e0cd299f1-goog
>
>
On Fri, May 24, 2024 at 08:00:42PM +0100, Conor Dooley wrote:
> On Thu, May 23, 2024 at 04:04:13PM +0000, Sebastian Ene wrote:
> > Request a PPI for each vCPU during probe which will be used by the host
> > to communicate a stall detected event on the vCPU. When the host raises
> > this interrupt from the virtual machine monitor, the guest is expected to
> > handle the interrupt and panic.
> >
> > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > ---
> > drivers/misc/vcpu_stall_detector.c | 41 ++++++++++++++++++++++++++++--
> > 1 file changed, 39 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/misc/vcpu_stall_detector.c b/drivers/misc/vcpu_stall_detector.c
> > index e2015c87f03f..c580cd7fd225 100644
> > --- a/drivers/misc/vcpu_stall_detector.c
> > +++ b/drivers/misc/vcpu_stall_detector.c
> > @@ -32,6 +32,7 @@
> > struct vcpu_stall_detect_config {
> > u32 clock_freq_hz;
> > u32 stall_timeout_sec;
> > + int ppi_irq;
> >
> > void __iomem *membase;
> > struct platform_device *dev;
> > @@ -77,6 +78,12 @@ vcpu_stall_detect_timer_fn(struct hrtimer *hrtimer)
> > return HRTIMER_RESTART;
> > }
> >
> > +static irqreturn_t vcpu_stall_detector_irq(int irq, void *dev)
> > +{
> > + panic("vCPU stall detector");
> > + return IRQ_HANDLED;
> > +}
> > +
> > static int start_stall_detector_cpu(unsigned int cpu)
> > {
> > u32 ticks, ping_timeout_ms;
> > @@ -132,7 +139,7 @@ static int stop_stall_detector_cpu(unsigned int cpu)
> >
> > static int vcpu_stall_detect_probe(struct platform_device *pdev)
> > {
> > - int ret;
> > + int ret, irq, num_irqs;
> > struct resource *r;
> > void __iomem *membase;
> > u32 clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ;
> > @@ -169,9 +176,32 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
> > vcpu_stall_config = (struct vcpu_stall_detect_config) {
> > .membase = membase,
> > .clock_freq_hz = clock_freq_hz,
> > - .stall_timeout_sec = stall_timeout_sec
> > + .stall_timeout_sec = stall_timeout_sec,
> > + .ppi_irq = -1,
> > };
> >
> > + num_irqs = platform_irq_count(pdev);
> > + if (num_irqs < 0) {
> > + dev_err(&pdev->dev, "Failed to get irqs\n");
Hello Conor,
>
> platform_irq_count() either returns a number or EPROBE_DEFER, I don't
> think emitting an error on deferred probe is the correct thing to do
> here?
I will drop this.
> > + ret = num_irqs;
> > + goto err;
> > + } else if (num_irqs > 1) {
> > + dev_err(&pdev->dev, "Multipple irqs detected\n");
>
> Typo. I don't really see why you're going to this level of complexity
> though, why aren't you just doing a single get_irq_optional()?
>
Thanks for the feedback. I simplified it by using
platform_get_irq_optional() as you suggested.
> > + ret = -EINVAL;
> > + goto err;
> > + } else if (num_irqs == 1) {
> > + irq = platform_get_irq(pdev, 0);
> > + if ((irq > 0) && irq_is_percpu_devid(irq)) {
> > + ret = request_percpu_irq(irq,
> > + vcpu_stall_detector_irq,
> > + "vcpu_stall_detector",
> > + vcpu_stall_detectors);
> > + if (!ret)
> > + vcpu_stall_config.ppi_irq = irq;
> > +
> > + }
> > + }
> > +
> > ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
> > "virt/vcpu_stall_detector:online",
> > start_stall_detector_cpu,
> > @@ -184,6 +214,9 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
> > vcpu_stall_config.hp_online = ret;
> > return 0;
> > err:
> > + if (vcpu_stall_config.ppi_irq > 0)
> > + free_percpu_irq(vcpu_stall_config.ppi_irq,
> > + vcpu_stall_detectors);
> > return ret;
> > }
> >
> > @@ -193,6 +226,10 @@ static void vcpu_stall_detect_remove(struct platform_device *pdev)
> >
> > cpuhp_remove_state(vcpu_stall_config.hp_online);
> >
> > + if (vcpu_stall_config.ppi_irq > 0)
> > + free_percpu_irq(vcpu_stall_config.ppi_irq,
> > + vcpu_stall_detectors);
> > +
> > for_each_possible_cpu(cpu)
> > stop_stall_detector_cpu(cpu);
> > }
> > --
> > 2.45.1.288.g0e0cd299f1-goog
> >
> >
Cheers,
Seb
© 2016 - 2026 Red Hat, Inc.