[PATCH 2/2] misc: Register a PPI for the vcpu stall detection virtual device

Sebastian Ene posted 2 patches 1 year, 8 months ago
There is a newer version of this series
[PATCH 2/2] misc: Register a PPI for the vcpu stall detection virtual device
Posted by Sebastian Ene 1 year, 8 months ago
Request a PPI for each vCPU during probe which will be used by the host
to communicate a stall detected event on the vCPU. When the host raises
this interrupt from the virtual machine monitor, the guest is expected to
handle the interrupt and panic.

Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
 drivers/misc/vcpu_stall_detector.c | 41 ++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/vcpu_stall_detector.c b/drivers/misc/vcpu_stall_detector.c
index e2015c87f03f..c580cd7fd225 100644
--- a/drivers/misc/vcpu_stall_detector.c
+++ b/drivers/misc/vcpu_stall_detector.c
@@ -32,6 +32,7 @@
 struct vcpu_stall_detect_config {
 	u32 clock_freq_hz;
 	u32 stall_timeout_sec;
+	int ppi_irq;
 
 	void __iomem *membase;
 	struct platform_device *dev;
@@ -77,6 +78,12 @@ vcpu_stall_detect_timer_fn(struct hrtimer *hrtimer)
 	return HRTIMER_RESTART;
 }
 
+static irqreturn_t vcpu_stall_detector_irq(int irq, void *dev)
+{
+	panic("vCPU stall detector");
+	return IRQ_HANDLED;
+}
+
 static int start_stall_detector_cpu(unsigned int cpu)
 {
 	u32 ticks, ping_timeout_ms;
@@ -132,7 +139,7 @@ static int stop_stall_detector_cpu(unsigned int cpu)
 
 static int vcpu_stall_detect_probe(struct platform_device *pdev)
 {
-	int ret;
+	int ret, irq, num_irqs;
 	struct resource *r;
 	void __iomem *membase;
 	u32 clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ;
@@ -169,9 +176,32 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
 	vcpu_stall_config = (struct vcpu_stall_detect_config) {
 		.membase		= membase,
 		.clock_freq_hz		= clock_freq_hz,
-		.stall_timeout_sec	= stall_timeout_sec
+		.stall_timeout_sec	= stall_timeout_sec,
+		.ppi_irq		= -1,
 	};
 
+	num_irqs = platform_irq_count(pdev);
+	if (num_irqs < 0) {
+		dev_err(&pdev->dev, "Failed to get irqs\n");
+		ret = num_irqs;
+		goto err;
+	} else if (num_irqs > 1) {
+		dev_err(&pdev->dev, "Multipple irqs detected\n");
+		ret = -EINVAL;
+		goto err;
+	} else if (num_irqs == 1) {
+		irq = platform_get_irq(pdev, 0);
+		if ((irq > 0) && irq_is_percpu_devid(irq)) {
+			ret = request_percpu_irq(irq,
+						 vcpu_stall_detector_irq,
+						 "vcpu_stall_detector",
+						 vcpu_stall_detectors);
+			if (!ret)
+				vcpu_stall_config.ppi_irq = irq;
+
+		}
+	}
+
 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
 				"virt/vcpu_stall_detector:online",
 				start_stall_detector_cpu,
@@ -184,6 +214,9 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
 	vcpu_stall_config.hp_online = ret;
 	return 0;
 err:
+	if (vcpu_stall_config.ppi_irq > 0)
+		free_percpu_irq(vcpu_stall_config.ppi_irq,
+				vcpu_stall_detectors);
 	return ret;
 }
 
@@ -193,6 +226,10 @@ static void vcpu_stall_detect_remove(struct platform_device *pdev)
 
 	cpuhp_remove_state(vcpu_stall_config.hp_online);
 
+	if (vcpu_stall_config.ppi_irq > 0)
+		free_percpu_irq(vcpu_stall_config.ppi_irq,
+				vcpu_stall_detectors);
+
 	for_each_possible_cpu(cpu)
 		stop_stall_detector_cpu(cpu);
 }
-- 
2.45.1.288.g0e0cd299f1-goog
Re: [PATCH 2/2] misc: Register a PPI for the vcpu stall detection virtual device
Posted by Conor Dooley 1 year, 8 months ago
On Thu, May 23, 2024 at 04:04:13PM +0000, Sebastian Ene wrote:
> Request a PPI for each vCPU during probe which will be used by the host
> to communicate a stall detected event on the vCPU. When the host raises
> this interrupt from the virtual machine monitor, the guest is expected to
> handle the interrupt and panic.
> 
> Signed-off-by: Sebastian Ene <sebastianene@google.com>
> ---
>  drivers/misc/vcpu_stall_detector.c | 41 ++++++++++++++++++++++++++++--
>  1 file changed, 39 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/misc/vcpu_stall_detector.c b/drivers/misc/vcpu_stall_detector.c
> index e2015c87f03f..c580cd7fd225 100644
> --- a/drivers/misc/vcpu_stall_detector.c
> +++ b/drivers/misc/vcpu_stall_detector.c
> @@ -32,6 +32,7 @@
>  struct vcpu_stall_detect_config {
>  	u32 clock_freq_hz;
>  	u32 stall_timeout_sec;
> +	int ppi_irq;
>  
>  	void __iomem *membase;
>  	struct platform_device *dev;
> @@ -77,6 +78,12 @@ vcpu_stall_detect_timer_fn(struct hrtimer *hrtimer)
>  	return HRTIMER_RESTART;
>  }
>  
> +static irqreturn_t vcpu_stall_detector_irq(int irq, void *dev)
> +{
> +	panic("vCPU stall detector");
> +	return IRQ_HANDLED;
> +}
> +
>  static int start_stall_detector_cpu(unsigned int cpu)
>  {
>  	u32 ticks, ping_timeout_ms;
> @@ -132,7 +139,7 @@ static int stop_stall_detector_cpu(unsigned int cpu)
>  
>  static int vcpu_stall_detect_probe(struct platform_device *pdev)
>  {
> -	int ret;
> +	int ret, irq, num_irqs;
>  	struct resource *r;
>  	void __iomem *membase;
>  	u32 clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ;
> @@ -169,9 +176,32 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
>  	vcpu_stall_config = (struct vcpu_stall_detect_config) {
>  		.membase		= membase,
>  		.clock_freq_hz		= clock_freq_hz,
> -		.stall_timeout_sec	= stall_timeout_sec
> +		.stall_timeout_sec	= stall_timeout_sec,
> +		.ppi_irq		= -1,
>  	};
>  
> +	num_irqs = platform_irq_count(pdev);
> +	if (num_irqs < 0) {
> +		dev_err(&pdev->dev, "Failed to get irqs\n");

platform_irq_count() returns either the number of IRQs or -EPROBE_DEFER,
so I don't think emitting an error on deferred probe is the correct thing
to do here.

> +		ret = num_irqs;
> +		goto err;
> +	} else if (num_irqs > 1) {
> +		dev_err(&pdev->dev, "Multipple irqs detected\n");

Typo: "Multipple" should be "Multiple". I don't really see why you're going
to this level of complexity, though. Why not just make a single
platform_get_irq_optional() call?

> +		ret = -EINVAL;
> +		goto err;
> +	} else if (num_irqs == 1) {
> +		irq = platform_get_irq(pdev, 0);
> +		if ((irq > 0) && irq_is_percpu_devid(irq)) {
> +			ret = request_percpu_irq(irq,
> +						 vcpu_stall_detector_irq,
> +						 "vcpu_stall_detector",
> +						 vcpu_stall_detectors);
> +			if (!ret)
> +				vcpu_stall_config.ppi_irq = irq;
> +
> +		}
> +	}
> +
>  	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
>  				"virt/vcpu_stall_detector:online",
>  				start_stall_detector_cpu,
> @@ -184,6 +214,9 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
>  	vcpu_stall_config.hp_online = ret;
>  	return 0;
>  err:
> +	if (vcpu_stall_config.ppi_irq > 0)
> +		free_percpu_irq(vcpu_stall_config.ppi_irq,
> +				vcpu_stall_detectors);
>  	return ret;
>  }
>  
> @@ -193,6 +226,10 @@ static void vcpu_stall_detect_remove(struct platform_device *pdev)
>  
>  	cpuhp_remove_state(vcpu_stall_config.hp_online);
>  
> +	if (vcpu_stall_config.ppi_irq > 0)
> +		free_percpu_irq(vcpu_stall_config.ppi_irq,
> +				vcpu_stall_detectors);
> +
>  	for_each_possible_cpu(cpu)
>  		stop_stall_detector_cpu(cpu);
>  }
> -- 
> 2.45.1.288.g0e0cd299f1-goog
> 
> 
Re: [PATCH 2/2] misc: Register a PPI for the vcpu stall detection virtual device
Posted by Sebastian Ene 1 year, 8 months ago
On Fri, May 24, 2024 at 08:00:42PM +0100, Conor Dooley wrote:
> On Thu, May 23, 2024 at 04:04:13PM +0000, Sebastian Ene wrote:
> > Request a PPI for each vCPU during probe which will be used by the host
> > to communicate a stall detected event on the vCPU. When the host raises
> > this interrupt from the virtual machine monitor, the guest is expected to
> > handle the interrupt and panic.
> > 
> > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > ---
> >  drivers/misc/vcpu_stall_detector.c | 41 ++++++++++++++++++++++++++++--
> >  1 file changed, 39 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/misc/vcpu_stall_detector.c b/drivers/misc/vcpu_stall_detector.c
> > index e2015c87f03f..c580cd7fd225 100644
> > --- a/drivers/misc/vcpu_stall_detector.c
> > +++ b/drivers/misc/vcpu_stall_detector.c
> > @@ -32,6 +32,7 @@
> >  struct vcpu_stall_detect_config {
> >  	u32 clock_freq_hz;
> >  	u32 stall_timeout_sec;
> > +	int ppi_irq;
> >  
> >  	void __iomem *membase;
> >  	struct platform_device *dev;
> > @@ -77,6 +78,12 @@ vcpu_stall_detect_timer_fn(struct hrtimer *hrtimer)
> >  	return HRTIMER_RESTART;
> >  }
> >  
> > +static irqreturn_t vcpu_stall_detector_irq(int irq, void *dev)
> > +{
> > +	panic("vCPU stall detector");
> > +	return IRQ_HANDLED;
> > +}
> > +
> >  static int start_stall_detector_cpu(unsigned int cpu)
> >  {
> >  	u32 ticks, ping_timeout_ms;
> > @@ -132,7 +139,7 @@ static int stop_stall_detector_cpu(unsigned int cpu)
> >  
> >  static int vcpu_stall_detect_probe(struct platform_device *pdev)
> >  {
> > -	int ret;
> > +	int ret, irq, num_irqs;
> >  	struct resource *r;
> >  	void __iomem *membase;
> >  	u32 clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ;
> > @@ -169,9 +176,32 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
> >  	vcpu_stall_config = (struct vcpu_stall_detect_config) {
> >  		.membase		= membase,
> >  		.clock_freq_hz		= clock_freq_hz,
> > -		.stall_timeout_sec	= stall_timeout_sec
> > +		.stall_timeout_sec	= stall_timeout_sec,
> > +		.ppi_irq		= -1,
> >  	};
> >  
> > +	num_irqs = platform_irq_count(pdev);
> > +	if (num_irqs < 0) {
> > +		dev_err(&pdev->dev, "Failed to get irqs\n");

Hello Conor,


> 
> platform_irq_count() either returns a number or EPROBE_DEFER, I don't
> think emitting an error on deferred probe is the correct thing to do
> here?

I will drop this.


> > +		ret = num_irqs;
> > +		goto err;
> > +	} else if (num_irqs > 1) {
> > +		dev_err(&pdev->dev, "Multipple irqs detected\n");
> 
> Typo. I don't really see why you're going to this level of complexity
> though, why aren't you just doing a single get_irq_optional()?
> 

Thanks for the feedback. I simplified it by using
platform_get_irq_optional() as you suggested.


> > +		ret = -EINVAL;
> > +		goto err;
> > +	} else if (num_irqs == 1) {
> > +		irq = platform_get_irq(pdev, 0);
> > +		if ((irq > 0) && irq_is_percpu_devid(irq)) {
> > +			ret = request_percpu_irq(irq,
> > +						 vcpu_stall_detector_irq,
> > +						 "vcpu_stall_detector",
> > +						 vcpu_stall_detectors);
> > +			if (!ret)
> > +				vcpu_stall_config.ppi_irq = irq;
> > +
> > +		}
> > +	}
> > +
> >  	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
> >  				"virt/vcpu_stall_detector:online",
> >  				start_stall_detector_cpu,
> > @@ -184,6 +214,9 @@ static int vcpu_stall_detect_probe(struct platform_device *pdev)
> >  	vcpu_stall_config.hp_online = ret;
> >  	return 0;
> >  err:
> > +	if (vcpu_stall_config.ppi_irq > 0)
> > +		free_percpu_irq(vcpu_stall_config.ppi_irq,
> > +				vcpu_stall_detectors);
> >  	return ret;
> >  }
> >  
> > @@ -193,6 +226,10 @@ static void vcpu_stall_detect_remove(struct platform_device *pdev)
> >  
> >  	cpuhp_remove_state(vcpu_stall_config.hp_online);
> >  
> > +	if (vcpu_stall_config.ppi_irq > 0)
> > +		free_percpu_irq(vcpu_stall_config.ppi_irq,
> > +				vcpu_stall_detectors);
> > +
> >  	for_each_possible_cpu(cpu)
> >  		stop_stall_detector_cpu(cpu);
> >  }
> > -- 
> > 2.45.1.288.g0e0cd299f1-goog
> > 
> > 

Cheers,
Seb