[v1] irqchip/sifive-plic: ensure interrupt is enable before EOI

[PATCH] irqchip/sifive-plic: ensure interrupt is enable before EOI

Posted by zhengyan 1 year, 7 months ago

RISC-V PLIC cannot "end-of-interrupt" (EOI) disabled interrupts, as
explained in the description of Interrupt Completion in the PLIC spec:
"The PLIC signals it has completed executing an interrupt handler by
writing the interrupt ID it received from the claim to the claim/complete
register. The PLIC does not check whether the completion ID is the same
as the last claim ID for that target. If the completion ID does not match
an interrupt source that *is currently enabled* for the target, the
completion is silently ignored."

Commit 9c92006b896c ("irqchip/sifive-plic: Enable interrupt if needed
before EOI")
ensured that EOI is enable when irqd IRQD_IRQ_DISABLED is set, before
EOI

Commit 69ea463021be ("irqchip/sifive-plic: Fixup EOI failed when masked")
ensured that EOI is successful by enabling interrupt first, before EOI.

Commit a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask
operations") removed the interrupt enabling code from the previous
commit, because it assumes that interrupt should already be enabled at the
point of EOI.

However, here still miss a corner case that if SMP is enabled. When
someone need to set affinity from a cpu to another (Maybe like
boardcast-tick) the original cpu when handle the EOI meanwhile the IE is
disabled by plic_set_affinity

So this patch ensure that won't happened

Signed-off-by: zhengyan <zhengyan@asrmicro.com>
---
 drivers/irqchip/irq-sifive-plic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 9e22f7e378f5..e6acd134a691 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -149,8 +149,10 @@ static void plic_irq_mask(struct irq_data *d)
 static void plic_irq_eoi(struct irq_data *d)
 {
 	struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
+	void __iomem *reg = handler->enable_base + (d->hwirq / 32) * sizeof(u32);
+	u32 hwirq_mask = 1 << (d->hwirq % 32);
 
-	if (unlikely(irqd_irq_disabled(d))) {
+	if (unlikely(irqd_irq_disabled(d)) || (readl(reg) & hwirq_mask) == 0) {
 		plic_toggle(handler, d->hwirq, 1);
 		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
 		plic_toggle(handler, d->hwirq, 0);
-- 
2.25.1

Re: [PATCH] irqchip/sifive-plic: ensure interrupt is enable before EOI

Posted by Nam Cao 1 year, 7 months ago

On Mon, Jun 24, 2024 at 08:53:41AM +0000, zhengyan wrote:
> RISC-V PLIC cannot "end-of-interrupt" (EOI) disabled interrupts, as
> explained in the description of Interrupt Completion in the PLIC spec:
> "The PLIC signals it has completed executing an interrupt handler by
> writing the interrupt ID it received from the claim to the claim/complete
> register. The PLIC does not check whether the completion ID is the same
> as the last claim ID for that target. If the completion ID does not match
> an interrupt source that *is currently enabled* for the target, the
> completion is silently ignored."
> 
> Commit 9c92006b896c ("irqchip/sifive-plic: Enable interrupt if needed
> before EOI")
> ensured that EOI is enable when irqd IRQD_IRQ_DISABLED is set, before
> EOI
> 
> Commit 69ea463021be ("irqchip/sifive-plic: Fixup EOI failed when masked")
> ensured that EOI is successful by enabling interrupt first, before EOI.
> 
> Commit a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask
> operations") removed the interrupt enabling code from the previous
> commit, because it assumes that interrupt should already be enabled at the
> point of EOI.
> 
> However, here still miss a corner case that if SMP is enabled. When
> someone need to set affinity from a cpu to another (Maybe like
> boardcast-tick) the original cpu when handle the EOI meanwhile the IE is
> disabled by plic_set_affinity
> 
> So this patch ensure that won't happened
> 
> Signed-off-by: zhengyan <zhengyan@asrmicro.com>
> ---
>  drivers/irqchip/irq-sifive-plic.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
> index 9e22f7e378f5..e6acd134a691 100644
> --- a/drivers/irqchip/irq-sifive-plic.c
> +++ b/drivers/irqchip/irq-sifive-plic.c
> @@ -149,8 +149,10 @@ static void plic_irq_mask(struct irq_data *d)
>  static void plic_irq_eoi(struct irq_data *d)
>  {
>  	struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
> +	void __iomem *reg = handler->enable_base + (d->hwirq / 32) * sizeof(u32);
> +	u32 hwirq_mask = 1 << (d->hwirq % 32);
>  
> -	if (unlikely(irqd_irq_disabled(d))) {
> +	if (unlikely(irqd_irq_disabled(d)) || (readl(reg) & hwirq_mask) == 0) {

If we read interrupt enable state from hardware, then reading the
software state (irqd_irq_disabled) is redundant.

>  		plic_toggle(handler, d->hwirq, 1);
>  		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
>  		plic_toggle(handler, d->hwirq, 0);

I have no knowledge about affinity stuff, so I don't really understand this
patch. But there is another idea regarding this "ignored EOI" problem:
always "complete" the interrupt while enabling. That would move this extra
complication out of the hot path, and also looks simpler in my opinion.

Something like the patch below. Would this solve this "affinity problem"
too?

Best regards,
Nam

diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 0a233e9d9607..63f2111ced4a 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -122,7 +122,15 @@ static inline void plic_irq_toggle(const struct cpumask *mask,
 
 static void plic_irq_enable(struct irq_data *d)
 {
+	struct plic_priv *priv = irq_data_get_irq_chip_data(d);
+
+	writel(0, priv->regs + PRIORITY_BASE + d->hwirq * PRIORITY_PER_ID);
+
+	writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+
 	plic_irq_toggle(irq_data_get_effective_affinity_mask(d), d, 1);
+
+	writel(1, priv->regs + PRIORITY_BASE + d->hwirq * PRIORITY_PER_ID);
 }
 
 static void plic_irq_disable(struct irq_data *d)
@@ -148,13 +156,7 @@ static void plic_irq_eoi(struct irq_data *d)
 {
 	struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
 
-	if (unlikely(irqd_irq_disabled(d))) {
-		plic_toggle(handler, d->hwirq, 1);
-		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
-		plic_toggle(handler, d->hwirq, 0);
-	} else {
-		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
-	}
+	writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
 }
 
 #ifdef CONFIG_SMP

答复: [PATCH] irqchip/sifive-plic: ensure interrupt is enable before EOI

Posted by Yan Zheng（严政） 1 year, 7 months ago

> On Mon, Jun 24, 2024 at 08:53:41AM +0000, zhengyan wrote:
> > RISC-V PLIC cannot "end-of-interrupt" (EOI) disabled interrupts, as
> > explained in the description of Interrupt Completion in the PLIC spec:
> > "The PLIC signals it has completed executing an interrupt handler by
> > writing the interrupt ID it received from the claim to the
> > claim/complete register. The PLIC does not check whether the
> > completion ID is the same as the last claim ID for that target. If the
> > completion ID does not match an interrupt source that *is currently
> > enabled* for the target, the completion is silently ignored."
> >
> > Commit 9c92006b896c ("irqchip/sifive-plic: Enable interrupt if needed
> > before EOI") ensured that EOI is enable when irqd IRQD_IRQ_DISABLED is
> > set, before EOI
> >
> > Commit 69ea463021be ("irqchip/sifive-plic: Fixup EOI failed when
> > masked") ensured that EOI is successful by enabling interrupt first, before
> EOI.
> >
> > Commit a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and
> > mask
> > operations") removed the interrupt enabling code from the previous
> > commit, because it assumes that interrupt should already be enabled at
> > the point of EOI.
> >
> > However, here still miss a corner case that if SMP is enabled. When
> > someone need to set affinity from a cpu to another (Maybe like
> > boardcast-tick) the original cpu when handle the EOI meanwhile the IE
> > is disabled by plic_set_affinity
> >
> > So this patch ensure that won't happened
> >
> > Signed-off-by: zhengyan <zhengyan@asrmicro.com>
> > ---
> >  drivers/irqchip/irq-sifive-plic.c | 4 +++-
> >  1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/irqchip/irq-sifive-plic.c
> > b/drivers/irqchip/irq-sifive-plic.c
> > index 9e22f7e378f5..e6acd134a691 100644
> > --- a/drivers/irqchip/irq-sifive-plic.c
> > +++ b/drivers/irqchip/irq-sifive-plic.c
> > @@ -149,8 +149,10 @@ static void plic_irq_mask(struct irq_data *d)
> > static void plic_irq_eoi(struct irq_data *d)  {
> >  	struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
> > +	void __iomem *reg = handler->enable_base + (d->hwirq / 32) *
> sizeof(u32);
> > +	u32 hwirq_mask = 1 << (d->hwirq % 32);
> >
> > -	if (unlikely(irqd_irq_disabled(d))) {
> > +	if (unlikely(irqd_irq_disabled(d)) || (readl(reg) & hwirq_mask) ==
> > +0) {
> 
> If we read interrupt enable state from hardware, then reading the software
> state (irqd_irq_disabled) is redundant.
> 
Yes, you are right. I was afraid of missing some corner cases, so I kept the original conditional checks.
I think it over, it should be safe to only check hardware states
And I’d like to put it into "unlikely" path as 
if (unlikely((readl(reg) & hwirq_mask)) ==
> >  		plic_toggle(handler, d->hwirq, 1);
> >  		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> >  		plic_toggle(handler, d->hwirq, 0);
> 
> I have no knowledge about affinity stuff, so I don't really understand this
> patch. But there is another idea regarding this "ignored EOI" problem:
> always "complete" the interrupt while enabling. That would move this extra
> complication out of the hot path, and also looks simpler in my opinion.
> 
> Something like the patch below. Would this solve this "affinity problem"
> too?
> 
> Best regards,
> Nam
> 
No, I'm afraid the following patch can't solve this corner case. I thought it's because the core
Who executes plic_irq_enable is not the core who missing a write claim.
So if we want to do it in enable it might be something like follows :
static void plic_toggle(struct plic_handler *handler, int hwirq, int enable)
 {
        raw_spin_lock(&handler->enable_lock);
-       __plic_toggle(handler->enable_base, hwirq, enable);
+       if (enable) {
+               writel(hwirq, handler->hart_base + CONTEXT_CLAIM);
+               __plic_toggle(handler->enable_base, hwirq, enable);
+       }
        raw_spin_unlock(&handler->enable_lock);
 }

But there is a little difference:
a. check whether it's enabled  when do write claim
b. write claim anyway before enable 

sounds like a. is better?

And I'd like to illustrate more about this case:
For example, broadcast tick is working, cpu0 is about to response, cpu1 is the next
1. cpu0  response the timer irq, read the claim REG, and do timer isr event, 
2.  during the timer isr it will set next event 
tick_broadcast_set_event ->  irq_set_affinity-> xxx-> plic_set_affinity -> plic_irq_enable
3. in plic_set_affinity  disable cpu0's IE and enable cpu1'IE
4. cpu0 do the write claim to finish this irq, while cpu0's IE is disabled , left an active state in plic

Best regards,
zhengyan

> diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
> index 0a233e9d9607..63f2111ced4a 100644
> --- a/drivers/irqchip/irq-sifive-plic.c
> +++ b/drivers/irqchip/irq-sifive-plic.c
> @@ -122,7 +122,15 @@ static inline void plic_irq_toggle(const struct
> cpumask *mask,
> 
>  static void plic_irq_enable(struct irq_data *d)  {
> +	struct plic_priv *priv = irq_data_get_irq_chip_data(d);
	struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
missing a definition? If adds like this will cause a problem.
> +
> +	writel(0, priv->regs + PRIORITY_BASE + d->hwirq * PRIORITY_PER_ID);
> +
> +	writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> +
>  	plic_irq_toggle(irq_data_get_effective_affinity_mask(d), d, 1);
> +
> +	writel(1, priv->regs + PRIORITY_BASE + d->hwirq * PRIORITY_PER_ID);
>  }
> 
>  static void plic_irq_disable(struct irq_data *d) @@ -148,13 +156,7 @@ static
> void plic_irq_eoi(struct irq_data *d)  {
>  	struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
> 
> -	if (unlikely(irqd_irq_disabled(d))) {
> -		plic_toggle(handler, d->hwirq, 1);
> -		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> -		plic_toggle(handler, d->hwirq, 0);
> -	} else {
> -		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> -	}
> +	writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
>  }
> 
>  #ifdef CONFIG_SMP

Re: 答复: [PATCH]irqchip/sifive-plic: ensure interrupt is enable before EOI

Posted by Nam Cao 1 year, 7 months ago

On Mon, Jun 24, 2024 at 11:14:47AM +0000, Yan Zheng（严政） wrote:
> > I have no knowledge about affinity stuff, so I don't really understand this
> > patch. But there is another idea regarding this "ignored EOI" problem:
> > always "complete" the interrupt while enabling. That would move this extra
> > complication out of the hot path, and also looks simpler in my opinion.
> > 
> > Something like the patch below. Would this solve this "affinity problem"
> > too?
> > 
> No, I'm afraid the following patch can't solve this corner case. I thought it's because the core
> Who executes plic_irq_enable is not the core who missing a write claim.
> So if we want to do it in enable it might be something like follows :
> static void plic_toggle(struct plic_handler *handler, int hwirq, int enable)
>  {
>         raw_spin_lock(&handler->enable_lock);
> -       __plic_toggle(handler->enable_base, hwirq, enable);
> +       if (enable) {
> +               writel(hwirq, handler->hart_base + CONTEXT_CLAIM);
> +               __plic_toggle(handler->enable_base, hwirq, enable);
> +       }
>         raw_spin_unlock(&handler->enable_lock);
>  }

Again, I don't know anything about interrupt affinity thingy, so I may be
saying something dumb here:

I think this wouldn't work either. In plic_set_affinity(), I see the
interrupt is disabled, then enabled again. With your new proposed solution,
the interrupt would also be marked completed within plic_set_affinity().
So, the interrupt may be asserted again, earlier than it is supposed to (it
is not supposed to be asserted again until plic_irq_eoi() is called). It's
rare, but I think it's a possible race.

I don't have a better idea, at least for now. So probably we should stick
to your current solution.
> 
> But there is a little difference:
> a. check whether it's enabled  when do write claim
> b. write claim anyway before enable 
> 
> sounds like a. is better?
> 
> And I'd like to illustrate more about this case:
> For example, broadcast tick is working, cpu0 is about to response, cpu1 is the next
> 1. cpu0  response the timer irq, read the claim REG, and do timer isr event, 
> 2.  during the timer isr it will set next event 
> tick_broadcast_set_event ->  irq_set_affinity-> xxx-> plic_set_affinity -> plic_irq_enable
> 3. in plic_set_affinity  disable cpu0's IE and enable cpu1'IE
> 4. cpu0 do the write claim to finish this irq, while cpu0's IE is disabled , left an active state in plic

This is useful information, you may want to add it in your commit message.
> 
> Best regards,
> zhengyan
> 
> > diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
> > index 0a233e9d9607..63f2111ced4a 100644
> > --- a/drivers/irqchip/irq-sifive-plic.c
> > +++ b/drivers/irqchip/irq-sifive-plic.c
> > @@ -122,7 +122,15 @@ static inline void plic_irq_toggle(const struct
> > cpumask *mask,
> > 
> >  static void plic_irq_enable(struct irq_data *d)  {
> > +	struct plic_priv *priv = irq_data_get_irq_chip_data(d);
> 	struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
> missing a definition? If adds like this will cause a problem.

Sorry, should have mentioned I didn't build this patch. Just wanted to
throw out ideas..

> > +
> > +	writel(0, priv->regs + PRIORITY_BASE + d->hwirq * PRIORITY_PER_ID);
> > +
> > +	writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> > +
> >  	plic_irq_toggle(irq_data_get_effective_affinity_mask(d), d, 1);
> > +
> > +	writel(1, priv->regs + PRIORITY_BASE + d->hwirq * PRIORITY_PER_ID);
> >  }
> > 
> >  static void plic_irq_disable(struct irq_data *d) @@ -148,13 +156,7 @@ static
> > void plic_irq_eoi(struct irq_data *d)  {
> >  	struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
> > 
> > -	if (unlikely(irqd_irq_disabled(d))) {
> > -		plic_toggle(handler, d->hwirq, 1);
> > -		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> > -		plic_toggle(handler, d->hwirq, 0);
> > -	} else {
> > -		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> > -	}
> > +	writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> >  }
> > 
> >  #ifdef CONFIG_SMP

答复: 答复: [PATCH] irqchip/sifive-plic: ensure interrupt is enable before EOI

Posted by Yan Zheng（严政） 1 year, 7 months ago

> On Mon, Jun 24, 2024 at 11:14:47AM +0000, Yan Zheng（严政） wrote:
> > > I have no knowledge about affinity stuff, so I don't really
> > > understand this patch. But there is another idea regarding this "ignored
> EOI" problem:
> > > always "complete" the interrupt while enabling. That would move this
> > > extra complication out of the hot path, and also looks simpler in my
> opinion.
> > >
> > > Something like the patch below. Would this solve this "affinity problem"
> > > too?
> > >
> > No, I'm afraid the following patch can't solve this corner case. I
> > thought it's because the core Who executes plic_irq_enable is not the core
> who missing a write claim.
> > So if we want to do it in enable it might be something like follows :
> > static void plic_toggle(struct plic_handler *handler, int hwirq, int
> > enable)  {
> >         raw_spin_lock(&handler->enable_lock);
> > -       __plic_toggle(handler->enable_base, hwirq, enable);
> > +       if (enable) {
> > +               writel(hwirq, handler->hart_base + CONTEXT_CLAIM);
> > +               __plic_toggle(handler->enable_base, hwirq, enable);
> > +       }
> >         raw_spin_unlock(&handler->enable_lock);
> >  }
> 
> Again, I don't know anything about interrupt affinity thingy, so I may be
> saying something dumb here:
> 
> I think this wouldn't work either. In plic_set_affinity(), I see the interrupt is
> disabled, then enabled again. With your new proposed solution, the
> interrupt would also be marked completed within plic_set_affinity().
> So, the interrupt may be asserted again, earlier than it is supposed to (it is
> not supposed to be asserted again until plic_irq_eoi() is called). It's rare, but I
> think it's a possible race.
> 
> I don't have a better idea, at least for now. So probably we should stick to
> your current solution.
> >
> > But there is a little difference:
> > a. check whether it's enabled  when do write claim b. write claim
> > anyway before enable
> >
> > sounds like a. is better?
> >
> > And I'd like to illustrate more about this case:
> > For example, broadcast tick is working, cpu0 is about to response,
> > cpu1 is the next 1. cpu0  response the timer irq, read the claim REG,
> > and do timer isr event, 2.  during the timer isr it will set next
> > event tick_broadcast_set_event ->  irq_set_affinity-> xxx->
> > plic_set_affinity -> plic_irq_enable 3. in plic_set_affinity  disable
> > cpu0's IE and enable cpu1'IE 4. cpu0 do the write claim to finish this
> > irq, while cpu0's IE is disabled , left an active state in plic
> 
> This is useful information, you may want to add it in your commit message.
Yes, I'd already add it in v2 patch, thx.
> >
> > Best regards,
> > zhengyan
> >
> > > diff --git a/drivers/irqchip/irq-sifive-plic.c
> > > b/drivers/irqchip/irq-sifive-plic.c
> > > index 0a233e9d9607..63f2111ced4a 100644
> > > --- a/drivers/irqchip/irq-sifive-plic.c
> > > +++ b/drivers/irqchip/irq-sifive-plic.c
> > > @@ -122,7 +122,15 @@ static inline void plic_irq_toggle(const struct
> > > cpumask *mask,
> > >
> > >  static void plic_irq_enable(struct irq_data *d)  {
> > > +	struct plic_priv *priv = irq_data_get_irq_chip_data(d);
> > 	struct plic_handler *handler = this_cpu_ptr(&plic_handlers); missing
> > a definition? If adds like this will cause a problem.
> 
> Sorry, should have mentioned I didn't build this patch. Just wanted to throw
> out ideas..
> 
No problem, I thought you might not have compiled it. 
I wrote it down just to ensure we are on the same page.
> > > +
> > > +	writel(0, priv->regs + PRIORITY_BASE + d->hwirq *
> > > +PRIORITY_PER_ID);
> > > +
> > > +	writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> > > +
> > >  	plic_irq_toggle(irq_data_get_effective_affinity_mask(d), d, 1);
> > > +
> > > +	writel(1, priv->regs + PRIORITY_BASE + d->hwirq *
> > > +PRIORITY_PER_ID);
> > >  }
> > >
> > >  static void plic_irq_disable(struct irq_data *d) @@ -148,13 +156,7
> > > @@ static void plic_irq_eoi(struct irq_data *d)  {
> > >  	struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
> > >
> > > -	if (unlikely(irqd_irq_disabled(d))) {
> > > -		plic_toggle(handler, d->hwirq, 1);
> > > -		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> > > -		plic_toggle(handler, d->hwirq, 0);
> > > -	} else {
> > > -		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> > > -	}
> > > +	writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> > >  }
> > >
> > >  #ifdef CONFIG_SMP

[PATCH v2] irqchip/sifive-plic: ensure interrupt is enable before EOI

Posted by zhengyan 1 year, 7 months ago

RISC-V PLIC cannot "end-of-interrupt" (EOI) disabled interrupts, as
explained in the description of Interrupt Completion in the PLIC spec:
"The PLIC signals it has completed executing an interrupt handler by
 writing the interrupt ID it received from the claim to the claim/complete
 register. The PLIC does not check whether the completion ID is the same
 as the last claim ID for that target. If the completion ID does not match
 an interrupt source that *is currently enabled* for the target, the
 completion is silently ignored."

 Commit 9c92006b896c ("irqchip/sifive-plic: Enable interrupt if needed
 before EOI")
 ensured that EOI is enable when irqd IRQD_IRQ_DISABLED is set, before
 EOI

 Commit 69ea463021be ("irqchip/sifive-plic: Fixup EOI failed when masked")
 ensured that EOI is successful by enabling interrupt first, before EOI.

 Commit a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask
 operations") removed the interrupt enabling code from the previous
 commit, because it assumes that interrupt should already be enabled at the
 point of EOI.

However, here still miss a corner case that if SMP is enabled. When
someone needs to set affinity from a cpu to another the original cpu
when handle the EOI meanwhile the IE is disabled by plic_set_affinity

For example, broadcast tick is working,
cpu0 is about to response, cpu1 is the next.
1. cpu0 responses the timer irq, read the claim REG, do timer isr event.
2. during the timer isr it will set next event
tick_broadcast_set_event -> irq_set_affinity->xxx->
plic_set_affinity -> plic_irq_enable
3. in plic_set_affinity disable cpu0's IE and enable cpu1'IE
4. cpu0 do the write claim to finish this irq, while cpu0's IE is disabled,
left an active state in plic.

So this patch ensure that won't happened

Signed-off-by: zhengyan <zhengyan@asrmicro.com>
---
 drivers/irqchip/irq-sifive-plic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 9e22f7e378f5..815ce8aa28f1 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -149,8 +149,10 @@ static void plic_irq_mask(struct irq_data *d)
 static void plic_irq_eoi(struct irq_data *d)
 {
 	struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
+	void __iomem *reg = handler->enable_base + (d->hwirq / 32) * sizeof(u32);
+	u32 hwirq_mask = 1 << (d->hwirq % 32);
 
-	if (unlikely(irqd_irq_disabled(d))) {
+	if (unlikely((readl(reg) & hwirq_mask) == 0)) {
 		plic_toggle(handler, d->hwirq, 1);
 		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
 		plic_toggle(handler, d->hwirq, 0);
-- 
2.25.1

Re: [PATCH v2] irqchip/sifive-plic: ensure interrupt is enable before EOI

Posted by Nam Cao 1 year, 7 months ago

On Mon, Jun 24, 2024 at 11:35:23AM +0000, zhengyan wrote:
> RISC-V PLIC cannot "end-of-interrupt" (EOI) disabled interrupts, as
> explained in the description of Interrupt Completion in the PLIC spec:
> "The PLIC signals it has completed executing an interrupt handler by
>  writing the interrupt ID it received from the claim to the claim/complete
>  register. The PLIC does not check whether the completion ID is the same
>  as the last claim ID for that target. If the completion ID does not match
>  an interrupt source that *is currently enabled* for the target, the
>  completion is silently ignored."
> 
>  Commit 9c92006b896c ("irqchip/sifive-plic: Enable interrupt if needed
>  before EOI")
>  ensured that EOI is enable when irqd IRQD_IRQ_DISABLED is set, before
>  EOI
> 
>  Commit 69ea463021be ("irqchip/sifive-plic: Fixup EOI failed when masked")
>  ensured that EOI is successful by enabling interrupt first, before EOI.
> 
>  Commit a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and mask
>  operations") removed the interrupt enabling code from the previous
>  commit, because it assumes that interrupt should already be enabled at the
>  point of EOI.
> 
> However, here still miss a corner case that if SMP is enabled. When
> someone needs to set affinity from a cpu to another the original cpu
> when handle the EOI meanwhile the IE is disabled by plic_set_affinity
> 
> For example, broadcast tick is working,
> cpu0 is about to response, cpu1 is the next.
> 1. cpu0 responses the timer irq, read the claim REG, do timer isr event.
> 2. during the timer isr it will set next event
> tick_broadcast_set_event -> irq_set_affinity->xxx->
> plic_set_affinity -> plic_irq_enable
> 3. in plic_set_affinity disable cpu0's IE and enable cpu1'IE
> 4. cpu0 do the write claim to finish this irq, while cpu0's IE is disabled,
> left an active state in plic.
> 
> So this patch ensure that won't happened
> 
> Signed-off-by: zhengyan <zhengyan@asrmicro.com>
> ---
>  drivers/irqchip/irq-sifive-plic.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
> index 9e22f7e378f5..815ce8aa28f1 100644
> --- a/drivers/irqchip/irq-sifive-plic.c
> +++ b/drivers/irqchip/irq-sifive-plic.c
> @@ -149,8 +149,10 @@ static void plic_irq_mask(struct irq_data *d)
>  static void plic_irq_eoi(struct irq_data *d)
>  {
>  	struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
> +	void __iomem *reg = handler->enable_base + (d->hwirq / 32) * sizeof(u32);
> +	u32 hwirq_mask = 1 << (d->hwirq % 32);
>  
> -	if (unlikely(irqd_irq_disabled(d))) {
> +	if (unlikely((readl(reg) & hwirq_mask) == 0)) {
>  		plic_toggle(handler, d->hwirq, 1);
>  		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
>  		plic_toggle(handler, d->hwirq, 0);

I think this patch is fine.

But, I don't like reading hardware registers in the interrupt hot path. It
may slow things down. Also this patch doesn't allow moving the if condition
out of this plic_irq_eoi() function into the enabling function (I have been
thinking about doing that for some time, but too lazy to get to it).

I *may* have something better.

From the specification:
"The PLIC signals it has completed executing an interrupt handler by
writing the interrupt ID it received from the claim to the claim/complete
register. The PLIC **does not check** whether the completion ID is the same
as the last claim ID for that target. If the completion ID does not match
an interrupt source that is currently enabled for the target, the
completion is silently ignored."

Note what I "highlighed": the irq number written back does not have to
match the irq number last claimed for the CPU. If I interpret this
correctly, this means *any* claim/complete register can be used to complete
the interrupt.

So, my idea: since irq affinity setting still leaves at least 1 CPU with
the interrupt enabled; the claim/complete register for that enabled CPU can
be used for completing interrupt (instead of the original one used for
claiming). This would avoid some hardware register access in the hot path.
Also allows another optimization of moving the if condition out of the EOI
function.

Something like the patch below. To apply this one cleanly, another patch
must be applied first:
https://lore.kernel.org/linux-riscv/20240703072659.1427616-1-namcao@linutronix.de/

What I am still a bit unsure about is whether my interpretation of the
specification is correct. The patch works for my Visionfive 2 board, so the
question is whether this patch is relying on "undefined behavior", or this
is really what the spec means. Drew Barbier <drew@sifive.com> seems to be
the one who wrote that. Do you mind confirming my interpretation?

Best regards,
Nam

diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index f30bdb94ceeb..117ff9f1c982 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -69,6 +69,7 @@ struct plic_priv {
 	void __iomem *regs;
 	unsigned long plic_quirks;
 	unsigned int nr_irqs;
+	void __iomem **complete;
 	unsigned long *prio_save;
 };
 
@@ -149,13 +150,14 @@ static void plic_irq_mask(struct irq_data *d)
 static void plic_irq_eoi(struct irq_data *d)
 {
 	struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
+	struct plic_priv *priv = irq_data_get_irq_chip_data(d);
 
 	if (unlikely(irqd_irq_disabled(d))) {
 		plic_toggle(handler, d->hwirq, 1);
-		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+		writel(d->hwirq, priv->complete[d->hwirq]);
 		plic_toggle(handler, d->hwirq, 0);
 	} else {
-		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+		writel(d->hwirq, priv->complete[d->hwirq]);
 	}
 }
 
@@ -164,6 +166,7 @@ static int plic_set_affinity(struct irq_data *d,
 			     const struct cpumask *mask_val, bool force)
 {
 	struct plic_priv *priv = irq_data_get_irq_chip_data(d);
+	struct plic_handler *handler;
 	struct cpumask new_mask;
 
 	cpumask_and(&new_mask, mask_val, &priv->lmask);
@@ -180,6 +183,9 @@ static int plic_set_affinity(struct irq_data *d,
 	if (!irqd_irq_disabled(d))
 		plic_irq_enable(d);
 
+	handler = per_cpu_ptr(&plic_handlers, cpumask_first(&new_mask));
+	priv->complete[d->hwirq] = handler->hart_base + CONTEXT_CLAIM;
+
 	return IRQ_SET_MASK_OK_DONE;
 }
 #endif
@@ -516,6 +522,10 @@ static int plic_probe(struct platform_device *pdev)
 	priv->prio_save = devm_bitmap_zalloc(dev, nr_irqs, GFP_KERNEL);
 	if (!priv->prio_save)
 		return -ENOMEM;
+	
+	priv->complete = devm_kcalloc(dev, 1 + nr_irqs, sizeof(*priv->complete), GFP_KERNEL);
+	if (!priv->complete)
+		return -ENOMEM;
 
 	for (i = 0; i < nr_contexts; i++) {
 		error = plic_parse_context_parent(pdev, i, &parent_hwirq, &cpu);
@@ -577,6 +587,12 @@ static int plic_probe(struct platform_device *pdev)
 			writel(1, priv->regs + PRIORITY_BASE +
 				  hwirq * PRIORITY_PER_ID);
 		}
+
+		if (!nr_handlers) {
+			for (hwirq = 1; hwirq <= nr_irqs; hwirq++)
+				priv->complete[hwirq] = handler->hart_base + CONTEXT_CLAIM;
+		}
+
 		nr_handlers++;
 	}

答复: [PATCH v2] irqchip/sifive-plic: ensure interrupt is enable before EOI

Posted by Yan Zheng（严政） 1 year, 6 months ago

> On Mon, Jun 24, 2024 at 11:35:23AM +0000, zhengyan wrote:
> > RISC-V PLIC cannot "end-of-interrupt" (EOI) disabled interrupts, as
> > explained in the description of Interrupt Completion in the PLIC spec:
> > "The PLIC signals it has completed executing an interrupt handler by
> > writing the interrupt ID it received from the claim to the
> > claim/complete  register. The PLIC does not check whether the
> > completion ID is the same  as the last claim ID for that target. If
> > the completion ID does not match  an interrupt source that *is
> > currently enabled* for the target, the  completion is silently ignored."
> >
> >  Commit 9c92006b896c ("irqchip/sifive-plic: Enable interrupt if needed
> > before EOI")  ensured that EOI is enable when irqd IRQD_IRQ_DISABLED
> > is set, before  EOI
> >
> >  Commit 69ea463021be ("irqchip/sifive-plic: Fixup EOI failed when
> > masked")  ensured that EOI is successful by enabling interrupt first, before
> EOI.
> >
> >  Commit a1706a1c5062 ("irqchip/sifive-plic: Separate the enable and
> > mask
> >  operations") removed the interrupt enabling code from the previous
> > commit, because it assumes that interrupt should already be enabled at
> > the  point of EOI.
> >
> > However, here still miss a corner case that if SMP is enabled. When
> > someone needs to set affinity from a cpu to another the original cpu
> > when handle the EOI meanwhile the IE is disabled by plic_set_affinity
> >
> > For example, broadcast tick is working,
> > cpu0 is about to response, cpu1 is the next.
> > 1. cpu0 responses the timer irq, read the claim REG, do timer isr event.
> > 2. during the timer isr it will set next event
> > tick_broadcast_set_event -> irq_set_affinity->xxx-> plic_set_affinity
> > -> plic_irq_enable 3. in plic_set_affinity disable cpu0's IE and
> > enable cpu1'IE 4. cpu0 do the write claim to finish this irq, while
> > cpu0's IE is disabled, left an active state in plic.
> >
> > So this patch ensure that won't happened
> >
> > Signed-off-by: zhengyan <zhengyan@asrmicro.com>
> > ---
> >  drivers/irqchip/irq-sifive-plic.c | 4 +++-
> >  1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/irqchip/irq-sifive-plic.c
> > b/drivers/irqchip/irq-sifive-plic.c
> > index 9e22f7e378f5..815ce8aa28f1 100644
> > --- a/drivers/irqchip/irq-sifive-plic.c
> > +++ b/drivers/irqchip/irq-sifive-plic.c
> > @@ -149,8 +149,10 @@ static void plic_irq_mask(struct irq_data *d)
> > static void plic_irq_eoi(struct irq_data *d)  {
> >  	struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
> > +	void __iomem *reg = handler->enable_base + (d->hwirq / 32) *
> sizeof(u32);
> > +	u32 hwirq_mask = 1 << (d->hwirq % 32);
> >
> > -	if (unlikely(irqd_irq_disabled(d))) {
> > +	if (unlikely((readl(reg) & hwirq_mask) == 0)) {
> >  		plic_toggle(handler, d->hwirq, 1);
> >  		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> >  		plic_toggle(handler, d->hwirq, 0);
> 
> I think this patch is fine.
> 
> But, I don't like reading hardware registers in the interrupt hot path. It may
> slow things down. Also this patch doesn't allow moving the if condition out of
> this plic_irq_eoi() function into the enabling function (I have been thinking
> about doing that for some time, but too lazy to get to it).
> 
> I *may* have something better.
> 
> From the specification:
> "The PLIC signals it has completed executing an interrupt handler by writing
> the interrupt ID it received from the claim to the claim/complete register.
> The PLIC **does not check** whether the completion ID is the same as the
> last claim ID for that target. If the completion ID does not match an interrupt
> source that is currently enabled for the target, the completion is silently
> ignored."
> 
> Note what I "highlighed": the irq number written back does not have to
> match the irq number last claimed for the CPU. If I interpret this correctly,
> this means *any* claim/complete register can be used to complete the
> interrupt.
> 
> So, my idea: since irq affinity setting still leaves at least 1 CPU with the
> interrupt enabled; the claim/complete register for that enabled CPU can be
> used for completing interrupt (instead of the original one used for claiming).
> This would avoid some hardware register access in the hot path.
> Also allows another optimization of moving the if condition out of the EOI
> function.
> 
> Something like the patch below. To apply this one cleanly, another patch
> must be applied first:
> https://lore.kernel.org/linux-riscv/20240703072659.1427616-1-
> namcao@linutronix.de/
> 
> What I am still a bit unsure about is whether my interpretation of the
> specification is correct. The patch works for my Visionfive 2 board, so the
> question is whether this patch is relying on "undefined behavior", or this is
> really what the spec means. Drew Barbier <drew@sifive.com> seems to be
> the one who wrote that. Do you mind confirming my interpretation?
> 
> Best regards,
> Nam
> 
I confirm it, It works good on my platform as well. 
Looks like " *any* claim/complete register can be used to complete" is correct.

> diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
> index f30bdb94ceeb..117ff9f1c982 100644
> --- a/drivers/irqchip/irq-sifive-plic.c
> +++ b/drivers/irqchip/irq-sifive-plic.c
> @@ -69,6 +69,7 @@ struct plic_priv {
>  	void __iomem *regs;
>  	unsigned long plic_quirks;
>  	unsigned int nr_irqs;
> +	void __iomem **complete;
>  	unsigned long *prio_save;
>  };
> 
> @@ -149,13 +150,14 @@ static void plic_irq_mask(struct irq_data *d)  static
> void plic_irq_eoi(struct irq_data *d)  {
>  	struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
> +	struct plic_priv *priv = irq_data_get_irq_chip_data(d);
> 
>  	if (unlikely(irqd_irq_disabled(d))) {
>  		plic_toggle(handler, d->hwirq, 1);
> -		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> +		writel(d->hwirq, priv->complete[d->hwirq]);
>  		plic_toggle(handler, d->hwirq, 0);
>  	} else {
> -		writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
> +		writel(d->hwirq, priv->complete[d->hwirq]);
>  	}
>  }
> 
> @@ -164,6 +166,7 @@ static int plic_set_affinity(struct irq_data *d,
>  			     const struct cpumask *mask_val, bool force)  {
>  	struct plic_priv *priv = irq_data_get_irq_chip_data(d);
> +	struct plic_handler *handler;
>  	struct cpumask new_mask;
> 
>  	cpumask_and(&new_mask, mask_val, &priv->lmask); @@ -180,6
> +183,9 @@ static int plic_set_affinity(struct irq_data *d,
>  	if (!irqd_irq_disabled(d))
>  		plic_irq_enable(d);
> 
> +	handler = per_cpu_ptr(&plic_handlers,
> cpumask_first(&new_mask));
> +	priv->complete[d->hwirq] = handler->hart_base + CONTEXT_CLAIM;
> +
>  	return IRQ_SET_MASK_OK_DONE;
>  }
>  #endif
> @@ -516,6 +522,10 @@ static int plic_probe(struct platform_device *pdev)
>  	priv->prio_save = devm_bitmap_zalloc(dev, nr_irqs, GFP_KERNEL);
>  	if (!priv->prio_save)
>  		return -ENOMEM;
> +
> +	priv->complete = devm_kcalloc(dev, 1 + nr_irqs, sizeof(*priv-
> >complete), GFP_KERNEL);
> +	if (!priv->complete)
> +		return -ENOMEM;
> 
>  	for (i = 0; i < nr_contexts; i++) {
>  		error = plic_parse_context_parent(pdev, i, &parent_hwirq,
> &cpu); @@ -577,6 +587,12 @@ static int plic_probe(struct platform_device
> *pdev)
>  			writel(1, priv->regs + PRIORITY_BASE +
>  				  hwirq * PRIORITY_PER_ID);
>  		}
> +
> +		if (!nr_handlers) {
> +			for (hwirq = 1; hwirq <= nr_irqs; hwirq++)
> +				priv->complete[hwirq] = handler->hart_base
> + CONTEXT_CLAIM;
> +		}
> +
>  		nr_handlers++;
>  	}
>