[PATCH v9 4/6] pwm: dwc: use clock rate in hz to avoid rounding issues

Ben Dooks posted 6 patches 2 years, 3 months ago
[PATCH v9 4/6] pwm: dwc: use clock rate in hz to avoid rounding issues
Posted by Ben Dooks 2 years, 3 months ago
As noted, the clock-rate when not a nice multiple of ns is probably
going to end up with inacurate calculations, as well as on a non pci
system the rate may change (although we've not put a clock rate
change notifier in this code yet) so we also add some quick checks
of the rate when we do any calculations with it.

Signed-off-by; Ben Dooks <ben.dooks@codethink.co.uk>
Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
v9:
 - fixed commit spelling
 - changed to use codethink email instead of sifive
v8:
 - fixup post rename
 - move to earlier in series
---
 drivers/pwm/pwm-dwc-core.c | 24 +++++++++++++++---------
 drivers/pwm/pwm-dwc.h      |  2 +-
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/pwm/pwm-dwc-core.c b/drivers/pwm/pwm-dwc-core.c
index 3fc281a78c9a..3b856685029d 100644
--- a/drivers/pwm/pwm-dwc-core.c
+++ b/drivers/pwm/pwm-dwc-core.c
@@ -49,13 +49,14 @@ static int __dwc_pwm_configure_timer(struct dwc_pwm *dwc,
 	 * periods and check are the result within HW limits between 1 and
 	 * 2^32 periods.
 	 */
-	tmp = DIV_ROUND_CLOSEST_ULL(state->duty_cycle, dwc->clk_ns);
+	tmp = state->duty_cycle * dwc->clk_rate;
+	tmp = DIV_ROUND_CLOSEST_ULL(tmp, NSEC_PER_SEC);
 	if (tmp < 1 || tmp > (1ULL << 32))
 		return -ERANGE;
 	low = tmp - 1;
 
-	tmp = DIV_ROUND_CLOSEST_ULL(state->period - state->duty_cycle,
-				    dwc->clk_ns);
+	tmp = (state->period - state->duty_cycle) * dwc->clk_rate;
+	tmp = DIV_ROUND_CLOSEST_ULL(tmp, NSEC_PER_SEC);
 	if (tmp < 1 || tmp > (1ULL << 32))
 		return -ERANGE;
 	high = tmp - 1;
@@ -121,11 +122,14 @@ static int dwc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 			     struct pwm_state *state)
 {
 	struct dwc_pwm *dwc = to_dwc_pwm(chip);
+	unsigned long clk_rate;
 	u64 duty, period;
 	u32 ctrl, ld, ld2;
 
 	pm_runtime_get_sync(chip->dev);
 
+	clk_rate = dwc->clk_rate;
+
 	ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(pwm->hwpwm));
 	ld = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(pwm->hwpwm));
 	ld2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(pwm->hwpwm));
@@ -137,17 +141,19 @@ static int dwc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 	 * based on the timer load-count only.
 	 */
 	if (ctrl & DWC_TIM_CTRL_PWM) {
-		duty = (ld + 1) * dwc->clk_ns;
-		period = (ld2 + 1)  * dwc->clk_ns;
+		duty = ld + 1;
+		period = ld2 + 1;
 		period += duty;
 	} else {
-		duty = (ld + 1) * dwc->clk_ns;
+		duty = ld + 1;
 		period = duty * 2;
 	}
 
+	duty *= NSEC_PER_SEC;
+	period *= NSEC_PER_SEC;
+	state->period = DIV_ROUND_CLOSEST_ULL(period, clk_rate);
+	state->duty_cycle = DIV_ROUND_CLOSEST_ULL(duty, clk_rate);
 	state->polarity = PWM_POLARITY_INVERSED;
-	state->period = period;
-	state->duty_cycle = duty;
 
 	pm_runtime_put_sync(chip->dev);
 
@@ -168,7 +174,7 @@ struct dwc_pwm *dwc_pwm_alloc(struct device *dev)
 	if (!dwc)
 		return NULL;
 
-	dwc->clk_ns = 10;
+	dwc->clk_rate = NSEC_PER_SEC / 10;
 	dwc->chip.dev = dev;
 	dwc->chip.ops = &dwc_pwm_ops;
 	dwc->chip.npwm = DWC_TIMERS_TOTAL;
diff --git a/drivers/pwm/pwm-dwc.h b/drivers/pwm/pwm-dwc.h
index 64795247c54c..e0a940fd6e87 100644
--- a/drivers/pwm/pwm-dwc.h
+++ b/drivers/pwm/pwm-dwc.h
@@ -42,7 +42,7 @@ struct dwc_pwm_ctx {
 struct dwc_pwm {
 	struct pwm_chip chip;
 	void __iomem *base;
-	unsigned int clk_ns;
+	unsigned long clk_rate;
 	struct dwc_pwm_ctx ctx[DWC_TIMERS_TOTAL];
 };
 #define to_dwc_pwm(p)	(container_of((p), struct dwc_pwm, chip))
-- 
2.40.1

Re: [PATCH v9 4/6] pwm: dwc: use clock rate in hz to avoid rounding issues
Posted by Uwe Kleine-König 2 years, 3 months ago
Hello,

[Dropped William Salmon and Jude Onyenegecha from the list of recipents,
their email addresses don't seem to work any more.]

On Thu, Sep 07, 2023 at 05:12:40PM +0100, Ben Dooks wrote:
> As noted, the clock-rate when not a nice multiple of ns is probably
> going to end up with inacurate calculations, as well as on a non pci
> system the rate may change (although we've not put a clock rate
> change notifier in this code yet) so we also add some quick checks
> of the rate when we do any calculations with it.
> 
> Signed-off-by; Ben Dooks <ben.dooks@codethink.co.uk>
> Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
> ---
> v9:
>  - fixed commit spelling
>  - changed to use codethink email instead of sifive
> v8:
>  - fixup post rename
>  - move to earlier in series
> ---
>  drivers/pwm/pwm-dwc-core.c | 24 +++++++++++++++---------
>  drivers/pwm/pwm-dwc.h      |  2 +-
>  2 files changed, 16 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/pwm/pwm-dwc-core.c b/drivers/pwm/pwm-dwc-core.c
> index 3fc281a78c9a..3b856685029d 100644
> --- a/drivers/pwm/pwm-dwc-core.c
> +++ b/drivers/pwm/pwm-dwc-core.c
> @@ -49,13 +49,14 @@ static int __dwc_pwm_configure_timer(struct dwc_pwm *dwc,
>  	 * periods and check are the result within HW limits between 1 and
>  	 * 2^32 periods.
>  	 */
> -	tmp = DIV_ROUND_CLOSEST_ULL(state->duty_cycle, dwc->clk_ns);
> +	tmp = state->duty_cycle * dwc->clk_rate;

This might overflow. You can prevent this by asserting that clk_rate is
<= NSEC_PER_SEC and using mul_u64_u64_div_u64.

> +	tmp = DIV_ROUND_CLOSEST_ULL(tmp, NSEC_PER_SEC);
>  	if (tmp < 1 || tmp > (1ULL << 32))
>  		return -ERANGE;
>  	low = tmp - 1;
>  
> -	tmp = DIV_ROUND_CLOSEST_ULL(state->period - state->duty_cycle,
> -				    dwc->clk_ns);
> +	tmp = (state->period - state->duty_cycle) * dwc->clk_rate;
> +	tmp = DIV_ROUND_CLOSEST_ULL(tmp, NSEC_PER_SEC);
>  	if (tmp < 1 || tmp > (1ULL << 32))
>  		return -ERANGE;
>  	high = tmp - 1;
> @@ -121,11 +122,14 @@ static int dwc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
>  			     struct pwm_state *state)
>  {
>  	struct dwc_pwm *dwc = to_dwc_pwm(chip);
> +	unsigned long clk_rate;
>  	u64 duty, period;
>  	u32 ctrl, ld, ld2;
>  
>  	pm_runtime_get_sync(chip->dev);
>  
> +	clk_rate = dwc->clk_rate;
> +
>  	ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(pwm->hwpwm));
>  	ld = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(pwm->hwpwm));
>  	ld2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(pwm->hwpwm));
> @@ -137,17 +141,19 @@ static int dwc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
>  	 * based on the timer load-count only.
>  	 */
>  	if (ctrl & DWC_TIM_CTRL_PWM) {
> -		duty = (ld + 1) * dwc->clk_ns;
> -		period = (ld2 + 1)  * dwc->clk_ns;
> +		duty = ld + 1;
> +		period = ld2 + 1;
>  		period += duty;
>  	} else {
> -		duty = (ld + 1) * dwc->clk_ns;
> +		duty = ld + 1;
>  		period = duty * 2;
>  	}
>  
> +	duty *= NSEC_PER_SEC;
> +	period *= NSEC_PER_SEC;

A comment that/why this cannot overflow would be nice. (I didn't check,
maybe it can?)

> +	state->period = DIV_ROUND_CLOSEST_ULL(period, clk_rate);
> +	state->duty_cycle = DIV_ROUND_CLOSEST_ULL(duty, clk_rate);

Without having thought deeply about this, I think you need to round up
here. Hmm, but given that .apply() uses round-closest, too, this needs
to be addressed separately.

(The ugly thing about round-closest is that .apply(mypwm,
.get_state(mypwm)) isn't idempotent in general. Consider a PWM that can
implement period = 41.7ns and period = 42.4 ns. If it's configured with
42.4, .get_state will return period = 42. Reapplying this will configure
for 41.7ns. This won't happen with the PCI clkrate, but it might in the
of case. Another reason to use rounding-down in .apply is that
mul_u64_u64_div_u64 doesn't have a round-nearest variant.)

>  	state->polarity = PWM_POLARITY_INVERSED;
> -	state->period = period;
> -	state->duty_cycle = duty;
>  
>  	pm_runtime_put_sync(chip->dev);
>  
> @@ -168,7 +174,7 @@ struct dwc_pwm *dwc_pwm_alloc(struct device *dev)
>  	if (!dwc)
>  		return NULL;
>  
> -	dwc->clk_ns = 10;
> +	dwc->clk_rate = NSEC_PER_SEC / 10;
>  	dwc->chip.dev = dev;
>  	dwc->chip.ops = &dwc_pwm_ops;
>  	dwc->chip.npwm = DWC_TIMERS_TOTAL;
> diff --git a/drivers/pwm/pwm-dwc.h b/drivers/pwm/pwm-dwc.h
> index 64795247c54c..e0a940fd6e87 100644
> --- a/drivers/pwm/pwm-dwc.h
> +++ b/drivers/pwm/pwm-dwc.h
> @@ -42,7 +42,7 @@ struct dwc_pwm_ctx {
>  struct dwc_pwm {
>  	struct pwm_chip chip;
>  	void __iomem *base;
> -	unsigned int clk_ns;
> +	unsigned long clk_rate;

Given that clk_ns was only introduced in patch #2 I think it would be
cleaner to squash these two patches together.

Best regards
Uwe

-- 
Pengutronix e.K.                           | Uwe Kleine-König            |
Industrial Linux Solutions                 | https://www.pengutronix.de/ |
Re: [PATCH v9 4/6] pwm: dwc: use clock rate in hz to avoid rounding issues
Posted by Ben Dooks 2 years, 3 months ago
On 07/09/2023 22:34, Uwe Kleine-König wrote:
> Hello,
> 
> [Dropped William Salmon and Jude Onyenegecha from the list of recipents,
> their email addresses don't seem to work any more.]
> 
> On Thu, Sep 07, 2023 at 05:12:40PM +0100, Ben Dooks wrote:
>> As noted, the clock-rate when not a nice multiple of ns is probably
>> going to end up with inacurate calculations, as well as on a non pci
>> system the rate may change (although we've not put a clock rate
>> change notifier in this code yet) so we also add some quick checks
>> of the rate when we do any calculations with it.
>>
>> Signed-off-by; Ben Dooks <ben.dooks@codethink.co.uk>
>> Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
>> ---
>> v9:
>>   - fixed commit spelling
>>   - changed to use codethink email instead of sifive
>> v8:
>>   - fixup post rename
>>   - move to earlier in series
>> ---
>>   drivers/pwm/pwm-dwc-core.c | 24 +++++++++++++++---------
>>   drivers/pwm/pwm-dwc.h      |  2 +-
>>   2 files changed, 16 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/pwm/pwm-dwc-core.c b/drivers/pwm/pwm-dwc-core.c
>> index 3fc281a78c9a..3b856685029d 100644
>> --- a/drivers/pwm/pwm-dwc-core.c
>> +++ b/drivers/pwm/pwm-dwc-core.c
>> @@ -49,13 +49,14 @@ static int __dwc_pwm_configure_timer(struct dwc_pwm *dwc,
>>   	 * periods and check are the result within HW limits between 1 and
>>   	 * 2^32 periods.
>>   	 */
>> -	tmp = DIV_ROUND_CLOSEST_ULL(state->duty_cycle, dwc->clk_ns);
>> +	tmp = state->duty_cycle * dwc->clk_rate;
> 
> This might overflow. You can prevent this by asserting that clk_rate is
> <= NSEC_PER_SEC and using mul_u64_u64_div_u64.
> 
>> +	tmp = DIV_ROUND_CLOSEST_ULL(tmp, NSEC_PER_SEC);
>>   	if (tmp < 1 || tmp > (1ULL << 32))
>>   		return -ERANGE;
>>   	low = tmp - 1;
>>   
>> -	tmp = DIV_ROUND_CLOSEST_ULL(state->period - state->duty_cycle,
>> -				    dwc->clk_ns);
>> +	tmp = (state->period - state->duty_cycle) * dwc->clk_rate;
>> +	tmp = DIV_ROUND_CLOSEST_ULL(tmp, NSEC_PER_SEC);
>>   	if (tmp < 1 || tmp > (1ULL << 32))
>>   		return -ERANGE;
>>   	high = tmp - 1;
>> @@ -121,11 +122,14 @@ static int dwc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
>>   			     struct pwm_state *state)
>>   {
>>   	struct dwc_pwm *dwc = to_dwc_pwm(chip);
>> +	unsigned long clk_rate;
>>   	u64 duty, period;
>>   	u32 ctrl, ld, ld2;
>>   
>>   	pm_runtime_get_sync(chip->dev);
>>   
>> +	clk_rate = dwc->clk_rate;
>> +
>>   	ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(pwm->hwpwm));
>>   	ld = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(pwm->hwpwm));
>>   	ld2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(pwm->hwpwm));
>> @@ -137,17 +141,19 @@ static int dwc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
>>   	 * based on the timer load-count only.
>>   	 */
>>   	if (ctrl & DWC_TIM_CTRL_PWM) {
>> -		duty = (ld + 1) * dwc->clk_ns;
>> -		period = (ld2 + 1)  * dwc->clk_ns;
>> +		duty = ld + 1;
>> +		period = ld2 + 1;
>>   		period += duty;
>>   	} else {
>> -		duty = (ld + 1) * dwc->clk_ns;
>> +		duty = ld + 1;
>>   		period = duty * 2;
>>   	}
>>   
>> +	duty *= NSEC_PER_SEC;
>> +	period *= NSEC_PER_SEC;
> 
> A comment that/why this cannot overflow would be nice. (I didn't check,
> maybe it can?)

I /think/ that as long as NSEC_PER_SEC  2^32 then this shouldn't
overflow.

> 
>> +	state->period = DIV_ROUND_CLOSEST_ULL(period, clk_rate);
>> +	state->duty_cycle = DIV_ROUND_CLOSEST_ULL(duty, clk_rate);
> 
> Without having thought deeply about this, I think you need to round up
> here. Hmm, but given that .apply() uses round-closest, too, this needs
> to be addressed separately.
> 
> (The ugly thing about round-closest is that .apply(mypwm,
> .get_state(mypwm)) isn't idempotent in general. Consider a PWM that can
> implement period = 41.7ns and period = 42.4 ns. If it's configured with
> 42.4, .get_state will return period = 42. Reapplying this will configure
> for 41.7ns. This won't happen with the PCI clkrate, but it might in the
> of case. Another reason to use rounding-down in .apply is that
> mul_u64_u64_div_u64 doesn't have a round-nearest variant.)
> 
>>   	state->polarity = PWM_POLARITY_INVERSED;
>> -	state->period = period;
>> -	state->duty_cycle = duty;
>>   
>>   	pm_runtime_put_sync(chip->dev);
>>   
>> @@ -168,7 +174,7 @@ struct dwc_pwm *dwc_pwm_alloc(struct device *dev)
>>   	if (!dwc)
>>   		return NULL;
>>   
>> -	dwc->clk_ns = 10;
>> +	dwc->clk_rate = NSEC_PER_SEC / 10;
>>   	dwc->chip.dev = dev;
>>   	dwc->chip.ops = &dwc_pwm_ops;
>>   	dwc->chip.npwm = DWC_TIMERS_TOTAL;
>> diff --git a/drivers/pwm/pwm-dwc.h b/drivers/pwm/pwm-dwc.h
>> index 64795247c54c..e0a940fd6e87 100644
>> --- a/drivers/pwm/pwm-dwc.h
>> +++ b/drivers/pwm/pwm-dwc.h
>> @@ -42,7 +42,7 @@ struct dwc_pwm_ctx {
>>   struct dwc_pwm {
>>   	struct pwm_chip chip;
>>   	void __iomem *base;
>> -	unsigned int clk_ns;
>> +	unsigned long clk_rate;
> 
> Given that clk_ns was only introduced in patch #2 I think it would be
> cleaner to squash these two patches together.

I'll have a look at how much work re-ordering the patches would
be.

-- 
Ben Dooks				http://www.codethink.co.uk/
Senior Engineer				Codethink - Providing Genius

https://www.codethink.co.uk/privacy.html

Re: [PATCH v9 4/6] pwm: dwc: use clock rate in hz to avoid rounding issues
Posted by Uwe Kleine-König 2 years, 3 months ago
Hello Ben,

On Mon, Sep 11, 2023 at 08:33:02AM +0100, Ben Dooks wrote:
> On 07/09/2023 22:34, Uwe Kleine-König wrote:
> > > +	duty *= NSEC_PER_SEC;
> > > +	period *= NSEC_PER_SEC;
> > 
> > A comment that/why this cannot overflow would be nice. (I didn't check,
> > maybe it can?)
> 
> I /think/ that as long as NSEC_PER_SEC  2^32 then this shouldn't
> overflow.

I guess NSEC_PER_SEC won't change in the near future :-)

double checking and writing the result down in a comment would be
appreciated.

Best regards
Uwe

-- 
Pengutronix e.K.                           | Uwe Kleine-König            |
Industrial Linux Solutions                 | https://www.pengutronix.de/ |