include/linux/iopoll.h | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-)
A short polling delay, such as the delay of 5us
(SPINAND_READ_POLL_DELAY_US) provided by the SPI NAND driver,
can become a 1/HZ (order of ms) delay caused by the usleep_range()
call in read_poll_timeout(), significantly reducing SPI NAND access
performance. Fix it by adjusting the read_poll_timeout() macro to use
udelay() to delay until 1/10 of a timer tick after it is called, and
only then sleep.
Fixes: c955a0cc8a28 ("spi: spi-mem: add automatic poll status functions")
Signed-off-by: Peter Collingbourne <peter@pcc.me.uk>
---
include/linux/iopoll.h | 30 ++++++++++++++++++++++--------
1 file changed, 22 insertions(+), 8 deletions(-)
v2:
* Fix it in read_poll_timeout() instead
diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
index 53edd69acb9b..2ee89b76f072 100644
--- a/include/linux/iopoll.h
+++ b/include/linux/iopoll.h
@@ -19,9 +19,11 @@
*
* @op: Operation
* @cond: Break condition
- * @sleep_us: Maximum time to sleep between operations in us (0 tight-loops).
- * Please read usleep_range() function description for details and
- * limitations.
+ * @sleep_us: Maximum time to sleep or delay between operations in us
+ * (0 tight-loops). Please read usleep_range() and udelay()
+ * function descriptions for details and limitations.
+ * This macro will delay until 1/10 of a timer tick after
+ * it is called, and will then start sleeping.
* @timeout_us: Timeout in us, 0 means never timeout
* @sleep_before_op: if it is true, sleep @sleep_us before operation.
*
@@ -35,11 +37,18 @@
({ \
u64 __timeout_us = (timeout_us); \
unsigned long __sleep_us = (sleep_us); \
- ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \
+ ktime_t __start_time = ktime_get(); \
+ u64 __delay_timeout_us = 100000/HZ; \
+ ktime_t __delay_timeout = ktime_add_us(__start_time, __delay_timeout_us); \
+ ktime_t __timeout = ktime_add_us(__start_time, __timeout_us); \
int ___ret; \
might_sleep_if((__sleep_us) != 0); \
- if ((sleep_before_op) && __sleep_us) \
- usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
+ if ((sleep_before_op) && __sleep_us) { \
+ if (__sleep_us <= __delay_timeout_us) \
+ udelay(__sleep_us); \
+ else \
+ usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
+ } \
for (;;) { \
bool __expired = __timeout_us && \
ktime_compare(ktime_get(), __timeout) > 0; \
@@ -54,8 +63,13 @@
___ret = -ETIMEDOUT; \
break; \
} \
- if (__sleep_us) \
- usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
+ if (__sleep_us) { \
+ if (__sleep_us <= __delay_timeout_us && \
+ ktime_compare(ktime_get(), __delay_timeout) < 0) \
+ udelay(__sleep_us); \
+ else \
+ usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
+ } \
cpu_relax(); \
} \
___ret; \
--
2.54.0
On Tue, May 19, 2026 at 03:24:46AM -0700, Peter Collingbourne wrote:
> A short polling delay, such as the delay of 5us
> (SPINAND_READ_POLL_DELAY_US) provided by the SPI NAND driver,
> can become a 1/HZ (order of ms) delay caused by the usleep_range()
> call in read_poll_timeout(), significantly reducing SPI NAND access
> performance. Fix it by adjusting the read_poll_timeout() macro to use
> udelay() to delay until 1/10 of a timer tick after it is called, and
> only then sleep.
>
> Fixes: c955a0cc8a28 ("spi: spi-mem: add automatic poll status functions")
> Signed-off-by: Peter Collingbourne <peter@pcc.me.uk>
> ---
> include/linux/iopoll.h | 30 ++++++++++++++++++++++--------
> 1 file changed, 22 insertions(+), 8 deletions(-)
>
> v2:
> * Fix it in read_poll_timeout() instead
>
> diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
> index 53edd69acb9b..2ee89b76f072 100644
> --- a/include/linux/iopoll.h
> +++ b/include/linux/iopoll.h
> @@ -19,9 +19,11 @@
> *
> * @op: Operation
> * @cond: Break condition
> - * @sleep_us: Maximum time to sleep between operations in us (0 tight-loops).
> - * Please read usleep_range() function description for details and
> - * limitations.
> + * @sleep_us: Maximum time to sleep or delay between operations in us
> + * (0 tight-loops). Please read usleep_range() and udelay()
> + * function descriptions for details and limitations.
> + * This macro will delay until 1/10 of a timer tick after
> + * it is called, and will then start sleeping.
> * @timeout_us: Timeout in us, 0 means never timeout
> * @sleep_before_op: if it is true, sleep @sleep_us before operation.
> *
> @@ -35,11 +37,18 @@
> ({ \
> u64 __timeout_us = (timeout_us); \
> unsigned long __sleep_us = (sleep_us); \
> - ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \
> + ktime_t __start_time = ktime_get(); \
> + u64 __delay_timeout_us = 100000/HZ; \
> + ktime_t __delay_timeout = ktime_add_us(__start_time, __delay_timeout_us); \
> + ktime_t __timeout = ktime_add_us(__start_time, __timeout_us); \
> int ___ret; \
> might_sleep_if((__sleep_us) != 0); \
> - if ((sleep_before_op) && __sleep_us) \
> - usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
> + if ((sleep_before_op) && __sleep_us) { \
> + if (__sleep_us <= __delay_timeout_us) \
> + udelay(__sleep_us); \
If you want udelay() why not just use the atomic variant of the macro?
> + else \
> + usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
> + } \
> for (;;) { \
> bool __expired = __timeout_us && \
> ktime_compare(ktime_get(), __timeout) > 0; \
> @@ -54,8 +63,13 @@
> ___ret = -ETIMEDOUT; \
> break; \
> } \
> - if (__sleep_us) \
> - usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
> + if (__sleep_us) { \
> + if (__sleep_us <= __delay_timeout_us && \
> + ktime_compare(ktime_get(), __delay_timeout) < 0) \
> + udelay(__sleep_us); \
> + else \
> + usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
> + } \
> cpu_relax(); \
> } \
> ___ret; \
> --
> 2.54.0
--
Ville Syrjälä
Intel
On Wed, May 20, 2026 at 6:29 AM Ville Syrjälä
<ville.syrjala@linux.intel.com> wrote:
>
> On Tue, May 19, 2026 at 03:24:46AM -0700, Peter Collingbourne wrote:
> > A short polling delay, such as the delay of 5us
> > (SPINAND_READ_POLL_DELAY_US) provided by the SPI NAND driver,
> > can become a 1/HZ (order of ms) delay caused by the usleep_range()
> > call in read_poll_timeout(), significantly reducing SPI NAND access
> > performance. Fix it by adjusting the read_poll_timeout() macro to use
> > udelay() to delay until 1/10 of a timer tick after it is called, and
> > only then sleep.
> >
> > Fixes: c955a0cc8a28 ("spi: spi-mem: add automatic poll status functions")
> > Signed-off-by: Peter Collingbourne <peter@pcc.me.uk>
> > ---
> > include/linux/iopoll.h | 30 ++++++++++++++++++++++--------
> > 1 file changed, 22 insertions(+), 8 deletions(-)
> >
> > v2:
> > * Fix it in read_poll_timeout() instead
> >
> > diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
> > index 53edd69acb9b..2ee89b76f072 100644
> > --- a/include/linux/iopoll.h
> > +++ b/include/linux/iopoll.h
> > @@ -19,9 +19,11 @@
> > *
> > * @op: Operation
> > * @cond: Break condition
> > - * @sleep_us: Maximum time to sleep between operations in us (0 tight-loops).
> > - * Please read usleep_range() function description for details and
> > - * limitations.
> > + * @sleep_us: Maximum time to sleep or delay between operations in us
> > + * (0 tight-loops). Please read usleep_range() and udelay()
> > + * function descriptions for details and limitations.
> > + * This macro will delay until 1/10 of a timer tick after
> > + * it is called, and will then start sleeping.
> > * @timeout_us: Timeout in us, 0 means never timeout
> > * @sleep_before_op: if it is true, sleep @sleep_us before operation.
> > *
> > @@ -35,11 +37,18 @@
> > ({ \
> > u64 __timeout_us = (timeout_us); \
> > unsigned long __sleep_us = (sleep_us); \
> > - ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \
> > + ktime_t __start_time = ktime_get(); \
> > + u64 __delay_timeout_us = 100000/HZ; \
> > + ktime_t __delay_timeout = ktime_add_us(__start_time, __delay_timeout_us); \
> > + ktime_t __timeout = ktime_add_us(__start_time, __timeout_us); \
> > int ___ret; \
> > might_sleep_if((__sleep_us) != 0); \
> > - if ((sleep_before_op) && __sleep_us) \
> > - usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
> > + if ((sleep_before_op) && __sleep_us) { \
> > + if (__sleep_us <= __delay_timeout_us) \
> > + udelay(__sleep_us); \
>
> If you want udelay() why not just use the atomic variant of the macro?
That's what I had in v1; we decided this approach would better handle
misbehaving devices.
https://lore.kernel.org/all/20260517150253.031dec09@pumpkin/
Peter
On Wed, 20 May 2026, Peter Collingbourne <peter@pcc.me.uk> wrote:
> On Wed, May 20, 2026 at 6:29 AM Ville Syrjälä
> <ville.syrjala@linux.intel.com> wrote:
>>
>> On Tue, May 19, 2026 at 03:24:46AM -0700, Peter Collingbourne wrote:
>> > A short polling delay, such as the delay of 5us
>> > (SPINAND_READ_POLL_DELAY_US) provided by the SPI NAND driver,
>> > can become a 1/HZ (order of ms) delay caused by the usleep_range()
>> > call in read_poll_timeout(), significantly reducing SPI NAND access
>> > performance. Fix it by adjusting the read_poll_timeout() macro to use
>> > udelay() to delay until 1/10 of a timer tick after it is called, and
>> > only then sleep.
What's "1/10 of a timer tick" based on?
fsleep() has simply 10 us as the threshold.
>> >
>> > Fixes: c955a0cc8a28 ("spi: spi-mem: add automatic poll status functions")
>> > Signed-off-by: Peter Collingbourne <peter@pcc.me.uk>
>> > ---
>> > include/linux/iopoll.h | 30 ++++++++++++++++++++++--------
>> > 1 file changed, 22 insertions(+), 8 deletions(-)
>> >
>> > v2:
>> > * Fix it in read_poll_timeout() instead
>> >
>> > diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
>> > index 53edd69acb9b..2ee89b76f072 100644
>> > --- a/include/linux/iopoll.h
>> > +++ b/include/linux/iopoll.h
>> > @@ -19,9 +19,11 @@
>> > *
>> > * @op: Operation
>> > * @cond: Break condition
>> > - * @sleep_us: Maximum time to sleep between operations in us (0 tight-loops).
>> > - * Please read usleep_range() function description for details and
>> > - * limitations.
>> > + * @sleep_us: Maximum time to sleep or delay between operations in us
>> > + * (0 tight-loops). Please read usleep_range() and udelay()
>> > + * function descriptions for details and limitations.
>> > + * This macro will delay until 1/10 of a timer tick after
>> > + * it is called, and will then start sleeping.
>> > * @timeout_us: Timeout in us, 0 means never timeout
>> > * @sleep_before_op: if it is true, sleep @sleep_us before operation.
>> > *
>> > @@ -35,11 +37,18 @@
>> > ({ \
>> > u64 __timeout_us = (timeout_us); \
>> > unsigned long __sleep_us = (sleep_us); \
>> > - ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \
>> > + ktime_t __start_time = ktime_get(); \
>> > + u64 __delay_timeout_us = 100000/HZ; \
>> > + ktime_t __delay_timeout = ktime_add_us(__start_time, __delay_timeout_us); \
>> > + ktime_t __timeout = ktime_add_us(__start_time, __timeout_us); \
>> > int ___ret; \
>> > might_sleep_if((__sleep_us) != 0); \
>> > - if ((sleep_before_op) && __sleep_us) \
>> > - usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
>> > + if ((sleep_before_op) && __sleep_us) { \
>> > + if (__sleep_us <= __delay_timeout_us) \
>> > + udelay(__sleep_us); \
>>
>> If you want udelay() why not just use the atomic variant of the macro?
>
> That's what I had in v1; we decided this approach would better handle
> misbehaving devices.
> https://lore.kernel.org/all/20260517150253.031dec09@pumpkin/
I think the problem with trying to adapt to everything within
read_poll_timeout() is that every step like this adapts to a *specific*
use case, and once it gets specific enough, it's no longer usable to
other scenarios.
Having to reimplement the whole thing in drivers is much worse than
having to do two calls.
Could a staggered approach work?
ret = read_poll_timeout_atomic("short delay/timeout")
if (ret)
ret = read_poll_timeout("longer delay/timeout")
Then you have better control of the behaviour in the driver, instead of
adapting a generic function to a specific use case.
BR,
Jani.
--
Jani Nikula, Intel
On Thu, May 21, 2026 at 10:03:28AM +0300, Jani Nikula wrote:
> On Wed, 20 May 2026, Peter Collingbourne <peter@pcc.me.uk> wrote:
> > That's what I had in v1; we decided this approach would better handle
> > misbehaving devices.
> > https://lore.kernel.org/all/20260517150253.031dec09@pumpkin/
> I think the problem with trying to adapt to everything within
> read_poll_timeout() is that every step like this adapts to a *specific*
> use case, and once it gets specific enough, it's no longer usable to
> other scenarios.
> Having to reimplement the whole thing in drivers is much worse than
> having to do two calls.
> Could a staggered approach work?
> ret = read_poll_timeout_atomic("short delay/timeout")
> if (ret)
> ret = read_poll_timeout("longer delay/timeout")
> Then you have better control of the behaviour in the driver, instead of
> adapting a generic function to a specific use case.
If we're doing that it still seems like it'd be better to have a helper
than just takes the two timeouts and does the right thing with them. My
original feedback here was that we should have helpers which encourage
good patterns, and TBH I expect there's probably a bunch of scenarios
where some fixed cutoff would be a worthwile improvement for very little
effort on the part of users.
On Tue, 19 May 2026 03:24:46 -0700
Peter Collingbourne <peter@pcc.me.uk> wrote:
> A short polling delay, such as the delay of 5us
> (SPINAND_READ_POLL_DELAY_US) provided by the SPI NAND driver,
> can become a 1/HZ (order of ms) delay caused by the usleep_range()
> call in read_poll_timeout(), significantly reducing SPI NAND access
> performance. Fix it by adjusting the read_poll_timeout() macro to use
> udelay() to delay until 1/10 of a timer tick after it is called, and
> only then sleep.
>
> Fixes: c955a0cc8a28 ("spi: spi-mem: add automatic poll status functions")
> Signed-off-by: Peter Collingbourne <peter@pcc.me.uk>
> ---
> include/linux/iopoll.h | 30 ++++++++++++++++++++++--------
> 1 file changed, 22 insertions(+), 8 deletions(-)
>
> v2:
> * Fix it in read_poll_timeout() instead
>
> diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
> index 53edd69acb9b..2ee89b76f072 100644
> --- a/include/linux/iopoll.h
> +++ b/include/linux/iopoll.h
> @@ -19,9 +19,11 @@
> *
> * @op: Operation
> * @cond: Break condition
> - * @sleep_us: Maximum time to sleep between operations in us (0 tight-loops).
> - * Please read usleep_range() function description for details and
> - * limitations.
> + * @sleep_us: Maximum time to sleep or delay between operations in us
> + * (0 tight-loops). Please read usleep_range() and udelay()
> + * function descriptions for details and limitations.
> + * This macro will delay until 1/10 of a timer tick after
> + * it is called, and will then start sleeping.
> * @timeout_us: Timeout in us, 0 means never timeout
> * @sleep_before_op: if it is true, sleep @sleep_us before operation.
> *
> @@ -35,11 +37,18 @@
> ({ \
> u64 __timeout_us = (timeout_us); \
> unsigned long __sleep_us = (sleep_us); \
> - ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \
> + ktime_t __start_time = ktime_get(); \
> + u64 __delay_timeout_us = 100000/HZ; \
> + ktime_t __delay_timeout = ktime_add_us(__start_time, __delay_timeout_us); \
> + ktime_t __timeout = ktime_add_us(__start_time, __timeout_us); \
> int ___ret; \
> might_sleep_if((__sleep_us) != 0); \
> - if ((sleep_before_op) && __sleep_us) \
> - usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
> + if ((sleep_before_op) && __sleep_us) { \
> + if (__sleep_us <= __delay_timeout_us) \
> + udelay(__sleep_us); \
> + else \
> + usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
> + } \
> for (;;) { \
> bool __expired = __timeout_us && \
> ktime_compare(ktime_get(), __timeout) > 0; \
> @@ -54,8 +63,13 @@
> ___ret = -ETIMEDOUT; \
> break; \
> } \
> - if (__sleep_us) \
> - usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
> + if (__sleep_us) { \
> + if (__sleep_us <= __delay_timeout_us && \
> + ktime_compare(ktime_get(), __delay_timeout) < 0) \
> + udelay(__sleep_us); \
> + else \
> + usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
> + } \
> cpu_relax(); \
> } \
> ___ret; \
How about:
#define poll_timeout_us(op, cond, sleep_us, timeout_us, sleep_before_op) \
({ \
u64 __timeout_us = (timeout_us); \
unsigned long __sleep_us = (sleep_us); \
ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \
u64 __delay_timeout_us = 100000/HZ; \
int ___ret; \
bool __expired; \
might_sleep_if((__sleep_us) != 0); \
\
for (;; sleep_before_op = false) { \
if (!sleep_before_op) { \
__expired = __timeout_us && \
ktime_compare(ktime_get(), __timeout) > 0; \
/* guarantee 'op' and 'cond' are evaluated after timeout expired */ \
barrier(); \
op; \
if (cond) { \
___ret = 0; \
break; \
} \
if (__expired) { \
___ret = -ETIMEDOUT; \
break; \
} \
} \
if (__sleep_us > __delay_timeout_us) { \
usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
continue;
} \
if (__sleep_us) { \
__delay_timeout_us -= __sleep_us; \
udelay(__sleep_us); \
} \
cpu_relax(); \
} \
___ret; \
})
Which I think is approximately equivalent.
But I'm not at all sure the usleep/udelay test it right.
100000/HZ is a strange number of usecs; for HZ=100 it is 1ms, but for HZ=1000 0.1ms.
Maybe it should be more like:
u32 __delay_timeout_us = __sleep_us > 20 ? 0 : 100;
so that you delay for (approx) max 100us if the interval is less than 20us.
The is also a mismatch of long and u64.
I don't think anything (except the time_t) needs to be 64bit (esp. on 32bit).
I'm not sure about all architectures, but I'm pretty sure than on x86
usleep_range() is independent of HZ.
-- David
On Tue, May 19, 2026 at 11:35 AM David Laight
<david.laight.linux@gmail.com> wrote:
>
> On Tue, 19 May 2026 03:24:46 -0700
> Peter Collingbourne <peter@pcc.me.uk> wrote:
>
> > A short polling delay, such as the delay of 5us
> > (SPINAND_READ_POLL_DELAY_US) provided by the SPI NAND driver,
> > can become a 1/HZ (order of ms) delay caused by the usleep_range()
> > call in read_poll_timeout(), significantly reducing SPI NAND access
> > performance. Fix it by adjusting the read_poll_timeout() macro to use
> > udelay() to delay until 1/10 of a timer tick after it is called, and
> > only then sleep.
> >
> > Fixes: c955a0cc8a28 ("spi: spi-mem: add automatic poll status functions")
> > Signed-off-by: Peter Collingbourne <peter@pcc.me.uk>
> > ---
> > include/linux/iopoll.h | 30 ++++++++++++++++++++++--------
> > 1 file changed, 22 insertions(+), 8 deletions(-)
> >
> > v2:
> > * Fix it in read_poll_timeout() instead
> >
> > diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
> > index 53edd69acb9b..2ee89b76f072 100644
> > --- a/include/linux/iopoll.h
> > +++ b/include/linux/iopoll.h
> > @@ -19,9 +19,11 @@
> > *
> > * @op: Operation
> > * @cond: Break condition
> > - * @sleep_us: Maximum time to sleep between operations in us (0 tight-loops).
> > - * Please read usleep_range() function description for details and
> > - * limitations.
> > + * @sleep_us: Maximum time to sleep or delay between operations in us
> > + * (0 tight-loops). Please read usleep_range() and udelay()
> > + * function descriptions for details and limitations.
> > + * This macro will delay until 1/10 of a timer tick after
> > + * it is called, and will then start sleeping.
> > * @timeout_us: Timeout in us, 0 means never timeout
> > * @sleep_before_op: if it is true, sleep @sleep_us before operation.
> > *
> > @@ -35,11 +37,18 @@
> > ({ \
> > u64 __timeout_us = (timeout_us); \
> > unsigned long __sleep_us = (sleep_us); \
> > - ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \
> > + ktime_t __start_time = ktime_get(); \
> > + u64 __delay_timeout_us = 100000/HZ; \
> > + ktime_t __delay_timeout = ktime_add_us(__start_time, __delay_timeout_us); \
> > + ktime_t __timeout = ktime_add_us(__start_time, __timeout_us); \
> > int ___ret; \
> > might_sleep_if((__sleep_us) != 0); \
> > - if ((sleep_before_op) && __sleep_us) \
> > - usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
> > + if ((sleep_before_op) && __sleep_us) { \
> > + if (__sleep_us <= __delay_timeout_us) \
> > + udelay(__sleep_us); \
> > + else \
> > + usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
> > + } \
> > for (;;) { \
> > bool __expired = __timeout_us && \
> > ktime_compare(ktime_get(), __timeout) > 0; \
> > @@ -54,8 +63,13 @@
> > ___ret = -ETIMEDOUT; \
> > break; \
> > } \
> > - if (__sleep_us) \
> > - usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
> > + if (__sleep_us) { \
> > + if (__sleep_us <= __delay_timeout_us && \
> > + ktime_compare(ktime_get(), __delay_timeout) < 0) \
> > + udelay(__sleep_us); \
> > + else \
> > + usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
> > + } \
> > cpu_relax(); \
> > } \
> > ___ret; \
>
> How about:
> #define poll_timeout_us(op, cond, sleep_us, timeout_us, sleep_before_op) \
> ({ \
> u64 __timeout_us = (timeout_us); \
> unsigned long __sleep_us = (sleep_us); \
> ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \
> u64 __delay_timeout_us = 100000/HZ; \
> int ___ret; \
> bool __expired; \
> might_sleep_if((__sleep_us) != 0); \
> \
> for (;; sleep_before_op = false) { \
> if (!sleep_before_op) { \
> __expired = __timeout_us && \
> ktime_compare(ktime_get(), __timeout) > 0; \
> /* guarantee 'op' and 'cond' are evaluated after timeout expired */ \
> barrier(); \
> op; \
> if (cond) { \
> ___ret = 0; \
> break; \
> } \
> if (__expired) { \
> ___ret = -ETIMEDOUT; \
> break; \
> } \
> } \
> if (__sleep_us > __delay_timeout_us) { \
> usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
> continue;
> } \
> if (__sleep_us) { \
> __delay_timeout_us -= __sleep_us; \
> udelay(__sleep_us); \
> } \
> cpu_relax(); \
> } \
> ___ret; \
> })
> Which I think is approximately equivalent.
> But I'm not at all sure the usleep/udelay test it right.
> 100000/HZ is a strange number of usecs; for HZ=100 it is 1ms, but for HZ=1000 0.1ms.
The idea here was that since the sleep time for usleep_range() is tied
to HZ (or so I thought, see below), the delay timeout should also be
tied to HZ.
> Maybe it should be more like:
> u32 __delay_timeout_us = __sleep_us > 20 ? 0 : 100;
> so that you delay for (approx) max 100us if the interval is less than 20us.
I think that makes sense.
> The is also a mismatch of long and u64.
> I don't think anything (except the time_t) needs to be 64bit (esp. on 32bit).
Ack.
> I'm not sure about all architectures, but I'm pretty sure than on x86
> usleep_range() is independent of HZ.
Right, after looking into it more I learned that the hrtimer should
usually allow usleep_range() to return before the next timer tick.
Which, as it turns out, was not happening on my target device because
the kernel was configured without CONFIG_HIGH_RES_TIMERS. After
turning it on, the performance issue is fixed, even without this
patch.
For me this raises the question of whether CONFIG_HIGH_RES_TIMERS=n is
always a good default. On x86 (except for ancient microarchitectures)
and arm64, the architecture provides a guaranteed hrtimer, so to me it
makes sense for it to default to y on those architectures at least, so
that users who write their own config (not defconfig based) will avoid
issues like this difficult to debug performance issue by default.
If we do decide to turn it on everywhere by default, it may be worth
reconsidering whether we need to delay in poll_timeout_us() after all.
Peter
© 2016 - 2026 Red Hat, Inc.