From: Gal Pressman <gal@nvidia.com>
The write combining completion poll loop uses usleep_range() which can
sleep much longer than requested due to scheduler latency. Under load,
we witnessed a 20ms+ delay until the process was rescheduled, causing
the jiffies-based timeout to expire while the thread is sleeping.
The original do-while loop structure (poll, sleep, check timeout) would
exit without a final poll when waking after timeout, missing a CQE that
arrived during sleep.
Restructure the loop by moving the poll into the while condition,
ensuring we always poll after sleeping, catching CQEs that arrived
during that time.
While at it, remove the redundant 'err' assignment.
Fixes: d98995b4bf98 ("net/mlx5: Reimplement write combining test")
Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/wc.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wc.c b/drivers/net/ethernet/mellanox/mlx5/core/wc.c
index 815a7c97d6b0..29db15c4b978 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wc.c
@@ -390,12 +390,10 @@ static void mlx5_core_test_wc(struct mlx5_core_dev *mdev)
mlx5_wc_post_nop(sq, &offset, true);
expires = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;
- do {
- err = mlx5_wc_poll_cq(sq);
- if (err)
- usleep_range(2, 10);
- } while (mdev->wc_state == MLX5_WC_STATE_UNINITIALIZED &&
- time_is_after_jiffies(expires));
+ while ((mlx5_wc_poll_cq(sq),
+ mdev->wc_state == MLX5_WC_STATE_UNINITIALIZED) &&
+ time_is_after_jiffies(expires))
+ usleep_range(2, 10);
mlx5_wc_destroy_sq(sq);
--
2.44.0
On 2/12/2026 2:32 AM, Tariq Toukan wrote:
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wc.c b/drivers/net/ethernet/mellanox/mlx5/core/wc.c
> index 815a7c97d6b0..29db15c4b978 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/wc.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/wc.c
> @@ -390,12 +390,10 @@ static void mlx5_core_test_wc(struct mlx5_core_dev *mdev)
> mlx5_wc_post_nop(sq, &offset, true);
>
> expires = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;
> - do {
> - err = mlx5_wc_poll_cq(sq);
> - if (err)
> - usleep_range(2, 10);
> - } while (mdev->wc_state == MLX5_WC_STATE_UNINITIALIZED &&
> - time_is_after_jiffies(expires));
> + while ((mlx5_wc_poll_cq(sq),
> + mdev->wc_state == MLX5_WC_STATE_UNINITIALIZED) &&
> + time_is_after_jiffies(expires))
> + usleep_range(2, 10);
>
This could be written with poll_timeout_us(), but I don't know if it
warrants holding up the fix.
Something like the following:
diff --git i/drivers/net/ethernet/mellanox/mlx5/core/wc.c w/drivers/net/ethernet/mellanox/mlx5/core/wc.c
index 29db15c4b978..6ec9c1a2da78 100644
--- i/drivers/net/ethernet/mellanox/mlx5/core/wc.c
+++ w/drivers/net/ethernet/mellanox/mlx5/core/wc.c
@@ -15,7 +15,7 @@
#define TEST_WC_NUM_WQES 255
#define TEST_WC_LOG_CQ_SZ (order_base_2(TEST_WC_NUM_WQES))
#define TEST_WC_SQ_LOG_WQ_SZ TEST_WC_LOG_CQ_SZ
-#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100)
+#define TEST_WC_POLLING_MAX_TIME_USEC (100 * USEC_PER_MSEC)
struct mlx5_wc_cq {
/* data path - accessed per cqe */
@@ -359,7 +359,6 @@ static int mlx5_wc_poll_cq(struct mlx5_wc_sq *sq)
static void mlx5_core_test_wc(struct mlx5_core_dev *mdev)
{
unsigned int offset = 0;
- unsigned long expires;
struct mlx5_wc_sq *sq;
int i, err;
@@ -389,11 +388,9 @@ static void mlx5_core_test_wc(struct mlx5_core_dev *mdev)
mlx5_wc_post_nop(sq, &offset, true);
- expires = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;
- while ((mlx5_wc_poll_cq(sq),
- mdev->wc_state == MLX5_WC_STATE_UNINITIALIZED) &&
- time_is_after_jiffies(expires))
- usleep_range(2, 10);
+ poll_timeout_us(mlx5_wc_poll_cq(sq),
+ mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED,
+ 10, TEST_WC_POLLING_MAX_TIME_USEC, false);
mlx5_wc_destroy_sq(sq);
On 13/02/2026 0:36, Jacob Keller wrote:
>
>
> On 2/12/2026 2:32 AM, Tariq Toukan wrote:
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wc.c b/drivers/
>> net/ethernet/mellanox/mlx5/core/wc.c
>> index 815a7c97d6b0..29db15c4b978 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/wc.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/wc.c
>> @@ -390,12 +390,10 @@ static void mlx5_core_test_wc(struct
>> mlx5_core_dev *mdev)
>> mlx5_wc_post_nop(sq, &offset, true);
>> expires = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;
>> - do {
>> - err = mlx5_wc_poll_cq(sq);
>> - if (err)
>> - usleep_range(2, 10);
>> - } while (mdev->wc_state == MLX5_WC_STATE_UNINITIALIZED &&
>> - time_is_after_jiffies(expires));
>> + while ((mlx5_wc_poll_cq(sq),
>> + mdev->wc_state == MLX5_WC_STATE_UNINITIALIZED) &&
>> + time_is_after_jiffies(expires))
>> + usleep_range(2, 10);
>>
>
> This could be written with poll_timeout_us(), but I don't know if it
> warrants holding up the fix.
Wasn't aware of iopoll.h, will change, thanks Jacob!
© 2016 - 2026 Red Hat, Inc.