drivers/mmc/core/host.c | 60 +++++++++++++++++++++++++++++++--------- drivers/mmc/core/host.h | 35 ++++++++++++++++++++++- drivers/mmc/core/mmc.c | 6 ++++ drivers/mmc/core/queue.c | 3 ++ include/linux/mmc/host.h | 4 +++ 5 files changed, 94 insertions(+), 14 deletions(-)
The host->claimed flag shares a bitfield storage word with several
retune flags (retune_now, retune_paused, can_retune, doing_retune,
doing_init_tune). Updating those flags without host->lock can RMW the
shared word and clear claimed, triggering spurious
WARN_ON(!host->claimed).
Serialize all retune bitfield updates with host->lock. Provide lockless
__mmc_retune_* helpers so callers that already hold host->lock can
avoid deadlocks while public wrappers serialize updates. Also protect
doing_init_tune and the CQE retune_now assignment with host->lock.
Fixes: dfa13ebbe334 ("mmc: host: Add facility to support re-tuning")
Cc: stable@vger.kernel.org
Signed-off-by: Penghe Geng <pgeng@nvidia.com>
---
drivers/mmc/core/host.c | 60 +++++++++++++++++++++++++++++++---------
drivers/mmc/core/host.h | 35 ++++++++++++++++++++++-
drivers/mmc/core/mmc.c | 6 ++++
drivers/mmc/core/queue.c | 3 ++
include/linux/mmc/host.h | 4 +++
5 files changed, 94 insertions(+), 14 deletions(-)
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 88c95dbfd9cf..0b6b4a31f629 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -109,7 +109,11 @@ void mmc_unregister_host_class(void)
*/
void mmc_retune_enable(struct mmc_host *host)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->lock, flags);
host->can_retune = 1;
+ spin_unlock_irqrestore(&host->lock, flags);
if (host->retune_period)
mod_timer(&host->retune_timer,
jiffies + host->retune_period * HZ);
@@ -121,18 +125,31 @@ void mmc_retune_enable(struct mmc_host *host)
*/
void mmc_retune_pause(struct mmc_host *host)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->lock, flags);
if (!host->retune_paused) {
host->retune_paused = 1;
- mmc_retune_hold(host);
+ __mmc_retune_hold(host);
}
+ spin_unlock_irqrestore(&host->lock, flags);
}
EXPORT_SYMBOL(mmc_retune_pause);
void mmc_retune_unpause(struct mmc_host *host)
{
+ unsigned long flags;
+ bool released;
+
+ spin_lock_irqsave(&host->lock, flags);
if (host->retune_paused) {
host->retune_paused = 0;
- mmc_retune_release(host);
+ released = __mmc_retune_release(host);
+ spin_unlock_irqrestore(&host->lock, flags);
+ if (!released)
+ WARN_ON(1);
+ } else {
+ spin_unlock_irqrestore(&host->lock, flags);
}
}
EXPORT_SYMBOL(mmc_retune_unpause);
@@ -145,8 +162,12 @@ EXPORT_SYMBOL(mmc_retune_unpause);
*/
void mmc_retune_disable(struct mmc_host *host)
{
+ unsigned long flags;
+
mmc_retune_unpause(host);
+ spin_lock_irqsave(&host->lock, flags);
host->can_retune = 0;
+ spin_unlock_irqrestore(&host->lock, flags);
timer_delete_sync(&host->retune_timer);
mmc_retune_clear(host);
}
@@ -159,16 +180,22 @@ EXPORT_SYMBOL(mmc_retune_timer_stop);
void mmc_retune_hold(struct mmc_host *host)
{
- if (!host->hold_retune)
- host->retune_now = 1;
- host->hold_retune += 1;
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->lock, flags);
+ __mmc_retune_hold(host);
+ spin_unlock_irqrestore(&host->lock, flags);
}
void mmc_retune_release(struct mmc_host *host)
{
- if (host->hold_retune)
- host->hold_retune -= 1;
- else
+ unsigned long flags;
+ bool released;
+
+ spin_lock_irqsave(&host->lock, flags);
+ released = __mmc_retune_release(host);
+ spin_unlock_irqrestore(&host->lock, flags);
+ if (!released)
WARN_ON(1);
}
EXPORT_SYMBOL(mmc_retune_release);
@@ -177,18 +204,23 @@ int mmc_retune(struct mmc_host *host)
{
bool return_to_hs400 = false;
int err;
+ unsigned long flags;
- if (host->retune_now)
- host->retune_now = 0;
- else
+ spin_lock_irqsave(&host->lock, flags);
+ if (!host->retune_now) {
+ spin_unlock_irqrestore(&host->lock, flags);
return 0;
+ }
+ host->retune_now = 0;
- if (!host->need_retune || host->doing_retune || !host->card)
+ if (!host->need_retune || host->doing_retune || !host->card) {
+ spin_unlock_irqrestore(&host->lock, flags);
return 0;
+ }
host->need_retune = 0;
-
host->doing_retune = 1;
+ spin_unlock_irqrestore(&host->lock, flags);
if (host->ios.timing == MMC_TIMING_MMC_HS400) {
err = mmc_hs400_to_hs200(host->card);
@@ -205,7 +237,9 @@ int mmc_retune(struct mmc_host *host)
if (return_to_hs400)
err = mmc_hs200_to_hs400(host->card);
out:
+ spin_lock_irqsave(&host->lock, flags);
host->doing_retune = 0;
+ spin_unlock_irqrestore(&host->lock, flags);
return err;
}
diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
index 5941d68ff989..07e4f427fe15 100644
--- a/drivers/mmc/core/host.h
+++ b/drivers/mmc/core/host.h
@@ -21,22 +21,55 @@ int mmc_retune(struct mmc_host *host);
void mmc_retune_pause(struct mmc_host *host);
void mmc_retune_unpause(struct mmc_host *host);
-static inline void mmc_retune_clear(struct mmc_host *host)
+static inline void __mmc_retune_clear(struct mmc_host *host)
{
host->retune_now = 0;
host->need_retune = 0;
}
+static inline void mmc_retune_clear(struct mmc_host *host)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->lock, flags);
+ __mmc_retune_clear(host);
+ spin_unlock_irqrestore(&host->lock, flags);
+}
+
+static inline void __mmc_retune_hold(struct mmc_host *host)
+{
+ if (!host->hold_retune)
+ host->retune_now = 1;
+ host->hold_retune += 1;
+}
+
+static inline bool __mmc_retune_release(struct mmc_host *host)
+{
+ if (host->hold_retune) {
+ host->hold_retune -= 1;
+ return true;
+ }
+ return false;
+}
+
static inline void mmc_retune_hold_now(struct mmc_host *host)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->lock, flags);
host->retune_now = 0;
host->hold_retune += 1;
+ spin_unlock_irqrestore(&host->lock, flags);
}
static inline void mmc_retune_recheck(struct mmc_host *host)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->lock, flags);
if (host->hold_retune <= 1)
host->retune_now = 1;
+ spin_unlock_irqrestore(&host->lock, flags);
}
static inline int mmc_host_can_cmd23(struct mmc_host *host)
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 7c86efb1044a..114febd15f08 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1820,13 +1820,19 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
goto free_card;
if (mmc_card_hs200(card)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->lock, flags);
host->doing_init_tune = 1;
+ spin_unlock_irqrestore(&host->lock, flags);
err = mmc_hs200_tuning(card);
if (!err)
err = mmc_select_hs400(card);
+ spin_lock_irqsave(&host->lock, flags);
host->doing_init_tune = 0;
+ spin_unlock_irqrestore(&host->lock, flags);
if (err)
goto free_card;
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 284856c8f655..5e38759c87f5 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -237,6 +237,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
enum mmc_issue_type issue_type;
enum mmc_issued issued;
bool get_card, cqe_retune_ok;
+ unsigned long flags;
blk_status_t ret;
if (mmc_card_removed(mq->card)) {
@@ -297,8 +298,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
mmc_get_card(card, &mq->ctx);
if (host->cqe_enabled) {
+ spin_lock_irqsave(&host->lock, flags);
host->retune_now = host->need_retune && cqe_retune_ok &&
!host->hold_retune;
+ spin_unlock_irqrestore(&host->lock, flags);
}
blk_mq_start_request(req);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index e0e2c265e5d1..e7bddbafd1da 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -713,8 +713,12 @@ void mmc_retune_timer_stop(struct mmc_host *host);
static inline void mmc_retune_needed(struct mmc_host *host)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->lock, flags);
if (host->can_retune)
host->need_retune = 1;
+ spin_unlock_irqrestore(&host->lock, flags);
}
static inline bool mmc_can_retune(struct mmc_host *host)
--
2.43.0
On 15/01/2026 23:46, Penghe Geng wrote:
> The host->claimed flag shares a bitfield storage word with several
> retune flags (retune_now, retune_paused, can_retune, doing_retune,
> doing_init_tune). Updating those flags without host->lock can RMW the
> shared word and clear claimed, triggering spurious
> WARN_ON(!host->claimed).
Thanks for finding this!
The design is that those members are protected by the host->claimed
lock itself.
mmc operations are primarily single-threaded, protected by the
host->claimed lock, although the block driver does allow multiple
transfers at the same time in some cases.
There are also other contexts like interrupt handlers.
Can you provide some information about when WARN_ON(!host->claimed)
is being hit? Including the stack dump?
What kernel version?
Is it eMMC, SDIO or SD card?
Is CQE being used?
Are there any I/O errors happening also?
What controller driver is it?
In any case, the use of bit fields seems to add complexity unnecessarily,
so we should consider converting some or all of them to bool.
>
> Serialize all retune bitfield updates with host->lock. Provide lockless
> __mmc_retune_* helpers so callers that already hold host->lock can
> avoid deadlocks while public wrappers serialize updates. Also protect
> doing_init_tune and the CQE retune_now assignment with host->lock.
>
> Fixes: dfa13ebbe334 ("mmc: host: Add facility to support re-tuning")
> Cc: stable@vger.kernel.org
> Signed-off-by: Penghe Geng <pgeng@nvidia.com>
> ---
> drivers/mmc/core/host.c | 60 +++++++++++++++++++++++++++++++---------
> drivers/mmc/core/host.h | 35 ++++++++++++++++++++++-
> drivers/mmc/core/mmc.c | 6 ++++
> drivers/mmc/core/queue.c | 3 ++
> include/linux/mmc/host.h | 4 +++
> 5 files changed, 94 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
> index 88c95dbfd9cf..0b6b4a31f629 100644
> --- a/drivers/mmc/core/host.c
> +++ b/drivers/mmc/core/host.c
> @@ -109,7 +109,11 @@ void mmc_unregister_host_class(void)
> */
> void mmc_retune_enable(struct mmc_host *host)
> {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> host->can_retune = 1;
> + spin_unlock_irqrestore(&host->lock, flags);
> if (host->retune_period)
> mod_timer(&host->retune_timer,
> jiffies + host->retune_period * HZ);
> @@ -121,18 +125,31 @@ void mmc_retune_enable(struct mmc_host *host)
> */
> void mmc_retune_pause(struct mmc_host *host)
> {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> if (!host->retune_paused) {
> host->retune_paused = 1;
> - mmc_retune_hold(host);
> + __mmc_retune_hold(host);
> }
> + spin_unlock_irqrestore(&host->lock, flags);
> }
> EXPORT_SYMBOL(mmc_retune_pause);
>
> void mmc_retune_unpause(struct mmc_host *host)
> {
> + unsigned long flags;
> + bool released;
> +
> + spin_lock_irqsave(&host->lock, flags);
> if (host->retune_paused) {
> host->retune_paused = 0;
> - mmc_retune_release(host);
> + released = __mmc_retune_release(host);
> + spin_unlock_irqrestore(&host->lock, flags);
> + if (!released)
> + WARN_ON(1);
> + } else {
> + spin_unlock_irqrestore(&host->lock, flags);
> }
> }
> EXPORT_SYMBOL(mmc_retune_unpause);
> @@ -145,8 +162,12 @@ EXPORT_SYMBOL(mmc_retune_unpause);
> */
> void mmc_retune_disable(struct mmc_host *host)
> {
> + unsigned long flags;
> +
> mmc_retune_unpause(host);
> + spin_lock_irqsave(&host->lock, flags);
> host->can_retune = 0;
> + spin_unlock_irqrestore(&host->lock, flags);
> timer_delete_sync(&host->retune_timer);
> mmc_retune_clear(host);
> }
> @@ -159,16 +180,22 @@ EXPORT_SYMBOL(mmc_retune_timer_stop);
>
> void mmc_retune_hold(struct mmc_host *host)
> {
> - if (!host->hold_retune)
> - host->retune_now = 1;
> - host->hold_retune += 1;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> + __mmc_retune_hold(host);
> + spin_unlock_irqrestore(&host->lock, flags);
> }
>
> void mmc_retune_release(struct mmc_host *host)
> {
> - if (host->hold_retune)
> - host->hold_retune -= 1;
> - else
> + unsigned long flags;
> + bool released;
> +
> + spin_lock_irqsave(&host->lock, flags);
> + released = __mmc_retune_release(host);
> + spin_unlock_irqrestore(&host->lock, flags);
> + if (!released)
> WARN_ON(1);
> }
> EXPORT_SYMBOL(mmc_retune_release);
> @@ -177,18 +204,23 @@ int mmc_retune(struct mmc_host *host)
> {
> bool return_to_hs400 = false;
> int err;
> + unsigned long flags;
>
> - if (host->retune_now)
> - host->retune_now = 0;
> - else
> + spin_lock_irqsave(&host->lock, flags);
> + if (!host->retune_now) {
> + spin_unlock_irqrestore(&host->lock, flags);
> return 0;
> + }
> + host->retune_now = 0;
>
> - if (!host->need_retune || host->doing_retune || !host->card)
> + if (!host->need_retune || host->doing_retune || !host->card) {
> + spin_unlock_irqrestore(&host->lock, flags);
> return 0;
> + }
>
> host->need_retune = 0;
> -
> host->doing_retune = 1;
> + spin_unlock_irqrestore(&host->lock, flags);
>
> if (host->ios.timing == MMC_TIMING_MMC_HS400) {
> err = mmc_hs400_to_hs200(host->card);
> @@ -205,7 +237,9 @@ int mmc_retune(struct mmc_host *host)
> if (return_to_hs400)
> err = mmc_hs200_to_hs400(host->card);
> out:
> + spin_lock_irqsave(&host->lock, flags);
> host->doing_retune = 0;
> + spin_unlock_irqrestore(&host->lock, flags);
>
> return err;
> }
> diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
> index 5941d68ff989..07e4f427fe15 100644
> --- a/drivers/mmc/core/host.h
> +++ b/drivers/mmc/core/host.h
> @@ -21,22 +21,55 @@ int mmc_retune(struct mmc_host *host);
> void mmc_retune_pause(struct mmc_host *host);
> void mmc_retune_unpause(struct mmc_host *host);
>
> -static inline void mmc_retune_clear(struct mmc_host *host)
> +static inline void __mmc_retune_clear(struct mmc_host *host)
> {
> host->retune_now = 0;
> host->need_retune = 0;
> }
>
> +static inline void mmc_retune_clear(struct mmc_host *host)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> + __mmc_retune_clear(host);
> + spin_unlock_irqrestore(&host->lock, flags);
> +}
> +
> +static inline void __mmc_retune_hold(struct mmc_host *host)
> +{
> + if (!host->hold_retune)
> + host->retune_now = 1;
> + host->hold_retune += 1;
> +}
> +
> +static inline bool __mmc_retune_release(struct mmc_host *host)
> +{
> + if (host->hold_retune) {
> + host->hold_retune -= 1;
> + return true;
> + }
> + return false;
> +}
> +
> static inline void mmc_retune_hold_now(struct mmc_host *host)
> {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> host->retune_now = 0;
> host->hold_retune += 1;
> + spin_unlock_irqrestore(&host->lock, flags);
> }
>
> static inline void mmc_retune_recheck(struct mmc_host *host)
> {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> if (host->hold_retune <= 1)
> host->retune_now = 1;
> + spin_unlock_irqrestore(&host->lock, flags);
> }
>
> static inline int mmc_host_can_cmd23(struct mmc_host *host)
> diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
> index 7c86efb1044a..114febd15f08 100644
> --- a/drivers/mmc/core/mmc.c
> +++ b/drivers/mmc/core/mmc.c
> @@ -1820,13 +1820,19 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
> goto free_card;
>
> if (mmc_card_hs200(card)) {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> host->doing_init_tune = 1;
> + spin_unlock_irqrestore(&host->lock, flags);
>
> err = mmc_hs200_tuning(card);
> if (!err)
> err = mmc_select_hs400(card);
>
> + spin_lock_irqsave(&host->lock, flags);
> host->doing_init_tune = 0;
> + spin_unlock_irqrestore(&host->lock, flags);
>
> if (err)
> goto free_card;
> diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
> index 284856c8f655..5e38759c87f5 100644
> --- a/drivers/mmc/core/queue.c
> +++ b/drivers/mmc/core/queue.c
> @@ -237,6 +237,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
> enum mmc_issue_type issue_type;
> enum mmc_issued issued;
> bool get_card, cqe_retune_ok;
> + unsigned long flags;
> blk_status_t ret;
>
> if (mmc_card_removed(mq->card)) {
> @@ -297,8 +298,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
> mmc_get_card(card, &mq->ctx);
>
> if (host->cqe_enabled) {
> + spin_lock_irqsave(&host->lock, flags);
> host->retune_now = host->need_retune && cqe_retune_ok &&
> !host->hold_retune;
> + spin_unlock_irqrestore(&host->lock, flags);
> }
>
> blk_mq_start_request(req);
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
> index e0e2c265e5d1..e7bddbafd1da 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -713,8 +713,12 @@ void mmc_retune_timer_stop(struct mmc_host *host);
>
> static inline void mmc_retune_needed(struct mmc_host *host)
> {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> if (host->can_retune)
> host->need_retune = 1;
> + spin_unlock_irqrestore(&host->lock, flags);
> }
>
> static inline bool mmc_can_retune(struct mmc_host *host)
Hi Adrian,
Thanks for the feedback. Below are the details you asked for.
Kernel versions:
- Seen on 5.15.120.bsk.business.6‑arm64 (custom tree).
- Also observed on 6.1.0‑11‑arm64 (Debian 6.1.38‑4).
Media: eMMC
Controllers:
- BlueField‑2: Synopsys DesignWare MMC (drivers/mmc/host/dw_mmc-bluefield.c)
- BlueField‑3: Synopsys DWC MSHC (drivers/mmc/host/sdhci-of-dwcmshc.c in‑tree) and also with OOT sdhci-of-dwcmshc-bf3
CQE:
- Not enabled at runtime. CONFIG_MMC_CQHCI=m.
- lsmod | grep cq is empty by default.
- modprobe cqhci loads with 0 users and no CQE/CQHCI enable
messages in dmesg. So CQE is not in use by the active host.
I/O errors:
- None observed around the WARN.
Repro:
- Intermittent, mostly during boot under stress.
- Roughly 0.1–1% depending on distro/platform.
Example stack (BF3, in-tree sdhci-of-dwcmshc):
------------[ cut here ]------------
mmcblk0boot1: mmc0:0001 Y29128 31.9 MiB
WARNING: CPU: 8 PID: 240 at drivers/mmc/core/core.c:349 mmc_start_request+0xb4/0xc4
Modules linked in: crc16(E) mbcache(E) jbd2(E) nvme_tcp(OE) nvme_rdma(OE) rdma_cm(OE) iw_cm(OE) ib_cm(OE) ib_core(OE) nvme_fabrics(OE) configfs(E) nls_ascii(E) nls_cp437(E) nls_cp850(E) msdos(E) efivarfs(E) nvme(OE) nvme_core(OE) virtio_net(E) net_failover(E) virtio_console(E) failover(E) mlxbf_tmfifo(OE) mlx_compat(OE) virtio(E) t10_pi(E) sbsa_gwdt(E) mlxbf_bootctl(OE) sdhci_of_dwcmshc(OE) virtio_ring(E)
mmcblk0rpmb: mmc0:0001 Y29128 4.00 MiB, chardev (245:0)
CPU: 8 PID: 240 Comm: kworker/8:1H Tainted: G OE 5.15.120.bsk.business.6-arm64 #5.15.120.bsk.business.6
Hardware name: https://www.mellanox.com BlueField-3 DPU/BlueField-3 DPU, BIOS 4.9.2.13576 Mar 18 2025
Workqueue: kblockd blk_mq_run_work_fn
pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : mmc_start_request+0xb4/0xc4
lr : mmc_start_request+0x68/0xc4
sp : ffff80000932ba90
x29: ffff80000932ba90 x28: ffff000081b3b308 x27: 0000000000000000
x26: 0000000000000001 x25: 0000000000000000 x24: ffff000082aec000
x23: ffff000082485800 x22: ffff000082aec000 x21: 0000000000000000
x20: ffff000081b3b3d8 x19: ffff000082aec000 x18: 0000000000000000
x17: 0000000000000000 x16: ffffbab212913c40 x15: 0000000000000000
x14: 0000000000000000 x13: 0000000000000038 x12: ffff00008f06b000
x11: 7f7f7f7f7f7f7f7f x10: ffffbab2144acbc8 x9 : ffffbab2130ad368
x8 : ffff000081b3b548 x7 : ffff000082aec000 x6 : 0000000000000000
x5 : ffff000081b3b458 x4 : ffff00008a4b5e80 x3 : ffff0000824859b0
x2 : 0000000000000000 x1 : ffff000081b3b4c8 x0 : 0000000000000020
Call trace:
mmc_start_request+0xb4/0xc4
mmc_blk_mq_issue_rq+0x310/0x8fc
mmc_mq_queue_rq+0x154/0x3e0
blk_mq_dispatch_rq_list+0x13c/0xa44
blk_mq_do_dispatch_sched+0x2cc/0x33c
__blk_mq_sched_dispatch_requests+0x154/0x1b0
blk_mq_sched_dispatch_requests+0x40/0x80
__blk_mq_run_hw_queue+0x58/0xa0
blk_mq_run_work_fn+0x28/0x34
process_one_work+0x1f8/0x4c0
worker_thread+0x180/0x580
kthread+0x128/0x13c
kthread_return_to_user+0x0/0x10
---[ end trace fc3df73f08f7c8ee ]---
I agree the bitfield usage adds complexity. I can work on a follow-up
to convert the retune-related flags to bools if that’s the preferred
direction.
Thanks,
Penghe
On Mon, Jan 26, 2026 at 03:43:14PM +0200, Adrian Hunter wrote:
> External email: Use caution opening links or attachments
>
>
> On 15/01/2026 23:46, Penghe Geng wrote:
> > The host->claimed flag shares a bitfield storage word with several
> > retune flags (retune_now, retune_paused, can_retune, doing_retune,
> > doing_init_tune). Updating those flags without host->lock can RMW the
> > shared word and clear claimed, triggering spurious
> > WARN_ON(!host->claimed).
>
> Thanks for finding this!
>
> The design is that those members are protected by the host->claimed
> lock itself.
>
> mmc operations are primarily single-threaded, protected by the
> host->claimed lock, although the block driver does allow multiple
> transfers at the same time in some cases.
>
> There are also other contexts like interrupt handlers.
>
> Can you provide some information about when WARN_ON(!host->claimed)
> is being hit? Including the stack dump?
> What kernel version?
> Is it eMMC, SDIO or SD card?
> Is CQE being used?
> Are there any I/O errors happening also?
> What controller driver is it?
>
> In any case, the use of bit fields seems to add complexity unnecessarily,
> so we should consider converting some or all of them to bool.
>
> >
> > Serialize all retune bitfield updates with host->lock. Provide lockless
> > __mmc_retune_* helpers so callers that already hold host->lock can
> > avoid deadlocks while public wrappers serialize updates. Also protect
> > doing_init_tune and the CQE retune_now assignment with host->lock.
> >
> > Fixes: dfa13ebbe334 ("mmc: host: Add facility to support re-tuning")
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Penghe Geng <pgeng@nvidia.com>
> > ---
> > drivers/mmc/core/host.c | 60 +++++++++++++++++++++++++++++++---------
> > drivers/mmc/core/host.h | 35 ++++++++++++++++++++++-
> > drivers/mmc/core/mmc.c | 6 ++++
> > drivers/mmc/core/queue.c | 3 ++
> > include/linux/mmc/host.h | 4 +++
> > 5 files changed, 94 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
> > index 88c95dbfd9cf..0b6b4a31f629 100644
> > --- a/drivers/mmc/core/host.c
> > +++ b/drivers/mmc/core/host.c
> > @@ -109,7 +109,11 @@ void mmc_unregister_host_class(void)
> > */
> > void mmc_retune_enable(struct mmc_host *host)
> > {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > host->can_retune = 1;
> > + spin_unlock_irqrestore(&host->lock, flags);
> > if (host->retune_period)
> > mod_timer(&host->retune_timer,
> > jiffies + host->retune_period * HZ);
> > @@ -121,18 +125,31 @@ void mmc_retune_enable(struct mmc_host *host)
> > */
> > void mmc_retune_pause(struct mmc_host *host)
> > {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > if (!host->retune_paused) {
> > host->retune_paused = 1;
> > - mmc_retune_hold(host);
> > + __mmc_retune_hold(host);
> > }
> > + spin_unlock_irqrestore(&host->lock, flags);
> > }
> > EXPORT_SYMBOL(mmc_retune_pause);
> >
> > void mmc_retune_unpause(struct mmc_host *host)
> > {
> > + unsigned long flags;
> > + bool released;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > if (host->retune_paused) {
> > host->retune_paused = 0;
> > - mmc_retune_release(host);
> > + released = __mmc_retune_release(host);
> > + spin_unlock_irqrestore(&host->lock, flags);
> > + if (!released)
> > + WARN_ON(1);
> > + } else {
> > + spin_unlock_irqrestore(&host->lock, flags);
> > }
> > }
> > EXPORT_SYMBOL(mmc_retune_unpause);
> > @@ -145,8 +162,12 @@ EXPORT_SYMBOL(mmc_retune_unpause);
> > */
> > void mmc_retune_disable(struct mmc_host *host)
> > {
> > + unsigned long flags;
> > +
> > mmc_retune_unpause(host);
> > + spin_lock_irqsave(&host->lock, flags);
> > host->can_retune = 0;
> > + spin_unlock_irqrestore(&host->lock, flags);
> > timer_delete_sync(&host->retune_timer);
> > mmc_retune_clear(host);
> > }
> > @@ -159,16 +180,22 @@ EXPORT_SYMBOL(mmc_retune_timer_stop);
> >
> > void mmc_retune_hold(struct mmc_host *host)
> > {
> > - if (!host->hold_retune)
> > - host->retune_now = 1;
> > - host->hold_retune += 1;
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > + __mmc_retune_hold(host);
> > + spin_unlock_irqrestore(&host->lock, flags);
> > }
> >
> > void mmc_retune_release(struct mmc_host *host)
> > {
> > - if (host->hold_retune)
> > - host->hold_retune -= 1;
> > - else
> > + unsigned long flags;
> > + bool released;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > + released = __mmc_retune_release(host);
> > + spin_unlock_irqrestore(&host->lock, flags);
> > + if (!released)
> > WARN_ON(1);
> > }
> > EXPORT_SYMBOL(mmc_retune_release);
> > @@ -177,18 +204,23 @@ int mmc_retune(struct mmc_host *host)
> > {
> > bool return_to_hs400 = false;
> > int err;
> > + unsigned long flags;
> >
> > - if (host->retune_now)
> > - host->retune_now = 0;
> > - else
> > + spin_lock_irqsave(&host->lock, flags);
> > + if (!host->retune_now) {
> > + spin_unlock_irqrestore(&host->lock, flags);
> > return 0;
> > + }
> > + host->retune_now = 0;
> >
> > - if (!host->need_retune || host->doing_retune || !host->card)
> > + if (!host->need_retune || host->doing_retune || !host->card) {
> > + spin_unlock_irqrestore(&host->lock, flags);
> > return 0;
> > + }
> >
> > host->need_retune = 0;
> > -
> > host->doing_retune = 1;
> > + spin_unlock_irqrestore(&host->lock, flags);
> >
> > if (host->ios.timing == MMC_TIMING_MMC_HS400) {
> > err = mmc_hs400_to_hs200(host->card);
> > @@ -205,7 +237,9 @@ int mmc_retune(struct mmc_host *host)
> > if (return_to_hs400)
> > err = mmc_hs200_to_hs400(host->card);
> > out:
> > + spin_lock_irqsave(&host->lock, flags);
> > host->doing_retune = 0;
> > + spin_unlock_irqrestore(&host->lock, flags);
> >
> > return err;
> > }
> > diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
> > index 5941d68ff989..07e4f427fe15 100644
> > --- a/drivers/mmc/core/host.h
> > +++ b/drivers/mmc/core/host.h
> > @@ -21,22 +21,55 @@ int mmc_retune(struct mmc_host *host);
> > void mmc_retune_pause(struct mmc_host *host);
> > void mmc_retune_unpause(struct mmc_host *host);
> >
> > -static inline void mmc_retune_clear(struct mmc_host *host)
> > +static inline void __mmc_retune_clear(struct mmc_host *host)
> > {
> > host->retune_now = 0;
> > host->need_retune = 0;
> > }
> >
> > +static inline void mmc_retune_clear(struct mmc_host *host)
> > +{
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > + __mmc_retune_clear(host);
> > + spin_unlock_irqrestore(&host->lock, flags);
> > +}
> > +
> > +static inline void __mmc_retune_hold(struct mmc_host *host)
> > +{
> > + if (!host->hold_retune)
> > + host->retune_now = 1;
> > + host->hold_retune += 1;
> > +}
> > +
> > +static inline bool __mmc_retune_release(struct mmc_host *host)
> > +{
> > + if (host->hold_retune) {
> > + host->hold_retune -= 1;
> > + return true;
> > + }
> > + return false;
> > +}
> > +
> > static inline void mmc_retune_hold_now(struct mmc_host *host)
> > {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > host->retune_now = 0;
> > host->hold_retune += 1;
> > + spin_unlock_irqrestore(&host->lock, flags);
> > }
> >
> > static inline void mmc_retune_recheck(struct mmc_host *host)
> > {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > if (host->hold_retune <= 1)
> > host->retune_now = 1;
> > + spin_unlock_irqrestore(&host->lock, flags);
> > }
> >
> > static inline int mmc_host_can_cmd23(struct mmc_host *host)
> > diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
> > index 7c86efb1044a..114febd15f08 100644
> > --- a/drivers/mmc/core/mmc.c
> > +++ b/drivers/mmc/core/mmc.c
> > @@ -1820,13 +1820,19 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
> > goto free_card;
> >
> > if (mmc_card_hs200(card)) {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > host->doing_init_tune = 1;
> > + spin_unlock_irqrestore(&host->lock, flags);
> >
> > err = mmc_hs200_tuning(card);
> > if (!err)
> > err = mmc_select_hs400(card);
> >
> > + spin_lock_irqsave(&host->lock, flags);
> > host->doing_init_tune = 0;
> > + spin_unlock_irqrestore(&host->lock, flags);
> >
> > if (err)
> > goto free_card;
> > diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
> > index 284856c8f655..5e38759c87f5 100644
> > --- a/drivers/mmc/core/queue.c
> > +++ b/drivers/mmc/core/queue.c
> > @@ -237,6 +237,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
> > enum mmc_issue_type issue_type;
> > enum mmc_issued issued;
> > bool get_card, cqe_retune_ok;
> > + unsigned long flags;
> > blk_status_t ret;
> >
> > if (mmc_card_removed(mq->card)) {
> > @@ -297,8 +298,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
> > mmc_get_card(card, &mq->ctx);
> >
> > if (host->cqe_enabled) {
> > + spin_lock_irqsave(&host->lock, flags);
> > host->retune_now = host->need_retune && cqe_retune_ok &&
> > !host->hold_retune;
> > + spin_unlock_irqrestore(&host->lock, flags);
> > }
> >
> > blk_mq_start_request(req);
> > diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
> > index e0e2c265e5d1..e7bddbafd1da 100644
> > --- a/include/linux/mmc/host.h
> > +++ b/include/linux/mmc/host.h
> > @@ -713,8 +713,12 @@ void mmc_retune_timer_stop(struct mmc_host *host);
> >
> > static inline void mmc_retune_needed(struct mmc_host *host)
> > {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > if (host->can_retune)
> > host->need_retune = 1;
> > + spin_unlock_irqrestore(&host->lock, flags);
> > }
> >
> > static inline bool mmc_can_retune(struct mmc_host *host)
>
On Mon, Jan 26, 2026 at 03:43:14PM +0200, Adrian Hunter wrote:
> External email: Use caution opening links or attachments
>
>
> On 15/01/2026 23:46, Penghe Geng wrote:
> > The host->claimed flag shares a bitfield storage word with several
> > retune flags (retune_now, retune_paused, can_retune, doing_retune,
> > doing_init_tune). Updating those flags without host->lock can RMW the
> > shared word and clear claimed, triggering spurious
> > WARN_ON(!host->claimed).
>
> Thanks for finding this!
>
> The design is that those members are protected by the host->claimed
> lock itself.
>
> mmc operations are primarily single-threaded, protected by the
> host->claimed lock, although the block driver does allow multiple
> transfers at the same time in some cases.
>
> There are also other contexts like interrupt handlers.
>
> Can you provide some information about when WARN_ON(!host->claimed)
> is being hit? Including the stack dump?
> What kernel version?
> Is it eMMC, SDIO or SD card?
> Is CQE being used?
> Are there any I/O errors happening also?
> What controller driver is it?
>
> In any case, the use of bit fields seems to add complexity unnecessarily,
> so we should consider converting some or all of them to bool.
>
> >
> > Serialize all retune bitfield updates with host->lock. Provide lockless
> > __mmc_retune_* helpers so callers that already hold host->lock can
> > avoid deadlocks while public wrappers serialize updates. Also protect
> > doing_init_tune and the CQE retune_now assignment with host->lock.
> >
> > Fixes: dfa13ebbe334 ("mmc: host: Add facility to support re-tuning")
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Penghe Geng <pgeng@nvidia.com>
> > ---
> > drivers/mmc/core/host.c | 60 +++++++++++++++++++++++++++++++---------
> > drivers/mmc/core/host.h | 35 ++++++++++++++++++++++-
> > drivers/mmc/core/mmc.c | 6 ++++
> > drivers/mmc/core/queue.c | 3 ++
> > include/linux/mmc/host.h | 4 +++
> > 5 files changed, 94 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
> > index 88c95dbfd9cf..0b6b4a31f629 100644
> > --- a/drivers/mmc/core/host.c
> > +++ b/drivers/mmc/core/host.c
> > @@ -109,7 +109,11 @@ void mmc_unregister_host_class(void)
> > */
> > void mmc_retune_enable(struct mmc_host *host)
> > {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > host->can_retune = 1;
> > + spin_unlock_irqrestore(&host->lock, flags);
> > if (host->retune_period)
> > mod_timer(&host->retune_timer,
> > jiffies + host->retune_period * HZ);
> > @@ -121,18 +125,31 @@ void mmc_retune_enable(struct mmc_host *host)
> > */
> > void mmc_retune_pause(struct mmc_host *host)
> > {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > if (!host->retune_paused) {
> > host->retune_paused = 1;
> > - mmc_retune_hold(host);
> > + __mmc_retune_hold(host);
> > }
> > + spin_unlock_irqrestore(&host->lock, flags);
> > }
> > EXPORT_SYMBOL(mmc_retune_pause);
> >
> > void mmc_retune_unpause(struct mmc_host *host)
> > {
> > + unsigned long flags;
> > + bool released;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > if (host->retune_paused) {
> > host->retune_paused = 0;
> > - mmc_retune_release(host);
> > + released = __mmc_retune_release(host);
> > + spin_unlock_irqrestore(&host->lock, flags);
> > + if (!released)
> > + WARN_ON(1);
> > + } else {
> > + spin_unlock_irqrestore(&host->lock, flags);
> > }
> > }
> > EXPORT_SYMBOL(mmc_retune_unpause);
> > @@ -145,8 +162,12 @@ EXPORT_SYMBOL(mmc_retune_unpause);
> > */
> > void mmc_retune_disable(struct mmc_host *host)
> > {
> > + unsigned long flags;
> > +
> > mmc_retune_unpause(host);
> > + spin_lock_irqsave(&host->lock, flags);
> > host->can_retune = 0;
> > + spin_unlock_irqrestore(&host->lock, flags);
> > timer_delete_sync(&host->retune_timer);
> > mmc_retune_clear(host);
> > }
> > @@ -159,16 +180,22 @@ EXPORT_SYMBOL(mmc_retune_timer_stop);
> >
> > void mmc_retune_hold(struct mmc_host *host)
> > {
> > - if (!host->hold_retune)
> > - host->retune_now = 1;
> > - host->hold_retune += 1;
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > + __mmc_retune_hold(host);
> > + spin_unlock_irqrestore(&host->lock, flags);
> > }
> >
> > void mmc_retune_release(struct mmc_host *host)
> > {
> > - if (host->hold_retune)
> > - host->hold_retune -= 1;
> > - else
> > + unsigned long flags;
> > + bool released;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > + released = __mmc_retune_release(host);
> > + spin_unlock_irqrestore(&host->lock, flags);
> > + if (!released)
> > WARN_ON(1);
> > }
> > EXPORT_SYMBOL(mmc_retune_release);
> > @@ -177,18 +204,23 @@ int mmc_retune(struct mmc_host *host)
> > {
> > bool return_to_hs400 = false;
> > int err;
> > + unsigned long flags;
> >
> > - if (host->retune_now)
> > - host->retune_now = 0;
> > - else
> > + spin_lock_irqsave(&host->lock, flags);
> > + if (!host->retune_now) {
> > + spin_unlock_irqrestore(&host->lock, flags);
> > return 0;
> > + }
> > + host->retune_now = 0;
> >
> > - if (!host->need_retune || host->doing_retune || !host->card)
> > + if (!host->need_retune || host->doing_retune || !host->card) {
> > + spin_unlock_irqrestore(&host->lock, flags);
> > return 0;
> > + }
> >
> > host->need_retune = 0;
> > -
> > host->doing_retune = 1;
> > + spin_unlock_irqrestore(&host->lock, flags);
> >
> > if (host->ios.timing == MMC_TIMING_MMC_HS400) {
> > err = mmc_hs400_to_hs200(host->card);
> > @@ -205,7 +237,9 @@ int mmc_retune(struct mmc_host *host)
> > if (return_to_hs400)
> > err = mmc_hs200_to_hs400(host->card);
> > out:
> > + spin_lock_irqsave(&host->lock, flags);
> > host->doing_retune = 0;
> > + spin_unlock_irqrestore(&host->lock, flags);
> >
> > return err;
> > }
> > diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
> > index 5941d68ff989..07e4f427fe15 100644
> > --- a/drivers/mmc/core/host.h
> > +++ b/drivers/mmc/core/host.h
> > @@ -21,22 +21,55 @@ int mmc_retune(struct mmc_host *host);
> > void mmc_retune_pause(struct mmc_host *host);
> > void mmc_retune_unpause(struct mmc_host *host);
> >
> > -static inline void mmc_retune_clear(struct mmc_host *host)
> > +static inline void __mmc_retune_clear(struct mmc_host *host)
> > {
> > host->retune_now = 0;
> > host->need_retune = 0;
> > }
> >
> > +static inline void mmc_retune_clear(struct mmc_host *host)
> > +{
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > + __mmc_retune_clear(host);
> > + spin_unlock_irqrestore(&host->lock, flags);
> > +}
> > +
> > +static inline void __mmc_retune_hold(struct mmc_host *host)
> > +{
> > + if (!host->hold_retune)
> > + host->retune_now = 1;
> > + host->hold_retune += 1;
> > +}
> > +
> > +static inline bool __mmc_retune_release(struct mmc_host *host)
> > +{
> > + if (host->hold_retune) {
> > + host->hold_retune -= 1;
> > + return true;
> > + }
> > + return false;
> > +}
> > +
> > static inline void mmc_retune_hold_now(struct mmc_host *host)
> > {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > host->retune_now = 0;
> > host->hold_retune += 1;
> > + spin_unlock_irqrestore(&host->lock, flags);
> > }
> >
> > static inline void mmc_retune_recheck(struct mmc_host *host)
> > {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > if (host->hold_retune <= 1)
> > host->retune_now = 1;
> > + spin_unlock_irqrestore(&host->lock, flags);
> > }
> >
> > static inline int mmc_host_can_cmd23(struct mmc_host *host)
> > diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
> > index 7c86efb1044a..114febd15f08 100644
> > --- a/drivers/mmc/core/mmc.c
> > +++ b/drivers/mmc/core/mmc.c
> > @@ -1820,13 +1820,19 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
> > goto free_card;
> >
> > if (mmc_card_hs200(card)) {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > host->doing_init_tune = 1;
> > + spin_unlock_irqrestore(&host->lock, flags);
> >
> > err = mmc_hs200_tuning(card);
> > if (!err)
> > err = mmc_select_hs400(card);
> >
> > + spin_lock_irqsave(&host->lock, flags);
> > host->doing_init_tune = 0;
> > + spin_unlock_irqrestore(&host->lock, flags);
> >
> > if (err)
> > goto free_card;
> > diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
> > index 284856c8f655..5e38759c87f5 100644
> > --- a/drivers/mmc/core/queue.c
> > +++ b/drivers/mmc/core/queue.c
> > @@ -237,6 +237,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
> > enum mmc_issue_type issue_type;
> > enum mmc_issued issued;
> > bool get_card, cqe_retune_ok;
> > + unsigned long flags;
> > blk_status_t ret;
> >
> > if (mmc_card_removed(mq->card)) {
> > @@ -297,8 +298,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
> > mmc_get_card(card, &mq->ctx);
> >
> > if (host->cqe_enabled) {
> > + spin_lock_irqsave(&host->lock, flags);
> > host->retune_now = host->need_retune && cqe_retune_ok &&
> > !host->hold_retune;
> > + spin_unlock_irqrestore(&host->lock, flags);
> > }
> >
> > blk_mq_start_request(req);
> > diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
> > index e0e2c265e5d1..e7bddbafd1da 100644
> > --- a/include/linux/mmc/host.h
> > +++ b/include/linux/mmc/host.h
> > @@ -713,8 +713,12 @@ void mmc_retune_timer_stop(struct mmc_host *host);
> >
> > static inline void mmc_retune_needed(struct mmc_host *host)
> > {
> > + unsigned long flags;
> > +
> > + spin_lock_irqsave(&host->lock, flags);
> > if (host->can_retune)
> > host->need_retune = 1;
> > + spin_unlock_irqrestore(&host->lock, flags);
> > }
> >
> > static inline bool mmc_can_retune(struct mmc_host *host)
>
On 30/01/2026 00:47, Penghe Geng wrote:
>
> Hi Adrian,
>
> Thanks for the feedback. Below are the details you asked for.
>
> Kernel versions:
> - Seen on 5.15.120.bsk.business.6‑arm64 (custom tree).
> - Also observed on 6.1.0‑11‑arm64 (Debian 6.1.38‑4).
>
> Media: eMMC
> Controllers:
> - BlueField‑2: Synopsys DesignWare MMC (drivers/mmc/host/dw_mmc-bluefield.c)
> - BlueField‑3: Synopsys DWC MSHC (drivers/mmc/host/sdhci-of-dwcmshc.c in‑tree) and also with OOT sdhci-of-dwcmshc-bf3
>
> CQE:
> - Not enabled at runtime. CONFIG_MMC_CQHCI=m.
> - lsmod | grep cq is empty by default.
> - modprobe cqhci loads with 0 users and no CQE/CQHCI enable
> messages in dmesg. So CQE is not in use by the active host.
>
> I/O errors:
> - None observed around the WARN.
>
> Repro:
> - Intermittent, mostly during boot under stress.
> - Roughly 0.1–1% depending on distro/platform.
>
> Example stack (BF3, in-tree sdhci-of-dwcmshc):
> ------------[ cut here ]------------
> mmcblk0boot1: mmc0:0001 Y29128 31.9 MiB
> WARNING: CPU: 8 PID: 240 at drivers/mmc/core/core.c:349 mmc_start_request+0xb4/0xc4
> Modules linked in: crc16(E) mbcache(E) jbd2(E) nvme_tcp(OE) nvme_rdma(OE) rdma_cm(OE) iw_cm(OE) ib_cm(OE) ib_core(OE) nvme_fabrics(OE) configfs(E) nls_ascii(E) nls_cp437(E) nls_cp850(E) msdos(E) efivarfs(E) nvme(OE) nvme_core(OE) virtio_net(E) net_failover(E) virtio_console(E) failover(E) mlxbf_tmfifo(OE) mlx_compat(OE) virtio(E) t10_pi(E) sbsa_gwdt(E) mlxbf_bootctl(OE) sdhci_of_dwcmshc(OE) virtio_ring(E)
> mmcblk0rpmb: mmc0:0001 Y29128 4.00 MiB, chardev (245:0)
> CPU: 8 PID: 240 Comm: kworker/8:1H Tainted: G OE 5.15.120.bsk.business.6-arm64 #5.15.120.bsk.business.6
> Hardware name: https://www.mellanox.com BlueField-3 DPU/BlueField-3 DPU, BIOS 4.9.2.13576 Mar 18 2025
> Workqueue: kblockd blk_mq_run_work_fn
> pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> pc : mmc_start_request+0xb4/0xc4
> lr : mmc_start_request+0x68/0xc4
> sp : ffff80000932ba90
> x29: ffff80000932ba90 x28: ffff000081b3b308 x27: 0000000000000000
> x26: 0000000000000001 x25: 0000000000000000 x24: ffff000082aec000
> x23: ffff000082485800 x22: ffff000082aec000 x21: 0000000000000000
> x20: ffff000081b3b3d8 x19: ffff000082aec000 x18: 0000000000000000
> x17: 0000000000000000 x16: ffffbab212913c40 x15: 0000000000000000
> x14: 0000000000000000 x13: 0000000000000038 x12: ffff00008f06b000
> x11: 7f7f7f7f7f7f7f7f x10: ffffbab2144acbc8 x9 : ffffbab2130ad368
> x8 : ffff000081b3b548 x7 : ffff000082aec000 x6 : 0000000000000000
> x5 : ffff000081b3b458 x4 : ffff00008a4b5e80 x3 : ffff0000824859b0
> x2 : 0000000000000000 x1 : ffff000081b3b4c8 x0 : 0000000000000020
> Call trace:
> mmc_start_request+0xb4/0xc4
> mmc_blk_mq_issue_rq+0x310/0x8fc
> mmc_mq_queue_rq+0x154/0x3e0
> blk_mq_dispatch_rq_list+0x13c/0xa44
> blk_mq_do_dispatch_sched+0x2cc/0x33c
> __blk_mq_sched_dispatch_requests+0x154/0x1b0
> blk_mq_sched_dispatch_requests+0x40/0x80
> __blk_mq_run_hw_queue+0x58/0xa0
> blk_mq_run_work_fn+0x28/0x34
> process_one_work+0x1f8/0x4c0
> worker_thread+0x180/0x580
> kthread+0x128/0x13c
> kthread_return_to_user+0x0/0x10
> ---[ end trace fc3df73f08f7c8ee ]---
Not much to go on
>
> I agree the bitfield usage adds complexity. I can work on a follow-up
> to convert the retune-related flags to bools if that’s the preferred
> direction.
There are 2 suspect cases that I notice:
1. host->claimed in __mmc_claim_host()
The block driver allows more than 1 request to be inflight, which means
__mmc_claim_host() could itself overwrite other bitfields in a asynchronous
context if the host has alrady been claimed for the same ctx.
int __mmc_claim_host(struct mmc_host *host, struct mmc_ctx *ctx,
atomic_t *abort)
{
struct task_struct *task = ctx ? NULL : current;
DECLARE_WAITQUEUE(wait, current);
unsigned long flags;
int stop;
bool pm = false;
might_sleep();
add_wait_queue(&host->wq, &wait);
spin_lock_irqsave(&host->lock, flags);
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
stop = abort ? atomic_read(abort) : 0;
if (stop || !host->claimed || mmc_ctx_matches(host, ctx, task))
break;
spin_unlock_irqrestore(&host->lock, flags);
schedule();
spin_lock_irqsave(&host->lock, flags);
}
set_current_state(TASK_RUNNING);
if (!stop) {
host->claimed = 1; <- overwrites other bitfields
mmc_ctx_set_claimer(host, ctx, task);
host->claim_cnt += 1;
if (host->claim_cnt == 1)
pm = true;
} else
wake_up(&host->wq);
spin_unlock_irqrestore(&host->lock, flags);
remove_wait_queue(&host->wq, &wait);
if (pm)
pm_runtime_get_sync(mmc_dev(host));
return stop;
}
2. host->retune_now in mmc_mq_queue_rq()
For the same reason as 1, host->retune_now update can overwrite other
bitfields in an asynchronous context.
if (host->cqe_enabled) {
host->retune_now = host->need_retune && cqe_retune_ok &&
!host->hold_retune;
}
I suggest changing bitfields to bool for the above 2 cases and also
in cases that are relatively difficult to fully understand:
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index e0e2c265e5d1..ba84f02c2a10 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -486,14 +486,12 @@ struct mmc_host {
struct mmc_ios ios; /* current io bus settings */
+ bool claimed; /* host exclusively claimed */
+
/* group bitfields together to minimize padding */
unsigned int use_spi_crc:1;
- unsigned int claimed:1; /* host exclusively claimed */
unsigned int doing_init_tune:1; /* initial tuning in progress */
- unsigned int can_retune:1; /* re-tuning can be used */
unsigned int doing_retune:1; /* re-tuning in progress */
- unsigned int retune_now:1; /* do re-tuning at next req */
- unsigned int retune_paused:1; /* re-tuning is temporarily disabled */
unsigned int retune_crc_disable:1; /* don't trigger retune upon crc */
unsigned int can_dma_map_merge:1; /* merging can be used */
unsigned int vqmmc_enabled:1; /* vqmmc regulator is enabled */
@@ -508,6 +506,9 @@ struct mmc_host {
int rescan_disable; /* disable card detection */
int rescan_entered; /* used with nonremovable devices */
+ bool can_retune; /* re-tuning can be used */
+ bool retune_now; /* do re-tuning at next req */
+ bool retune_paused; /* re-tuning is temporarily disabled */
int need_retune; /* re-tuning is needed */
int hold_retune; /* hold off re-tuning */
unsigned int retune_period; /* re-tuning period in secs */
For which fixes tags could be:
Fixes: 6c0cedd1ef952 "mmc: core: Introduce host claiming by context"
Fixes: 1e8e55b67030c "mmc: block: Add CQE support"
>
> Thanks,
> Penghe
>
> On Mon, Jan 26, 2026 at 03:43:14PM +0200, Adrian Hunter wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> On 15/01/2026 23:46, Penghe Geng wrote:
>>> The host->claimed flag shares a bitfield storage word with several
>>> retune flags (retune_now, retune_paused, can_retune, doing_retune,
>>> doing_init_tune). Updating those flags without host->lock can RMW the
>>> shared word and clear claimed, triggering spurious
>>> WARN_ON(!host->claimed).
>>
>> Thanks for finding this!
>>
>> The design is that those members are protected by the host->claimed
>> lock itself.
>>
>> mmc operations are primarily single-threaded, protected by the
>> host->claimed lock, although the block driver does allow multiple
>> transfers at the same time in some cases.
>>
>> There are also other contexts like interrupt handlers.
>>
>> Can you provide some information about when WARN_ON(!host->claimed)
>> is being hit? Including the stack dump?
>> What kernel version?
>> Is it eMMC, SDIO or SD card?
>> Is CQE being used?
>> Are there any I/O errors happening also?
>> What controller driver is it?
>>
>> In any case, the use of bit fields seems to add complexity unnecessarily,
>> so we should consider converting some or all of them to bool.
>>
>>>
>>> Serialize all retune bitfield updates with host->lock. Provide lockless
>>> __mmc_retune_* helpers so callers that already hold host->lock can
>>> avoid deadlocks while public wrappers serialize updates. Also protect
>>> doing_init_tune and the CQE retune_now assignment with host->lock.
>>>
>>> Fixes: dfa13ebbe334 ("mmc: host: Add facility to support re-tuning")
>>> Cc: stable@vger.kernel.org
>>> Signed-off-by: Penghe Geng <pgeng@nvidia.com>
>>> ---
>>> drivers/mmc/core/host.c | 60 +++++++++++++++++++++++++++++++---------
>>> drivers/mmc/core/host.h | 35 ++++++++++++++++++++++-
>>> drivers/mmc/core/mmc.c | 6 ++++
>>> drivers/mmc/core/queue.c | 3 ++
>>> include/linux/mmc/host.h | 4 +++
>>> 5 files changed, 94 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
>>> index 88c95dbfd9cf..0b6b4a31f629 100644
>>> --- a/drivers/mmc/core/host.c
>>> +++ b/drivers/mmc/core/host.c
>>> @@ -109,7 +109,11 @@ void mmc_unregister_host_class(void)
>>> */
>>> void mmc_retune_enable(struct mmc_host *host)
>>> {
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> host->can_retune = 1;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> if (host->retune_period)
>>> mod_timer(&host->retune_timer,
>>> jiffies + host->retune_period * HZ);
>>> @@ -121,18 +125,31 @@ void mmc_retune_enable(struct mmc_host *host)
>>> */
>>> void mmc_retune_pause(struct mmc_host *host)
>>> {
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> if (!host->retune_paused) {
>>> host->retune_paused = 1;
>>> - mmc_retune_hold(host);
>>> + __mmc_retune_hold(host);
>>> }
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> }
>>> EXPORT_SYMBOL(mmc_retune_pause);
>>>
>>> void mmc_retune_unpause(struct mmc_host *host)
>>> {
>>> + unsigned long flags;
>>> + bool released;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> if (host->retune_paused) {
>>> host->retune_paused = 0;
>>> - mmc_retune_release(host);
>>> + released = __mmc_retune_release(host);
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> + if (!released)
>>> + WARN_ON(1);
>>> + } else {
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> }
>>> }
>>> EXPORT_SYMBOL(mmc_retune_unpause);
>>> @@ -145,8 +162,12 @@ EXPORT_SYMBOL(mmc_retune_unpause);
>>> */
>>> void mmc_retune_disable(struct mmc_host *host)
>>> {
>>> + unsigned long flags;
>>> +
>>> mmc_retune_unpause(host);
>>> + spin_lock_irqsave(&host->lock, flags);
>>> host->can_retune = 0;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> timer_delete_sync(&host->retune_timer);
>>> mmc_retune_clear(host);
>>> }
>>> @@ -159,16 +180,22 @@ EXPORT_SYMBOL(mmc_retune_timer_stop);
>>>
>>> void mmc_retune_hold(struct mmc_host *host)
>>> {
>>> - if (!host->hold_retune)
>>> - host->retune_now = 1;
>>> - host->hold_retune += 1;
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> + __mmc_retune_hold(host);
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> }
>>>
>>> void mmc_retune_release(struct mmc_host *host)
>>> {
>>> - if (host->hold_retune)
>>> - host->hold_retune -= 1;
>>> - else
>>> + unsigned long flags;
>>> + bool released;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> + released = __mmc_retune_release(host);
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> + if (!released)
>>> WARN_ON(1);
>>> }
>>> EXPORT_SYMBOL(mmc_retune_release);
>>> @@ -177,18 +204,23 @@ int mmc_retune(struct mmc_host *host)
>>> {
>>> bool return_to_hs400 = false;
>>> int err;
>>> + unsigned long flags;
>>>
>>> - if (host->retune_now)
>>> - host->retune_now = 0;
>>> - else
>>> + spin_lock_irqsave(&host->lock, flags);
>>> + if (!host->retune_now) {
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> return 0;
>>> + }
>>> + host->retune_now = 0;
>>>
>>> - if (!host->need_retune || host->doing_retune || !host->card)
>>> + if (!host->need_retune || host->doing_retune || !host->card) {
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> return 0;
>>> + }
>>>
>>> host->need_retune = 0;
>>> -
>>> host->doing_retune = 1;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>>
>>> if (host->ios.timing == MMC_TIMING_MMC_HS400) {
>>> err = mmc_hs400_to_hs200(host->card);
>>> @@ -205,7 +237,9 @@ int mmc_retune(struct mmc_host *host)
>>> if (return_to_hs400)
>>> err = mmc_hs200_to_hs400(host->card);
>>> out:
>>> + spin_lock_irqsave(&host->lock, flags);
>>> host->doing_retune = 0;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>>
>>> return err;
>>> }
>>> diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
>>> index 5941d68ff989..07e4f427fe15 100644
>>> --- a/drivers/mmc/core/host.h
>>> +++ b/drivers/mmc/core/host.h
>>> @@ -21,22 +21,55 @@ int mmc_retune(struct mmc_host *host);
>>> void mmc_retune_pause(struct mmc_host *host);
>>> void mmc_retune_unpause(struct mmc_host *host);
>>>
>>> -static inline void mmc_retune_clear(struct mmc_host *host)
>>> +static inline void __mmc_retune_clear(struct mmc_host *host)
>>> {
>>> host->retune_now = 0;
>>> host->need_retune = 0;
>>> }
>>>
>>> +static inline void mmc_retune_clear(struct mmc_host *host)
>>> +{
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> + __mmc_retune_clear(host);
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> +}
>>> +
>>> +static inline void __mmc_retune_hold(struct mmc_host *host)
>>> +{
>>> + if (!host->hold_retune)
>>> + host->retune_now = 1;
>>> + host->hold_retune += 1;
>>> +}
>>> +
>>> +static inline bool __mmc_retune_release(struct mmc_host *host)
>>> +{
>>> + if (host->hold_retune) {
>>> + host->hold_retune -= 1;
>>> + return true;
>>> + }
>>> + return false;
>>> +}
>>> +
>>> static inline void mmc_retune_hold_now(struct mmc_host *host)
>>> {
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> host->retune_now = 0;
>>> host->hold_retune += 1;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> }
>>>
>>> static inline void mmc_retune_recheck(struct mmc_host *host)
>>> {
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> if (host->hold_retune <= 1)
>>> host->retune_now = 1;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> }
>>>
>>> static inline int mmc_host_can_cmd23(struct mmc_host *host)
>>> diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
>>> index 7c86efb1044a..114febd15f08 100644
>>> --- a/drivers/mmc/core/mmc.c
>>> +++ b/drivers/mmc/core/mmc.c
>>> @@ -1820,13 +1820,19 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
>>> goto free_card;
>>>
>>> if (mmc_card_hs200(card)) {
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> host->doing_init_tune = 1;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>>
>>> err = mmc_hs200_tuning(card);
>>> if (!err)
>>> err = mmc_select_hs400(card);
>>>
>>> + spin_lock_irqsave(&host->lock, flags);
>>> host->doing_init_tune = 0;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>>
>>> if (err)
>>> goto free_card;
>>> diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
>>> index 284856c8f655..5e38759c87f5 100644
>>> --- a/drivers/mmc/core/queue.c
>>> +++ b/drivers/mmc/core/queue.c
>>> @@ -237,6 +237,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
>>> enum mmc_issue_type issue_type;
>>> enum mmc_issued issued;
>>> bool get_card, cqe_retune_ok;
>>> + unsigned long flags;
>>> blk_status_t ret;
>>>
>>> if (mmc_card_removed(mq->card)) {
>>> @@ -297,8 +298,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
>>> mmc_get_card(card, &mq->ctx);
>>>
>>> if (host->cqe_enabled) {
>>> + spin_lock_irqsave(&host->lock, flags);
>>> host->retune_now = host->need_retune && cqe_retune_ok &&
>>> !host->hold_retune;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> }
>>>
>>> blk_mq_start_request(req);
>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>>> index e0e2c265e5d1..e7bddbafd1da 100644
>>> --- a/include/linux/mmc/host.h
>>> +++ b/include/linux/mmc/host.h
>>> @@ -713,8 +713,12 @@ void mmc_retune_timer_stop(struct mmc_host *host);
>>>
>>> static inline void mmc_retune_needed(struct mmc_host *host)
>>> {
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> if (host->can_retune)
>>> host->need_retune = 1;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> }
>>>
>>> static inline bool mmc_can_retune(struct mmc_host *host)
>>
>
> On Mon, Jan 26, 2026 at 03:43:14PM +0200, Adrian Hunter wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> On 15/01/2026 23:46, Penghe Geng wrote:
>>> The host->claimed flag shares a bitfield storage word with several
>>> retune flags (retune_now, retune_paused, can_retune, doing_retune,
>>> doing_init_tune). Updating those flags without host->lock can RMW the
>>> shared word and clear claimed, triggering spurious
>>> WARN_ON(!host->claimed).
>>
>> Thanks for finding this!
>>
>> The design is that those members are protected by the host->claimed
>> lock itself.
>>
>> mmc operations are primarily single-threaded, protected by the
>> host->claimed lock, although the block driver does allow multiple
>> transfers at the same time in some cases.
>>
>> There are also other contexts like interrupt handlers.
>>
>> Can you provide some information about when WARN_ON(!host->claimed)
>> is being hit? Including the stack dump?
>> What kernel version?
>> Is it eMMC, SDIO or SD card?
>> Is CQE being used?
>> Are there any I/O errors happening also?
>> What controller driver is it?
>>
>> In any case, the use of bit fields seems to add complexity unnecessarily,
>> so we should consider converting some or all of them to bool.
>>
>>>
>>> Serialize all retune bitfield updates with host->lock. Provide lockless
>>> __mmc_retune_* helpers so callers that already hold host->lock can
>>> avoid deadlocks while public wrappers serialize updates. Also protect
>>> doing_init_tune and the CQE retune_now assignment with host->lock.
>>>
>>> Fixes: dfa13ebbe334 ("mmc: host: Add facility to support re-tuning")
>>> Cc: stable@vger.kernel.org
>>> Signed-off-by: Penghe Geng <pgeng@nvidia.com>
>>> ---
>>> drivers/mmc/core/host.c | 60 +++++++++++++++++++++++++++++++---------
>>> drivers/mmc/core/host.h | 35 ++++++++++++++++++++++-
>>> drivers/mmc/core/mmc.c | 6 ++++
>>> drivers/mmc/core/queue.c | 3 ++
>>> include/linux/mmc/host.h | 4 +++
>>> 5 files changed, 94 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
>>> index 88c95dbfd9cf..0b6b4a31f629 100644
>>> --- a/drivers/mmc/core/host.c
>>> +++ b/drivers/mmc/core/host.c
>>> @@ -109,7 +109,11 @@ void mmc_unregister_host_class(void)
>>> */
>>> void mmc_retune_enable(struct mmc_host *host)
>>> {
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> host->can_retune = 1;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> if (host->retune_period)
>>> mod_timer(&host->retune_timer,
>>> jiffies + host->retune_period * HZ);
>>> @@ -121,18 +125,31 @@ void mmc_retune_enable(struct mmc_host *host)
>>> */
>>> void mmc_retune_pause(struct mmc_host *host)
>>> {
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> if (!host->retune_paused) {
>>> host->retune_paused = 1;
>>> - mmc_retune_hold(host);
>>> + __mmc_retune_hold(host);
>>> }
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> }
>>> EXPORT_SYMBOL(mmc_retune_pause);
>>>
>>> void mmc_retune_unpause(struct mmc_host *host)
>>> {
>>> + unsigned long flags;
>>> + bool released;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> if (host->retune_paused) {
>>> host->retune_paused = 0;
>>> - mmc_retune_release(host);
>>> + released = __mmc_retune_release(host);
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> + if (!released)
>>> + WARN_ON(1);
>>> + } else {
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> }
>>> }
>>> EXPORT_SYMBOL(mmc_retune_unpause);
>>> @@ -145,8 +162,12 @@ EXPORT_SYMBOL(mmc_retune_unpause);
>>> */
>>> void mmc_retune_disable(struct mmc_host *host)
>>> {
>>> + unsigned long flags;
>>> +
>>> mmc_retune_unpause(host);
>>> + spin_lock_irqsave(&host->lock, flags);
>>> host->can_retune = 0;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> timer_delete_sync(&host->retune_timer);
>>> mmc_retune_clear(host);
>>> }
>>> @@ -159,16 +180,22 @@ EXPORT_SYMBOL(mmc_retune_timer_stop);
>>>
>>> void mmc_retune_hold(struct mmc_host *host)
>>> {
>>> - if (!host->hold_retune)
>>> - host->retune_now = 1;
>>> - host->hold_retune += 1;
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> + __mmc_retune_hold(host);
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> }
>>>
>>> void mmc_retune_release(struct mmc_host *host)
>>> {
>>> - if (host->hold_retune)
>>> - host->hold_retune -= 1;
>>> - else
>>> + unsigned long flags;
>>> + bool released;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> + released = __mmc_retune_release(host);
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> + if (!released)
>>> WARN_ON(1);
>>> }
>>> EXPORT_SYMBOL(mmc_retune_release);
>>> @@ -177,18 +204,23 @@ int mmc_retune(struct mmc_host *host)
>>> {
>>> bool return_to_hs400 = false;
>>> int err;
>>> + unsigned long flags;
>>>
>>> - if (host->retune_now)
>>> - host->retune_now = 0;
>>> - else
>>> + spin_lock_irqsave(&host->lock, flags);
>>> + if (!host->retune_now) {
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> return 0;
>>> + }
>>> + host->retune_now = 0;
>>>
>>> - if (!host->need_retune || host->doing_retune || !host->card)
>>> + if (!host->need_retune || host->doing_retune || !host->card) {
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> return 0;
>>> + }
>>>
>>> host->need_retune = 0;
>>> -
>>> host->doing_retune = 1;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>>
>>> if (host->ios.timing == MMC_TIMING_MMC_HS400) {
>>> err = mmc_hs400_to_hs200(host->card);
>>> @@ -205,7 +237,9 @@ int mmc_retune(struct mmc_host *host)
>>> if (return_to_hs400)
>>> err = mmc_hs200_to_hs400(host->card);
>>> out:
>>> + spin_lock_irqsave(&host->lock, flags);
>>> host->doing_retune = 0;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>>
>>> return err;
>>> }
>>> diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
>>> index 5941d68ff989..07e4f427fe15 100644
>>> --- a/drivers/mmc/core/host.h
>>> +++ b/drivers/mmc/core/host.h
>>> @@ -21,22 +21,55 @@ int mmc_retune(struct mmc_host *host);
>>> void mmc_retune_pause(struct mmc_host *host);
>>> void mmc_retune_unpause(struct mmc_host *host);
>>>
>>> -static inline void mmc_retune_clear(struct mmc_host *host)
>>> +static inline void __mmc_retune_clear(struct mmc_host *host)
>>> {
>>> host->retune_now = 0;
>>> host->need_retune = 0;
>>> }
>>>
>>> +static inline void mmc_retune_clear(struct mmc_host *host)
>>> +{
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> + __mmc_retune_clear(host);
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> +}
>>> +
>>> +static inline void __mmc_retune_hold(struct mmc_host *host)
>>> +{
>>> + if (!host->hold_retune)
>>> + host->retune_now = 1;
>>> + host->hold_retune += 1;
>>> +}
>>> +
>>> +static inline bool __mmc_retune_release(struct mmc_host *host)
>>> +{
>>> + if (host->hold_retune) {
>>> + host->hold_retune -= 1;
>>> + return true;
>>> + }
>>> + return false;
>>> +}
>>> +
>>> static inline void mmc_retune_hold_now(struct mmc_host *host)
>>> {
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> host->retune_now = 0;
>>> host->hold_retune += 1;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> }
>>>
>>> static inline void mmc_retune_recheck(struct mmc_host *host)
>>> {
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> if (host->hold_retune <= 1)
>>> host->retune_now = 1;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> }
>>>
>>> static inline int mmc_host_can_cmd23(struct mmc_host *host)
>>> diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
>>> index 7c86efb1044a..114febd15f08 100644
>>> --- a/drivers/mmc/core/mmc.c
>>> +++ b/drivers/mmc/core/mmc.c
>>> @@ -1820,13 +1820,19 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
>>> goto free_card;
>>>
>>> if (mmc_card_hs200(card)) {
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> host->doing_init_tune = 1;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>>
>>> err = mmc_hs200_tuning(card);
>>> if (!err)
>>> err = mmc_select_hs400(card);
>>>
>>> + spin_lock_irqsave(&host->lock, flags);
>>> host->doing_init_tune = 0;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>>
>>> if (err)
>>> goto free_card;
>>> diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
>>> index 284856c8f655..5e38759c87f5 100644
>>> --- a/drivers/mmc/core/queue.c
>>> +++ b/drivers/mmc/core/queue.c
>>> @@ -237,6 +237,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
>>> enum mmc_issue_type issue_type;
>>> enum mmc_issued issued;
>>> bool get_card, cqe_retune_ok;
>>> + unsigned long flags;
>>> blk_status_t ret;
>>>
>>> if (mmc_card_removed(mq->card)) {
>>> @@ -297,8 +298,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
>>> mmc_get_card(card, &mq->ctx);
>>>
>>> if (host->cqe_enabled) {
>>> + spin_lock_irqsave(&host->lock, flags);
>>> host->retune_now = host->need_retune && cqe_retune_ok &&
>>> !host->hold_retune;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> }
>>>
>>> blk_mq_start_request(req);
>>> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
>>> index e0e2c265e5d1..e7bddbafd1da 100644
>>> --- a/include/linux/mmc/host.h
>>> +++ b/include/linux/mmc/host.h
>>> @@ -713,8 +713,12 @@ void mmc_retune_timer_stop(struct mmc_host *host);
>>>
>>> static inline void mmc_retune_needed(struct mmc_host *host)
>>> {
>>> + unsigned long flags;
>>> +
>>> + spin_lock_irqsave(&host->lock, flags);
>>> if (host->can_retune)
>>> host->need_retune = 1;
>>> + spin_unlock_irqrestore(&host->lock, flags);
>>> }
>>>
>>> static inline bool mmc_can_retune(struct mmc_host *host)
>>
+ Adrian
On Thu, 15 Jan 2026 at 22:46, Penghe Geng <pgeng@nvidia.com> wrote:
>
> The host->claimed flag shares a bitfield storage word with several
> retune flags (retune_now, retune_paused, can_retune, doing_retune,
> doing_init_tune). Updating those flags without host->lock can RMW the
> shared word and clear claimed, triggering spurious
> WARN_ON(!host->claimed).
>
> Serialize all retune bitfield updates with host->lock. Provide lockless
> __mmc_retune_* helpers so callers that already hold host->lock can
> avoid deadlocks while public wrappers serialize updates. Also protect
> doing_init_tune and the CQE retune_now assignment with host->lock.
>
> Fixes: dfa13ebbe334 ("mmc: host: Add facility to support re-tuning")
> Cc: stable@vger.kernel.org
> Signed-off-by: Penghe Geng <pgeng@nvidia.com>
Thanks for the patch! I have looped in Adrian to get his opinion on this.
Kind regards
Uffe
> ---
> drivers/mmc/core/host.c | 60 +++++++++++++++++++++++++++++++---------
> drivers/mmc/core/host.h | 35 ++++++++++++++++++++++-
> drivers/mmc/core/mmc.c | 6 ++++
> drivers/mmc/core/queue.c | 3 ++
> include/linux/mmc/host.h | 4 +++
> 5 files changed, 94 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
> index 88c95dbfd9cf..0b6b4a31f629 100644
> --- a/drivers/mmc/core/host.c
> +++ b/drivers/mmc/core/host.c
> @@ -109,7 +109,11 @@ void mmc_unregister_host_class(void)
> */
> void mmc_retune_enable(struct mmc_host *host)
> {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> host->can_retune = 1;
> + spin_unlock_irqrestore(&host->lock, flags);
> if (host->retune_period)
> mod_timer(&host->retune_timer,
> jiffies + host->retune_period * HZ);
> @@ -121,18 +125,31 @@ void mmc_retune_enable(struct mmc_host *host)
> */
> void mmc_retune_pause(struct mmc_host *host)
> {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> if (!host->retune_paused) {
> host->retune_paused = 1;
> - mmc_retune_hold(host);
> + __mmc_retune_hold(host);
> }
> + spin_unlock_irqrestore(&host->lock, flags);
> }
> EXPORT_SYMBOL(mmc_retune_pause);
>
> void mmc_retune_unpause(struct mmc_host *host)
> {
> + unsigned long flags;
> + bool released;
> +
> + spin_lock_irqsave(&host->lock, flags);
> if (host->retune_paused) {
> host->retune_paused = 0;
> - mmc_retune_release(host);
> + released = __mmc_retune_release(host);
> + spin_unlock_irqrestore(&host->lock, flags);
> + if (!released)
> + WARN_ON(1);
> + } else {
> + spin_unlock_irqrestore(&host->lock, flags);
> }
> }
> EXPORT_SYMBOL(mmc_retune_unpause);
> @@ -145,8 +162,12 @@ EXPORT_SYMBOL(mmc_retune_unpause);
> */
> void mmc_retune_disable(struct mmc_host *host)
> {
> + unsigned long flags;
> +
> mmc_retune_unpause(host);
> + spin_lock_irqsave(&host->lock, flags);
> host->can_retune = 0;
> + spin_unlock_irqrestore(&host->lock, flags);
> timer_delete_sync(&host->retune_timer);
> mmc_retune_clear(host);
> }
> @@ -159,16 +180,22 @@ EXPORT_SYMBOL(mmc_retune_timer_stop);
>
> void mmc_retune_hold(struct mmc_host *host)
> {
> - if (!host->hold_retune)
> - host->retune_now = 1;
> - host->hold_retune += 1;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> + __mmc_retune_hold(host);
> + spin_unlock_irqrestore(&host->lock, flags);
> }
>
> void mmc_retune_release(struct mmc_host *host)
> {
> - if (host->hold_retune)
> - host->hold_retune -= 1;
> - else
> + unsigned long flags;
> + bool released;
> +
> + spin_lock_irqsave(&host->lock, flags);
> + released = __mmc_retune_release(host);
> + spin_unlock_irqrestore(&host->lock, flags);
> + if (!released)
> WARN_ON(1);
> }
> EXPORT_SYMBOL(mmc_retune_release);
> @@ -177,18 +204,23 @@ int mmc_retune(struct mmc_host *host)
> {
> bool return_to_hs400 = false;
> int err;
> + unsigned long flags;
>
> - if (host->retune_now)
> - host->retune_now = 0;
> - else
> + spin_lock_irqsave(&host->lock, flags);
> + if (!host->retune_now) {
> + spin_unlock_irqrestore(&host->lock, flags);
> return 0;
> + }
> + host->retune_now = 0;
>
> - if (!host->need_retune || host->doing_retune || !host->card)
> + if (!host->need_retune || host->doing_retune || !host->card) {
> + spin_unlock_irqrestore(&host->lock, flags);
> return 0;
> + }
>
> host->need_retune = 0;
> -
> host->doing_retune = 1;
> + spin_unlock_irqrestore(&host->lock, flags);
>
> if (host->ios.timing == MMC_TIMING_MMC_HS400) {
> err = mmc_hs400_to_hs200(host->card);
> @@ -205,7 +237,9 @@ int mmc_retune(struct mmc_host *host)
> if (return_to_hs400)
> err = mmc_hs200_to_hs400(host->card);
> out:
> + spin_lock_irqsave(&host->lock, flags);
> host->doing_retune = 0;
> + spin_unlock_irqrestore(&host->lock, flags);
>
> return err;
> }
> diff --git a/drivers/mmc/core/host.h b/drivers/mmc/core/host.h
> index 5941d68ff989..07e4f427fe15 100644
> --- a/drivers/mmc/core/host.h
> +++ b/drivers/mmc/core/host.h
> @@ -21,22 +21,55 @@ int mmc_retune(struct mmc_host *host);
> void mmc_retune_pause(struct mmc_host *host);
> void mmc_retune_unpause(struct mmc_host *host);
>
> -static inline void mmc_retune_clear(struct mmc_host *host)
> +static inline void __mmc_retune_clear(struct mmc_host *host)
> {
> host->retune_now = 0;
> host->need_retune = 0;
> }
>
> +static inline void mmc_retune_clear(struct mmc_host *host)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> + __mmc_retune_clear(host);
> + spin_unlock_irqrestore(&host->lock, flags);
> +}
> +
> +static inline void __mmc_retune_hold(struct mmc_host *host)
> +{
> + if (!host->hold_retune)
> + host->retune_now = 1;
> + host->hold_retune += 1;
> +}
> +
> +static inline bool __mmc_retune_release(struct mmc_host *host)
> +{
> + if (host->hold_retune) {
> + host->hold_retune -= 1;
> + return true;
> + }
> + return false;
> +}
> +
> static inline void mmc_retune_hold_now(struct mmc_host *host)
> {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> host->retune_now = 0;
> host->hold_retune += 1;
> + spin_unlock_irqrestore(&host->lock, flags);
> }
>
> static inline void mmc_retune_recheck(struct mmc_host *host)
> {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> if (host->hold_retune <= 1)
> host->retune_now = 1;
> + spin_unlock_irqrestore(&host->lock, flags);
> }
>
> static inline int mmc_host_can_cmd23(struct mmc_host *host)
> diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
> index 7c86efb1044a..114febd15f08 100644
> --- a/drivers/mmc/core/mmc.c
> +++ b/drivers/mmc/core/mmc.c
> @@ -1820,13 +1820,19 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
> goto free_card;
>
> if (mmc_card_hs200(card)) {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> host->doing_init_tune = 1;
> + spin_unlock_irqrestore(&host->lock, flags);
>
> err = mmc_hs200_tuning(card);
> if (!err)
> err = mmc_select_hs400(card);
>
> + spin_lock_irqsave(&host->lock, flags);
> host->doing_init_tune = 0;
> + spin_unlock_irqrestore(&host->lock, flags);
>
> if (err)
> goto free_card;
> diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
> index 284856c8f655..5e38759c87f5 100644
> --- a/drivers/mmc/core/queue.c
> +++ b/drivers/mmc/core/queue.c
> @@ -237,6 +237,7 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
> enum mmc_issue_type issue_type;
> enum mmc_issued issued;
> bool get_card, cqe_retune_ok;
> + unsigned long flags;
> blk_status_t ret;
>
> if (mmc_card_removed(mq->card)) {
> @@ -297,8 +298,10 @@ static blk_status_t mmc_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
> mmc_get_card(card, &mq->ctx);
>
> if (host->cqe_enabled) {
> + spin_lock_irqsave(&host->lock, flags);
> host->retune_now = host->need_retune && cqe_retune_ok &&
> !host->hold_retune;
> + spin_unlock_irqrestore(&host->lock, flags);
> }
>
> blk_mq_start_request(req);
> diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
> index e0e2c265e5d1..e7bddbafd1da 100644
> --- a/include/linux/mmc/host.h
> +++ b/include/linux/mmc/host.h
> @@ -713,8 +713,12 @@ void mmc_retune_timer_stop(struct mmc_host *host);
>
> static inline void mmc_retune_needed(struct mmc_host *host)
> {
> + unsigned long flags;
> +
> + spin_lock_irqsave(&host->lock, flags);
> if (host->can_retune)
> host->need_retune = 1;
> + spin_unlock_irqrestore(&host->lock, flags);
> }
>
> static inline bool mmc_can_retune(struct mmc_host *host)
> --
> 2.43.0
>
© 2016 - 2026 Red Hat, Inc.