fs/ceph/mds_client.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
This MDS reconnect error path leaves s_cap_reconnect set.
send_mds_reconnect() sets the bit at the beginning of the reconnect,
but the first failing operation after that, ceph_pagelist_encode_32(),
can jump to `fail:` without clearing it.
__ceph_remove_cap() consults that flag to decide whether cap releases
should be queued. A reconnect-preparation failure therefore leaves the
session in reconnect mode from the cap-release path's point of view
and can strand release work until some later state transition repairs
it.
Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
fs/ceph/mds_client.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index b1746273f186..4fa471d9b3b2 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4956,7 +4956,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
/* placeholder for nr_caps */
err = ceph_pagelist_encode_32(recon_state.pagelist, 0);
if (err)
- goto fail;
+ goto fail_clear_cap_reconnect;
if (test_bit(CEPHFS_FEATURE_MULTI_RECONNECT, &session->s_features)) {
recon_state.msg_version = 3;
@@ -5046,6 +5046,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
ceph_pagelist_release(recon_state.pagelist);
return;
+fail_clear_cap_reconnect:
+ spin_lock(&session->s_cap_lock);
+ session->s_cap_reconnect = 0;
+ spin_unlock(&session->s_cap_lock);
fail:
ceph_msg_put(reply);
up_read(&mdsc->snap_rwsem);
--
2.47.3
On Mon, 2026-03-30 at 10:43 +0200, Max Kellermann wrote:
> This MDS reconnect error path leaves s_cap_reconnect set.
> send_mds_reconnect() sets the bit at the beginning of the reconnect,
> but the first failing operation after that, ceph_pagelist_encode_32(),
> can jump to `fail:` without clearing it.
>
> __ceph_remove_cap() consults that flag to decide whether cap releases
> should be queued. A reconnect-preparation failure therefore leaves the
> session in reconnect mode from the cap-release path's point of view
> and can strand release work until some later state transition repairs
> it.
>
> Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
> ---
> fs/ceph/mds_client.c | 6 +++++-
> 1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index b1746273f186..4fa471d9b3b2 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -4956,7 +4956,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
> /* placeholder for nr_caps */
> err = ceph_pagelist_encode_32(recon_state.pagelist, 0);
> if (err)
> - goto fail;
> + goto fail_clear_cap_reconnect;
>
> if (test_bit(CEPHFS_FEATURE_MULTI_RECONNECT, &session->s_features)) {
> recon_state.msg_version = 3;
> @@ -5046,6 +5046,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
> ceph_pagelist_release(recon_state.pagelist);
> return;
>
> +fail_clear_cap_reconnect:
> + spin_lock(&session->s_cap_lock);
> + session->s_cap_reconnect = 0;
> + spin_unlock(&session->s_cap_lock);
> fail:
> ceph_msg_put(reply);
> up_read(&mdsc->snap_rwsem);
Makes sense. Nice fix. But maybe we need to consider refactoring this
function to avoid such issues.
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Thanks,
Slava.
On Mon, 2026-03-30 at 17:29 +0000, Viacheslav Dubeyko wrote:
> On Mon, 2026-03-30 at 10:43 +0200, Max Kellermann wrote:
> > This MDS reconnect error path leaves s_cap_reconnect set.
> > send_mds_reconnect() sets the bit at the beginning of the reconnect,
> > but the first failing operation after that, ceph_pagelist_encode_32(),
> > can jump to `fail:` without clearing it.
> >
> > __ceph_remove_cap() consults that flag to decide whether cap releases
> > should be queued. A reconnect-preparation failure therefore leaves the
> > session in reconnect mode from the cap-release path's point of view
> > and can strand release work until some later state transition repairs
> > it.
> >
> > Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
> > ---
> > fs/ceph/mds_client.c | 6 +++++-
> > 1 file changed, 5 insertions(+), 1 deletion(-)
> >
> > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > index b1746273f186..4fa471d9b3b2 100644
> > --- a/fs/ceph/mds_client.c
> > +++ b/fs/ceph/mds_client.c
> > @@ -4956,7 +4956,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
> > /* placeholder for nr_caps */
> > err = ceph_pagelist_encode_32(recon_state.pagelist, 0);
> > if (err)
> > - goto fail;
> > + goto fail_clear_cap_reconnect;
> >
> > if (test_bit(CEPHFS_FEATURE_MULTI_RECONNECT, &session->s_features)) {
> > recon_state.msg_version = 3;
> > @@ -5046,6 +5046,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
> > ceph_pagelist_release(recon_state.pagelist);
> > return;
> >
> > +fail_clear_cap_reconnect:
> > + spin_lock(&session->s_cap_lock);
> > + session->s_cap_reconnect = 0;
> > + spin_unlock(&session->s_cap_lock);
> > fail:
> > ceph_msg_put(reply);
> > up_read(&mdsc->snap_rwsem);
>
> Makes sense. Nice fix. But maybe we need to consider refactoring this
> function to avoid such issues.
>
> Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
>
>
The xfstests run on 7.0-rc6 didn't reveal any new issues related to the patch.
Tested-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
Thanks,
Slava.
On Thu, 2026-04-02 at 18:12 +0000, Viacheslav Dubeyko wrote:
> On Mon, 2026-03-30 at 17:29 +0000, Viacheslav Dubeyko wrote:
> > On Mon, 2026-03-30 at 10:43 +0200, Max Kellermann wrote:
> > > This MDS reconnect error path leaves s_cap_reconnect set.
> > > send_mds_reconnect() sets the bit at the beginning of the reconnect,
> > > but the first failing operation after that, ceph_pagelist_encode_32(),
> > > can jump to `fail:` without clearing it.
> > >
> > > __ceph_remove_cap() consults that flag to decide whether cap releases
> > > should be queued. A reconnect-preparation failure therefore leaves the
> > > session in reconnect mode from the cap-release path's point of view
> > > and can strand release work until some later state transition repairs
> > > it.
> > >
> > > Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
> > > ---
> > > fs/ceph/mds_client.c | 6 +++++-
> > > 1 file changed, 5 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > > index b1746273f186..4fa471d9b3b2 100644
> > > --- a/fs/ceph/mds_client.c
> > > +++ b/fs/ceph/mds_client.c
> > > @@ -4956,7 +4956,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
> > > /* placeholder for nr_caps */
> > > err = ceph_pagelist_encode_32(recon_state.pagelist, 0);
> > > if (err)
> > > - goto fail;
> > > + goto fail_clear_cap_reconnect;
> > >
> > > if (test_bit(CEPHFS_FEATURE_MULTI_RECONNECT, &session->s_features)) {
> > > recon_state.msg_version = 3;
> > > @@ -5046,6 +5046,10 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
> > > ceph_pagelist_release(recon_state.pagelist);
> > > return;
> > >
> > > +fail_clear_cap_reconnect:
> > > + spin_lock(&session->s_cap_lock);
> > > + session->s_cap_reconnect = 0;
> > > + spin_unlock(&session->s_cap_lock);
> > > fail:
> > > ceph_msg_put(reply);
> > > up_read(&mdsc->snap_rwsem);
> >
> > Makes sense. Nice fix. But maybe we need to consider refactoring this
> > function to avoid such issues.
> >
> > Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
> >
> >
>
> The xfstests run on 7.0-rc6 didn't reveal any new issues related to the patch.
>
> Tested-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
>
Applied on testing branch of CephFS kernel client.
Thanks,
Slava.
© 2016 - 2026 Red Hat, Inc.