fs/ceph/file.c | 22 +++++++++++++---------
fs/ceph/io.c   | 44 +++++++++++++++++++++++++++++++++-----------
fs/ceph/io.h   |  8 +++++---
3 files changed, 51 insertions(+), 23 deletions(-)
This allows killing processes that wait for a lock when one process is
stuck waiting for the Ceph server. This is similar to the NFS commit
38a125b31504 ("fs/nfs/io: make nfs_start_io_*() killable").
Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
fs/ceph/file.c | 22 +++++++++++++---------
fs/ceph/io.c | 44 +++++++++++++++++++++++++++++++++-----------
fs/ceph/io.h | 8 +++++---
3 files changed, 51 insertions(+), 23 deletions(-)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4b8d59ebda00..d79c0774dc6e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -2127,10 +2127,11 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (ceph_inode_is_shutdown(inode))
return -ESTALE;
- if (direct_lock)
- ceph_start_io_direct(inode);
- else
- ceph_start_io_read(inode);
+ ret = direct_lock
+ ? ceph_start_io_direct(inode)
+ : ceph_start_io_read(inode);
+ if (ret)
+ return ret;
if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
want |= CEPH_CAP_FILE_CACHE;
@@ -2283,7 +2284,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos,
(fi->flags & CEPH_F_SYNC))
return copy_splice_read(in, ppos, pipe, len, flags);
- ceph_start_io_read(inode);
+ ret = ceph_start_io_read(inode);
+ if (ret)
+ return ret;
want = CEPH_CAP_FILE_CACHE;
if (fi->fmode & CEPH_FILE_MODE_LAZY)
@@ -2362,10 +2365,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
direct_lock = true;
retry_snap:
- if (direct_lock)
- ceph_start_io_direct(inode);
- else
- ceph_start_io_write(inode);
+ err = direct_lock
+ ? ceph_start_io_direct(inode)
+ : ceph_start_io_write(inode);
+ if (err)
+ goto out_unlocked;
if (iocb->ki_flags & IOCB_APPEND) {
err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
diff --git a/fs/ceph/io.c b/fs/ceph/io.c
index c456509b31c3..2735503bc479 100644
--- a/fs/ceph/io.c
+++ b/fs/ceph/io.c
@@ -47,20 +47,30 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
* Note that buffered writes and truncates both take a write lock on
* inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
*/
-void
+int
ceph_start_io_read(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
+ int err;
/* Be an optimist! */
- down_read(&inode->i_rwsem);
+ err = down_read_killable(&inode->i_rwsem);
+ if (err)
+ return err;
+
if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
- return;
+ return 0;
up_read(&inode->i_rwsem);
+
/* Slow path.... */
- down_write(&inode->i_rwsem);
+ err = down_write_killable(&inode->i_rwsem);
+ if (err)
+ return err;
+
ceph_block_o_direct(ci, inode);
downgrade_write(&inode->i_rwsem);
+
+ return 0;
}
/**
@@ -83,11 +93,13 @@ ceph_end_io_read(struct inode *inode)
* Declare that a buffered write operation is about to start, and ensure
* that we block all direct I/O.
*/
-void
+int
ceph_start_io_write(struct inode *inode)
{
- down_write(&inode->i_rwsem);
- ceph_block_o_direct(ceph_inode(inode), inode);
+ int err = down_write_killable(&inode->i_rwsem);
+ if (!err)
+ ceph_block_o_direct(ceph_inode(inode), inode);
+ return err;
}
/**
@@ -133,20 +145,30 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
* Note that buffered writes and truncates both take a write lock on
* inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
*/
-void
+int
ceph_start_io_direct(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
+ int err;
/* Be an optimist! */
- down_read(&inode->i_rwsem);
+ err = down_read_killable(&inode->i_rwsem);
+ if (err)
+ return err;
+
if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
- return;
+ return 0;
up_read(&inode->i_rwsem);
+
/* Slow path.... */
- down_write(&inode->i_rwsem);
+ err = down_write_killable(&inode->i_rwsem);
+ if (err)
+ return err;
+
ceph_block_buffered(ci, inode);
downgrade_write(&inode->i_rwsem);
+
+ return 0;
}
/**
diff --git a/fs/ceph/io.h b/fs/ceph/io.h
index fa594cd77348..08d58253f533 100644
--- a/fs/ceph/io.h
+++ b/fs/ceph/io.h
@@ -2,11 +2,13 @@
#ifndef _FS_CEPH_IO_H
#define _FS_CEPH_IO_H
-void ceph_start_io_read(struct inode *inode);
+#include <linux/compiler_attributes.h> // for __must_check
+
+__must_check int ceph_start_io_read(struct inode *inode);
void ceph_end_io_read(struct inode *inode);
-void ceph_start_io_write(struct inode *inode);
+__must_check int ceph_start_io_write(struct inode *inode);
void ceph_end_io_write(struct inode *inode);
-void ceph_start_io_direct(struct inode *inode);
+__must_check int ceph_start_io_direct(struct inode *inode);
void ceph_end_io_direct(struct inode *inode);
#endif /* FS_CEPH_IO_H */
--
2.45.2
What happened to this patch submission? Similar patches were accepted
in NFS and VFS core.
On Fri, Dec 6, 2024 at 5:50 PM Max Kellermann <max.kellermann@ionos.com> wrote:
>
> This allows killing processes that wait for a lock when one process is
> stuck waiting for the Ceph server. This is similar to the NFS commit
> 38a125b31504 ("fs/nfs/io: make nfs_start_io_*() killable").
>
> Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
> ---
> fs/ceph/file.c | 22 +++++++++++++---------
> fs/ceph/io.c | 44 +++++++++++++++++++++++++++++++++-----------
> fs/ceph/io.h | 8 +++++---
> 3 files changed, 51 insertions(+), 23 deletions(-)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 4b8d59ebda00..d79c0774dc6e 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -2127,10 +2127,11 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
> if (ceph_inode_is_shutdown(inode))
> return -ESTALE;
>
> - if (direct_lock)
> - ceph_start_io_direct(inode);
> - else
> - ceph_start_io_read(inode);
> + ret = direct_lock
> + ? ceph_start_io_direct(inode)
> + : ceph_start_io_read(inode);
> + if (ret)
> + return ret;
>
> if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
> want |= CEPH_CAP_FILE_CACHE;
> @@ -2283,7 +2284,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos,
> (fi->flags & CEPH_F_SYNC))
> return copy_splice_read(in, ppos, pipe, len, flags);
>
> - ceph_start_io_read(inode);
> + ret = ceph_start_io_read(inode);
> + if (ret)
> + return ret;
>
> want = CEPH_CAP_FILE_CACHE;
> if (fi->fmode & CEPH_FILE_MODE_LAZY)
> @@ -2362,10 +2365,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
> direct_lock = true;
>
> retry_snap:
> - if (direct_lock)
> - ceph_start_io_direct(inode);
> - else
> - ceph_start_io_write(inode);
> + err = direct_lock
> + ? ceph_start_io_direct(inode)
> + : ceph_start_io_write(inode);
> + if (err)
> + goto out_unlocked;
>
> if (iocb->ki_flags & IOCB_APPEND) {
> err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
> diff --git a/fs/ceph/io.c b/fs/ceph/io.c
> index c456509b31c3..2735503bc479 100644
> --- a/fs/ceph/io.c
> +++ b/fs/ceph/io.c
> @@ -47,20 +47,30 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
> * Note that buffered writes and truncates both take a write lock on
> * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
> */
> -void
> +int
> ceph_start_io_read(struct inode *inode)
> {
> struct ceph_inode_info *ci = ceph_inode(inode);
> + int err;
>
> /* Be an optimist! */
> - down_read(&inode->i_rwsem);
> + err = down_read_killable(&inode->i_rwsem);
> + if (err)
> + return err;
> +
> if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
> - return;
> + return 0;
> up_read(&inode->i_rwsem);
> +
> /* Slow path.... */
> - down_write(&inode->i_rwsem);
> + err = down_write_killable(&inode->i_rwsem);
> + if (err)
> + return err;
> +
> ceph_block_o_direct(ci, inode);
> downgrade_write(&inode->i_rwsem);
> +
> + return 0;
> }
>
> /**
> @@ -83,11 +93,13 @@ ceph_end_io_read(struct inode *inode)
> * Declare that a buffered write operation is about to start, and ensure
> * that we block all direct I/O.
> */
> -void
> +int
> ceph_start_io_write(struct inode *inode)
> {
> - down_write(&inode->i_rwsem);
> - ceph_block_o_direct(ceph_inode(inode), inode);
> + int err = down_write_killable(&inode->i_rwsem);
> + if (!err)
> + ceph_block_o_direct(ceph_inode(inode), inode);
> + return err;
> }
>
> /**
> @@ -133,20 +145,30 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
> * Note that buffered writes and truncates both take a write lock on
> * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
> */
> -void
> +int
> ceph_start_io_direct(struct inode *inode)
> {
> struct ceph_inode_info *ci = ceph_inode(inode);
> + int err;
>
> /* Be an optimist! */
> - down_read(&inode->i_rwsem);
> + err = down_read_killable(&inode->i_rwsem);
> + if (err)
> + return err;
> +
> if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
> - return;
> + return 0;
> up_read(&inode->i_rwsem);
> +
> /* Slow path.... */
> - down_write(&inode->i_rwsem);
> + err = down_write_killable(&inode->i_rwsem);
> + if (err)
> + return err;
> +
> ceph_block_buffered(ci, inode);
> downgrade_write(&inode->i_rwsem);
> +
> + return 0;
> }
>
> /**
> diff --git a/fs/ceph/io.h b/fs/ceph/io.h
> index fa594cd77348..08d58253f533 100644
> --- a/fs/ceph/io.h
> +++ b/fs/ceph/io.h
> @@ -2,11 +2,13 @@
> #ifndef _FS_CEPH_IO_H
> #define _FS_CEPH_IO_H
>
> -void ceph_start_io_read(struct inode *inode);
> +#include <linux/compiler_attributes.h> // for __must_check
> +
> +__must_check int ceph_start_io_read(struct inode *inode);
> void ceph_end_io_read(struct inode *inode);
> -void ceph_start_io_write(struct inode *inode);
> +__must_check int ceph_start_io_write(struct inode *inode);
> void ceph_end_io_write(struct inode *inode);
> -void ceph_start_io_direct(struct inode *inode);
> +__must_check int ceph_start_io_direct(struct inode *inode);
> void ceph_end_io_direct(struct inode *inode);
>
> #endif /* FS_CEPH_IO_H */
> --
> 2.45.2
>
On Mon, May 19, 2025 at 12:15 PM Max Kellermann
<max.kellermann@ionos.com> wrote:
>
> What happened to this patch submission? Similar patches were accepted
> in NFS and VFS core.
Hi Slava,
Can you take another look? It doesn't make sense to deviate from NFS
or other filesystems in this area.
Thanks,
Ilya
>
> On Fri, Dec 6, 2024 at 5:50 PM Max Kellermann <max.kellermann@ionos.com> wrote:
> >
> > This allows killing processes that wait for a lock when one process is
> > stuck waiting for the Ceph server. This is similar to the NFS commit
> > 38a125b31504 ("fs/nfs/io: make nfs_start_io_*() killable").
> >
> > Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
> > ---
> > fs/ceph/file.c | 22 +++++++++++++---------
> > fs/ceph/io.c | 44 +++++++++++++++++++++++++++++++++-----------
> > fs/ceph/io.h | 8 +++++---
> > 3 files changed, 51 insertions(+), 23 deletions(-)
> >
> > diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> > index 4b8d59ebda00..d79c0774dc6e 100644
> > --- a/fs/ceph/file.c
> > +++ b/fs/ceph/file.c
> > @@ -2127,10 +2127,11 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
> > if (ceph_inode_is_shutdown(inode))
> > return -ESTALE;
> >
> > - if (direct_lock)
> > - ceph_start_io_direct(inode);
> > - else
> > - ceph_start_io_read(inode);
> > + ret = direct_lock
> > + ? ceph_start_io_direct(inode)
> > + : ceph_start_io_read(inode);
> > + if (ret)
> > + return ret;
> >
> > if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
> > want |= CEPH_CAP_FILE_CACHE;
> > @@ -2283,7 +2284,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos,
> > (fi->flags & CEPH_F_SYNC))
> > return copy_splice_read(in, ppos, pipe, len, flags);
> >
> > - ceph_start_io_read(inode);
> > + ret = ceph_start_io_read(inode);
> > + if (ret)
> > + return ret;
> >
> > want = CEPH_CAP_FILE_CACHE;
> > if (fi->fmode & CEPH_FILE_MODE_LAZY)
> > @@ -2362,10 +2365,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
> > direct_lock = true;
> >
> > retry_snap:
> > - if (direct_lock)
> > - ceph_start_io_direct(inode);
> > - else
> > - ceph_start_io_write(inode);
> > + err = direct_lock
> > + ? ceph_start_io_direct(inode)
> > + : ceph_start_io_write(inode);
> > + if (err)
> > + goto out_unlocked;
> >
> > if (iocb->ki_flags & IOCB_APPEND) {
> > err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
> > diff --git a/fs/ceph/io.c b/fs/ceph/io.c
> > index c456509b31c3..2735503bc479 100644
> > --- a/fs/ceph/io.c
> > +++ b/fs/ceph/io.c
> > @@ -47,20 +47,30 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
> > * Note that buffered writes and truncates both take a write lock on
> > * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
> > */
> > -void
> > +int
> > ceph_start_io_read(struct inode *inode)
> > {
> > struct ceph_inode_info *ci = ceph_inode(inode);
> > + int err;
> >
> > /* Be an optimist! */
> > - down_read(&inode->i_rwsem);
> > + err = down_read_killable(&inode->i_rwsem);
> > + if (err)
> > + return err;
> > +
> > if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
> > - return;
> > + return 0;
> > up_read(&inode->i_rwsem);
> > +
> > /* Slow path.... */
> > - down_write(&inode->i_rwsem);
> > + err = down_write_killable(&inode->i_rwsem);
> > + if (err)
> > + return err;
> > +
> > ceph_block_o_direct(ci, inode);
> > downgrade_write(&inode->i_rwsem);
> > +
> > + return 0;
> > }
> >
> > /**
> > @@ -83,11 +93,13 @@ ceph_end_io_read(struct inode *inode)
> > * Declare that a buffered write operation is about to start, and ensure
> > * that we block all direct I/O.
> > */
> > -void
> > +int
> > ceph_start_io_write(struct inode *inode)
> > {
> > - down_write(&inode->i_rwsem);
> > - ceph_block_o_direct(ceph_inode(inode), inode);
> > + int err = down_write_killable(&inode->i_rwsem);
> > + if (!err)
> > + ceph_block_o_direct(ceph_inode(inode), inode);
> > + return err;
> > }
> >
> > /**
> > @@ -133,20 +145,30 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
> > * Note that buffered writes and truncates both take a write lock on
> > * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
> > */
> > -void
> > +int
> > ceph_start_io_direct(struct inode *inode)
> > {
> > struct ceph_inode_info *ci = ceph_inode(inode);
> > + int err;
> >
> > /* Be an optimist! */
> > - down_read(&inode->i_rwsem);
> > + err = down_read_killable(&inode->i_rwsem);
> > + if (err)
> > + return err;
> > +
> > if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
> > - return;
> > + return 0;
> > up_read(&inode->i_rwsem);
> > +
> > /* Slow path.... */
> > - down_write(&inode->i_rwsem);
> > + err = down_write_killable(&inode->i_rwsem);
> > + if (err)
> > + return err;
> > +
> > ceph_block_buffered(ci, inode);
> > downgrade_write(&inode->i_rwsem);
> > +
> > + return 0;
> > }
> >
> > /**
> > diff --git a/fs/ceph/io.h b/fs/ceph/io.h
> > index fa594cd77348..08d58253f533 100644
> > --- a/fs/ceph/io.h
> > +++ b/fs/ceph/io.h
> > @@ -2,11 +2,13 @@
> > #ifndef _FS_CEPH_IO_H
> > #define _FS_CEPH_IO_H
> >
> > -void ceph_start_io_read(struct inode *inode);
> > +#include <linux/compiler_attributes.h> // for __must_check
> > +
> > +__must_check int ceph_start_io_read(struct inode *inode);
> > void ceph_end_io_read(struct inode *inode);
> > -void ceph_start_io_write(struct inode *inode);
> > +__must_check int ceph_start_io_write(struct inode *inode);
> > void ceph_end_io_write(struct inode *inode);
> > -void ceph_start_io_direct(struct inode *inode);
> > +__must_check int ceph_start_io_direct(struct inode *inode);
> > void ceph_end_io_direct(struct inode *inode);
> >
> > #endif /* FS_CEPH_IO_H */
> > --
> > 2.45.2
> >
On Fri, 2024-12-06 at 17:50 +0100, Max Kellermann wrote:
> This allows killing processes that wait for a lock when one process
> is
> stuck waiting for the Ceph server. This is similar to the NFS commit
> 38a125b31504 ("fs/nfs/io: make nfs_start_io_*() killable").
>
> Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
> ---
> fs/ceph/file.c | 22 +++++++++++++---------
> fs/ceph/io.c | 44 +++++++++++++++++++++++++++++++++-----------
> fs/ceph/io.h | 8 +++++---
> 3 files changed, 51 insertions(+), 23 deletions(-)
>
>
<skipped>
>
> /**
> diff --git a/fs/ceph/io.h b/fs/ceph/io.h
> index fa594cd77348..08d58253f533 100644
> --- a/fs/ceph/io.h
> +++ b/fs/ceph/io.h
> @@ -2,11 +2,13 @@
> #ifndef _FS_CEPH_IO_H
> #define _FS_CEPH_IO_H
>
> -void ceph_start_io_read(struct inode *inode);
> +#include <linux/compiler_attributes.h> // for __must_check
Do we really need this comment (for __must_check)? It does not look
very informative. What do you think?
I am not completely sure that we really need to request the compiler to
check that the return value is processed. Do we really need to enforce it?
Thanks,
Slava.
> +
> +__must_check int ceph_start_io_read(struct inode *inode);
> void ceph_end_io_read(struct inode *inode);
> -void ceph_start_io_write(struct inode *inode);
> +__must_check int ceph_start_io_write(struct inode *inode);
> void ceph_end_io_write(struct inode *inode);
> -void ceph_start_io_direct(struct inode *inode);
> +__must_check int ceph_start_io_direct(struct inode *inode);
> void ceph_end_io_direct(struct inode *inode);
>
> #endif /* FS_CEPH_IO_H */
On Fri, Dec 6, 2024 at 6:40 PM Viacheslav Dubeyko <Slava.Dubeyko@ibm.com> wrote:
> Do we really need this comment (for __must_check)? It looks like not
> very informative. What do you think?

That's a question of taste. For my taste, such comments are (not needed
but) helpful; many similar comments exist in the Linux kernel.

> I am not completely sure that it really needs to request compiler to
> check that return value is processed. Do we really need to enforce it?

Yes, this should definitely be enforced. Callers which don't check the
return value are 100% buggy.
© 2016 - 2025 Red Hat, Inc.