fs/ceph/io: make ceph_start_io_*() killable

[PATCH] fs/ceph/io: make ceph_start_io_*() killable

Posted by Max Kellermann 1 year, 2 months ago

This allows killing processes that wait for a lock when one process is
stuck waiting for the Ceph server.  This is similar to the NFS commit
38a125b31504 ("fs/nfs/io: make nfs_start_io_*() killable").

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
 fs/ceph/file.c | 22 +++++++++++++---------
 fs/ceph/io.c   | 44 +++++++++++++++++++++++++++++++++-----------
 fs/ceph/io.h   |  8 +++++---
 3 files changed, 51 insertions(+), 23 deletions(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4b8d59ebda00..d79c0774dc6e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -2127,10 +2127,11 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	if (ceph_inode_is_shutdown(inode))
 		return -ESTALE;
 
-	if (direct_lock)
-		ceph_start_io_direct(inode);
-	else
-		ceph_start_io_read(inode);
+	ret = direct_lock
+		? ceph_start_io_direct(inode)
+		: ceph_start_io_read(inode);
+	if (ret)
+		return ret;
 
 	if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
 		want |= CEPH_CAP_FILE_CACHE;
@@ -2283,7 +2284,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos,
 	    (fi->flags & CEPH_F_SYNC))
 		return copy_splice_read(in, ppos, pipe, len, flags);
 
-	ceph_start_io_read(inode);
+	ret = ceph_start_io_read(inode);
+	if (ret)
+		return ret;
 
 	want = CEPH_CAP_FILE_CACHE;
 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
@@ -2362,10 +2365,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		direct_lock = true;
 
 retry_snap:
-	if (direct_lock)
-		ceph_start_io_direct(inode);
-	else
-		ceph_start_io_write(inode);
+	err = direct_lock
+		? ceph_start_io_direct(inode)
+		: ceph_start_io_write(inode);
+	if (err)
+		goto out_unlocked;
 
 	if (iocb->ki_flags & IOCB_APPEND) {
 		err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
diff --git a/fs/ceph/io.c b/fs/ceph/io.c
index c456509b31c3..2735503bc479 100644
--- a/fs/ceph/io.c
+++ b/fs/ceph/io.c
@@ -47,20 +47,30 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
  * Note that buffered writes and truncates both take a write lock on
  * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
  */
-void
+int
 ceph_start_io_read(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	int err;
 
 	/* Be an optimist! */
-	down_read(&inode->i_rwsem);
+	err = down_read_killable(&inode->i_rwsem);
+	if (err)
+		return err;
+
 	if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
-		return;
+		return 0;
 	up_read(&inode->i_rwsem);
+
 	/* Slow path.... */
-	down_write(&inode->i_rwsem);
+	err = down_write_killable(&inode->i_rwsem);
+	if (err)
+		return err;
+
 	ceph_block_o_direct(ci, inode);
 	downgrade_write(&inode->i_rwsem);
+
+	return 0;
 }
 
 /**
@@ -83,11 +93,13 @@ ceph_end_io_read(struct inode *inode)
  * Declare that a buffered write operation is about to start, and ensure
  * that we block all direct I/O.
  */
-void
+int
 ceph_start_io_write(struct inode *inode)
 {
-	down_write(&inode->i_rwsem);
-	ceph_block_o_direct(ceph_inode(inode), inode);
+	int err = down_write_killable(&inode->i_rwsem);
+	if (!err)
+		ceph_block_o_direct(ceph_inode(inode), inode);
+	return err;
 }
 
 /**
@@ -133,20 +145,30 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
  * Note that buffered writes and truncates both take a write lock on
  * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
  */
-void
+int
 ceph_start_io_direct(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	int err;
 
 	/* Be an optimist! */
-	down_read(&inode->i_rwsem);
+	err = down_read_killable(&inode->i_rwsem);
+	if (err)
+		return err;
+
 	if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
-		return;
+		return 0;
 	up_read(&inode->i_rwsem);
+
 	/* Slow path.... */
-	down_write(&inode->i_rwsem);
+	err = down_write_killable(&inode->i_rwsem);
+	if (err)
+		return err;
+
 	ceph_block_buffered(ci, inode);
 	downgrade_write(&inode->i_rwsem);
+
+	return 0;
 }
 
 /**
diff --git a/fs/ceph/io.h b/fs/ceph/io.h
index fa594cd77348..08d58253f533 100644
--- a/fs/ceph/io.h
+++ b/fs/ceph/io.h
@@ -2,11 +2,13 @@
 #ifndef _FS_CEPH_IO_H
 #define _FS_CEPH_IO_H
 
-void ceph_start_io_read(struct inode *inode);
+#include <linux/compiler_attributes.h> // for __must_check
+
+__must_check int ceph_start_io_read(struct inode *inode);
 void ceph_end_io_read(struct inode *inode);
-void ceph_start_io_write(struct inode *inode);
+__must_check int ceph_start_io_write(struct inode *inode);
 void ceph_end_io_write(struct inode *inode);
-void ceph_start_io_direct(struct inode *inode);
+__must_check int ceph_start_io_direct(struct inode *inode);
 void ceph_end_io_direct(struct inode *inode);
 
 #endif /* FS_CEPH_IO_H */
-- 
2.45.2

Re: [PATCH] fs/ceph/io: make ceph_start_io_*() killable

Posted by Max Kellermann 8 months, 3 weeks ago

What happened to this patch submission? Similar patches were accepted
in NFS and VFS core.

On Fri, Dec 6, 2024 at 5:50 PM Max Kellermann <max.kellermann@ionos.com> wrote:
>
> This allows killing processes that wait for a lock when one process is
> stuck waiting for the Ceph server.  This is similar to the NFS commit
> 38a125b31504 ("fs/nfs/io: make nfs_start_io_*() killable").
>
> Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
> ---
>  fs/ceph/file.c | 22 +++++++++++++---------
>  fs/ceph/io.c   | 44 +++++++++++++++++++++++++++++++++-----------
>  fs/ceph/io.h   |  8 +++++---
>  3 files changed, 51 insertions(+), 23 deletions(-)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 4b8d59ebda00..d79c0774dc6e 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -2127,10 +2127,11 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
>         if (ceph_inode_is_shutdown(inode))
>                 return -ESTALE;
>
> -       if (direct_lock)
> -               ceph_start_io_direct(inode);
> -       else
> -               ceph_start_io_read(inode);
> +       ret = direct_lock
> +               ? ceph_start_io_direct(inode)
> +               : ceph_start_io_read(inode);
> +       if (ret)
> +               return ret;
>
>         if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
>                 want |= CEPH_CAP_FILE_CACHE;
> @@ -2283,7 +2284,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos,
>             (fi->flags & CEPH_F_SYNC))
>                 return copy_splice_read(in, ppos, pipe, len, flags);
>
> -       ceph_start_io_read(inode);
> +       ret = ceph_start_io_read(inode);
> +       if (ret)
> +               return ret;
>
>         want = CEPH_CAP_FILE_CACHE;
>         if (fi->fmode & CEPH_FILE_MODE_LAZY)
> @@ -2362,10 +2365,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>                 direct_lock = true;
>
>  retry_snap:
> -       if (direct_lock)
> -               ceph_start_io_direct(inode);
> -       else
> -               ceph_start_io_write(inode);
> +       err = direct_lock
> +               ? ceph_start_io_direct(inode)
> +               : ceph_start_io_write(inode);
> +       if (err)
> +               goto out_unlocked;
>
>         if (iocb->ki_flags & IOCB_APPEND) {
>                 err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
> diff --git a/fs/ceph/io.c b/fs/ceph/io.c
> index c456509b31c3..2735503bc479 100644
> --- a/fs/ceph/io.c
> +++ b/fs/ceph/io.c
> @@ -47,20 +47,30 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
>   * Note that buffered writes and truncates both take a write lock on
>   * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
>   */
> -void
> +int
>  ceph_start_io_read(struct inode *inode)
>  {
>         struct ceph_inode_info *ci = ceph_inode(inode);
> +       int err;
>
>         /* Be an optimist! */
> -       down_read(&inode->i_rwsem);
> +       err = down_read_killable(&inode->i_rwsem);
> +       if (err)
> +               return err;
> +
>         if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
> -               return;
> +               return 0;
>         up_read(&inode->i_rwsem);
> +
>         /* Slow path.... */
> -       down_write(&inode->i_rwsem);
> +       err = down_write_killable(&inode->i_rwsem);
> +       if (err)
> +               return err;
> +
>         ceph_block_o_direct(ci, inode);
>         downgrade_write(&inode->i_rwsem);
> +
> +       return 0;
>  }
>
>  /**
> @@ -83,11 +93,13 @@ ceph_end_io_read(struct inode *inode)
>   * Declare that a buffered write operation is about to start, and ensure
>   * that we block all direct I/O.
>   */
> -void
> +int
>  ceph_start_io_write(struct inode *inode)
>  {
> -       down_write(&inode->i_rwsem);
> -       ceph_block_o_direct(ceph_inode(inode), inode);
> +       int err = down_write_killable(&inode->i_rwsem);
> +       if (!err)
> +               ceph_block_o_direct(ceph_inode(inode), inode);
> +       return err;
>  }
>
>  /**
> @@ -133,20 +145,30 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
>   * Note that buffered writes and truncates both take a write lock on
>   * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
>   */
> -void
> +int
>  ceph_start_io_direct(struct inode *inode)
>  {
>         struct ceph_inode_info *ci = ceph_inode(inode);
> +       int err;
>
>         /* Be an optimist! */
> -       down_read(&inode->i_rwsem);
> +       err = down_read_killable(&inode->i_rwsem);
> +       if (err)
> +               return err;
> +
>         if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
> -               return;
> +               return 0;
>         up_read(&inode->i_rwsem);
> +
>         /* Slow path.... */
> -       down_write(&inode->i_rwsem);
> +       err = down_write_killable(&inode->i_rwsem);
> +       if (err)
> +               return err;
> +
>         ceph_block_buffered(ci, inode);
>         downgrade_write(&inode->i_rwsem);
> +
> +       return 0;
>  }
>
>  /**
> diff --git a/fs/ceph/io.h b/fs/ceph/io.h
> index fa594cd77348..08d58253f533 100644
> --- a/fs/ceph/io.h
> +++ b/fs/ceph/io.h
> @@ -2,11 +2,13 @@
>  #ifndef _FS_CEPH_IO_H
>  #define _FS_CEPH_IO_H
>
> -void ceph_start_io_read(struct inode *inode);
> +#include <linux/compiler_attributes.h> // for __must_check
> +
> +__must_check int ceph_start_io_read(struct inode *inode);
>  void ceph_end_io_read(struct inode *inode);
> -void ceph_start_io_write(struct inode *inode);
> +__must_check int ceph_start_io_write(struct inode *inode);
>  void ceph_end_io_write(struct inode *inode);
> -void ceph_start_io_direct(struct inode *inode);
> +__must_check int ceph_start_io_direct(struct inode *inode);
>  void ceph_end_io_direct(struct inode *inode);
>
>  #endif /* FS_CEPH_IO_H */
> --
> 2.45.2
>

Re: [PATCH] fs/ceph/io: make ceph_start_io_*() killable

Posted by Ilya Dryomov 8 months ago

On Mon, May 19, 2025 at 12:15 PM Max Kellermann
<max.kellermann@ionos.com> wrote:
>
> What happened to this patch submission? Similar patches were accepted
> in NFS and VFS core.

Hi Slava,

Can you take another look?  It doesn't make sense to deviate from NFS
or other filesystems in this area.

Thanks,

                Ilya

>
> On Fri, Dec 6, 2024 at 5:50 PM Max Kellermann <max.kellermann@ionos.com> wrote:
> >
> > This allows killing processes that wait for a lock when one process is
> > stuck waiting for the Ceph server.  This is similar to the NFS commit
> > 38a125b31504 ("fs/nfs/io: make nfs_start_io_*() killable").
> >
> > Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
> > ---
> >  fs/ceph/file.c | 22 +++++++++++++---------
> >  fs/ceph/io.c   | 44 +++++++++++++++++++++++++++++++++-----------
> >  fs/ceph/io.h   |  8 +++++---
> >  3 files changed, 51 insertions(+), 23 deletions(-)
> >
> > diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> > index 4b8d59ebda00..d79c0774dc6e 100644
> > --- a/fs/ceph/file.c
> > +++ b/fs/ceph/file.c
> > @@ -2127,10 +2127,11 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
> >         if (ceph_inode_is_shutdown(inode))
> >                 return -ESTALE;
> >
> > -       if (direct_lock)
> > -               ceph_start_io_direct(inode);
> > -       else
> > -               ceph_start_io_read(inode);
> > +       ret = direct_lock
> > +               ? ceph_start_io_direct(inode)
> > +               : ceph_start_io_read(inode);
> > +       if (ret)
> > +               return ret;
> >
> >         if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
> >                 want |= CEPH_CAP_FILE_CACHE;
> > @@ -2283,7 +2284,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos,
> >             (fi->flags & CEPH_F_SYNC))
> >                 return copy_splice_read(in, ppos, pipe, len, flags);
> >
> > -       ceph_start_io_read(inode);
> > +       ret = ceph_start_io_read(inode);
> > +       if (ret)
> > +               return ret;
> >
> >         want = CEPH_CAP_FILE_CACHE;
> >         if (fi->fmode & CEPH_FILE_MODE_LAZY)
> > @@ -2362,10 +2365,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
> >                 direct_lock = true;
> >
> >  retry_snap:
> > -       if (direct_lock)
> > -               ceph_start_io_direct(inode);
> > -       else
> > -               ceph_start_io_write(inode);
> > +       err = direct_lock
> > +               ? ceph_start_io_direct(inode)
> > +               : ceph_start_io_write(inode);
> > +       if (err)
> > +               goto out_unlocked;
> >
> >         if (iocb->ki_flags & IOCB_APPEND) {
> >                 err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
> > diff --git a/fs/ceph/io.c b/fs/ceph/io.c
> > index c456509b31c3..2735503bc479 100644
> > --- a/fs/ceph/io.c
> > +++ b/fs/ceph/io.c
> > @@ -47,20 +47,30 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
> >   * Note that buffered writes and truncates both take a write lock on
> >   * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
> >   */
> > -void
> > +int
> >  ceph_start_io_read(struct inode *inode)
> >  {
> >         struct ceph_inode_info *ci = ceph_inode(inode);
> > +       int err;
> >
> >         /* Be an optimist! */
> > -       down_read(&inode->i_rwsem);
> > +       err = down_read_killable(&inode->i_rwsem);
> > +       if (err)
> > +               return err;
> > +
> >         if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
> > -               return;
> > +               return 0;
> >         up_read(&inode->i_rwsem);
> > +
> >         /* Slow path.... */
> > -       down_write(&inode->i_rwsem);
> > +       err = down_write_killable(&inode->i_rwsem);
> > +       if (err)
> > +               return err;
> > +
> >         ceph_block_o_direct(ci, inode);
> >         downgrade_write(&inode->i_rwsem);
> > +
> > +       return 0;
> >  }
> >
> >  /**
> > @@ -83,11 +93,13 @@ ceph_end_io_read(struct inode *inode)
> >   * Declare that a buffered write operation is about to start, and ensure
> >   * that we block all direct I/O.
> >   */
> > -void
> > +int
> >  ceph_start_io_write(struct inode *inode)
> >  {
> > -       down_write(&inode->i_rwsem);
> > -       ceph_block_o_direct(ceph_inode(inode), inode);
> > +       int err = down_write_killable(&inode->i_rwsem);
> > +       if (!err)
> > +               ceph_block_o_direct(ceph_inode(inode), inode);
> > +       return err;
> >  }
> >
> >  /**
> > @@ -133,20 +145,30 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
> >   * Note that buffered writes and truncates both take a write lock on
> >   * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
> >   */
> > -void
> > +int
> >  ceph_start_io_direct(struct inode *inode)
> >  {
> >         struct ceph_inode_info *ci = ceph_inode(inode);
> > +       int err;
> >
> >         /* Be an optimist! */
> > -       down_read(&inode->i_rwsem);
> > +       err = down_read_killable(&inode->i_rwsem);
> > +       if (err)
> > +               return err;
> > +
> >         if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
> > -               return;
> > +               return 0;
> >         up_read(&inode->i_rwsem);
> > +
> >         /* Slow path.... */
> > -       down_write(&inode->i_rwsem);
> > +       err = down_write_killable(&inode->i_rwsem);
> > +       if (err)
> > +               return err;
> > +
> >         ceph_block_buffered(ci, inode);
> >         downgrade_write(&inode->i_rwsem);
> > +
> > +       return 0;
> >  }
> >
> >  /**
> > diff --git a/fs/ceph/io.h b/fs/ceph/io.h
> > index fa594cd77348..08d58253f533 100644
> > --- a/fs/ceph/io.h
> > +++ b/fs/ceph/io.h
> > @@ -2,11 +2,13 @@
> >  #ifndef _FS_CEPH_IO_H
> >  #define _FS_CEPH_IO_H
> >
> > -void ceph_start_io_read(struct inode *inode);
> > +#include <linux/compiler_attributes.h> // for __must_check
> > +
> > +__must_check int ceph_start_io_read(struct inode *inode);
> >  void ceph_end_io_read(struct inode *inode);
> > -void ceph_start_io_write(struct inode *inode);
> > +__must_check int ceph_start_io_write(struct inode *inode);
> >  void ceph_end_io_write(struct inode *inode);
> > -void ceph_start_io_direct(struct inode *inode);
> > +__must_check int ceph_start_io_direct(struct inode *inode);
> >  void ceph_end_io_direct(struct inode *inode);
> >
> >  #endif /* FS_CEPH_IO_H */
> > --
> > 2.45.2
> >

Re: [PATCH] fs/ceph/io: make ceph_start_io_*() killable

Posted by Viacheslav Dubeyko 1 year, 2 months ago

On Fri, 2024-12-06 at 17:50 +0100, Max Kellermann wrote:
> This allows killing processes that wait for a lock when one process
> is
> stuck waiting for the Ceph server.  This is similar to the NFS commit
> 38a125b31504 ("fs/nfs/io: make nfs_start_io_*() killable").
> 
> Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
> ---
>  fs/ceph/file.c | 22 +++++++++++++---------
>  fs/ceph/io.c   | 44 +++++++++++++++++++++++++++++++++-----------
>  fs/ceph/io.h   |  8 +++++---
>  3 files changed, 51 insertions(+), 23 deletions(-)
> 
> 

<skipped>

>  
>  /**
> diff --git a/fs/ceph/io.h b/fs/ceph/io.h
> index fa594cd77348..08d58253f533 100644
> --- a/fs/ceph/io.h
> +++ b/fs/ceph/io.h
> @@ -2,11 +2,13 @@
>  #ifndef _FS_CEPH_IO_H
>  #define _FS_CEPH_IO_H
>  
> -void ceph_start_io_read(struct inode *inode);
> +#include <linux/compiler_attributes.h> // for __must_check

Do we really need this comment (for __must_check)? It looks like not
very informative. What do you think?

I am not completely sure that it really needs to request compiler to
check that return value is processed. Do we really need to enforce it?

Thanks,
Slava.

> +
> +__must_check int ceph_start_io_read(struct inode *inode);
>  void ceph_end_io_read(struct inode *inode);
> -void ceph_start_io_write(struct inode *inode);
> +__must_check int ceph_start_io_write(struct inode *inode);
>  void ceph_end_io_write(struct inode *inode);
> -void ceph_start_io_direct(struct inode *inode);
> +__must_check int ceph_start_io_direct(struct inode *inode);
>  void ceph_end_io_direct(struct inode *inode);
>  
>  #endif /* FS_CEPH_IO_H */

Re: [PATCH] fs/ceph/io: make ceph_start_io_*() killable

Posted by Max Kellermann 1 year, 2 months ago

On Fri, Dec 6, 2024 at 6:40 PM Viacheslav Dubeyko <Slava.Dubeyko@ibm.com> wrote:
> Do we really need this comment (for __must_check)? It looks like not
> very informative. What do you think?

That's a question of taste. For my taste, such comments are (not
needed but) helpful; many similar comments exist in the Linux kernel.

> I am not completely sure that it really needs to request compiler to
> check that return value is processed. Do we really need to enforce it?

Yes, should definitely be enforced. Callers which don't check the
return value are 100% buggy.