[PATCH v1 09/13] ceph: make remaining I/O lock functions killable

Ionut Nechita (Wind River) posted 13 patches 3 weeks, 5 days ago
[PATCH v1 09/13] ceph: make remaining I/O lock functions killable
Posted by Ionut Nechita (Wind River) 3 weeks, 5 days ago
From: Ionut Nechita <ionut.nechita@windriver.com>

Following the same pattern as ceph_start_io_write(), make
ceph_start_io_read() and ceph_start_io_direct() killable to
prevent indefinite hangs when waiting for i_rwsem during
MDS/OSD unavailability.

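This completes the killable lock conversion for all ceph I/O
start functions, allowing blocked processes to be terminated
with SIGKILL instead of hanging indefinitely. Both functions
now return int instead of void, and their callers in
ceph_read_iter(), ceph_splice_read() and ceph_write_iter()
propagate the error.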

Signed-off-by: Ionut Nechita <ionut.nechita@windriver.com>
---
 fs/ceph/file.c | 27 +++++++++++++++++++--------
 fs/ceph/io.c   | 28 ++++++++++++++++++++--------
 fs/ceph/io.h   |  4 ++--
 3 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 01e4f31b1f2f3..c828552d51920 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -2122,10 +2122,15 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	if (ceph_inode_is_shutdown(inode))
 		return -ESTALE;
 
-	if (direct_lock)
-		ceph_start_io_direct(inode);
-	else
-		ceph_start_io_read(inode);
+	if (direct_lock) {
+		ret = ceph_start_io_direct(inode);
+		if (ret)
+			return ret;
+	} else {
+		ret = ceph_start_io_read(inode);
+		if (ret)
+			return ret;
+	}
 
 	if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
 		want |= CEPH_CAP_FILE_CACHE;
@@ -2278,7 +2283,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos,
 	    (fi->flags & CEPH_F_SYNC))
 		return copy_splice_read(in, ppos, pipe, len, flags);
 
-	ceph_start_io_read(inode);
+	ret = ceph_start_io_read(inode);
+	if (ret)
+		return ret;
 
 	want = CEPH_CAP_FILE_CACHE;
 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
@@ -2357,9 +2364,13 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		direct_lock = true;
 
 retry_snap:
-	if (direct_lock)
-		ceph_start_io_direct(inode);
-	else {
+	if (direct_lock) {
+		err = ceph_start_io_direct(inode);
+		if (err) {
+			ceph_free_cap_flush(prealloc_cf);
+			return err;
+		}
+	} else {
 		err = ceph_start_io_write(inode);
 		if (err) {
 			ceph_free_cap_flush(prealloc_cf);
diff --git a/fs/ceph/io.c b/fs/ceph/io.c
index f9ac89ec1d6a1..7bd57de2d9681 100644
--- a/fs/ceph/io.c
+++ b/fs/ceph/io.c
@@ -47,20 +47,26 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
  * Note that buffered writes and truncates both take a write lock on
  * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
  */
-void
+int
 ceph_start_io_read(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	int ret;
 
 	/* Be an optimist! */
-	down_read(&inode->i_rwsem);
+	ret = down_read_killable(&inode->i_rwsem);
+	if (ret)
+		return ret;
 	if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
-		return;
+		return 0;
 	up_read(&inode->i_rwsem);
 	/* Slow path.... */
-	down_write(&inode->i_rwsem);
+	ret = down_write_killable(&inode->i_rwsem);
+	if (ret)
+		return ret;
 	ceph_block_o_direct(ci, inode);
 	downgrade_write(&inode->i_rwsem);
+	return 0;
 }
 
 /**
@@ -138,20 +144,26 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
  * Note that buffered writes and truncates both take a write lock on
  * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
  */
-void
+int
 ceph_start_io_direct(struct inode *inode)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	int ret;
 
 	/* Be an optimist! */
-	down_read(&inode->i_rwsem);
+	ret = down_read_killable(&inode->i_rwsem);
+	if (ret)
+		return ret;
 	if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
-		return;
+		return 0;
 	up_read(&inode->i_rwsem);
 	/* Slow path.... */
-	down_write(&inode->i_rwsem);
+	ret = down_write_killable(&inode->i_rwsem);
+	if (ret)
+		return ret;
 	ceph_block_buffered(ci, inode);
 	downgrade_write(&inode->i_rwsem);
+	return 0;
 }
 
 /**
diff --git a/fs/ceph/io.h b/fs/ceph/io.h
index 94ce176df9997..9432b8b607650 100644
--- a/fs/ceph/io.h
+++ b/fs/ceph/io.h
@@ -2,11 +2,11 @@
 #ifndef _FS_CEPH_IO_H
 #define _FS_CEPH_IO_H
 
-void ceph_start_io_read(struct inode *inode);
+int ceph_start_io_read(struct inode *inode);
 void ceph_end_io_read(struct inode *inode);
 int ceph_start_io_write(struct inode *inode);
 void ceph_end_io_write(struct inode *inode);
-void ceph_start_io_direct(struct inode *inode);
+int ceph_start_io_direct(struct inode *inode);
 void ceph_end_io_direct(struct inode *inode);
 
 #endif /* FS_CEPH_IO_H */
-- 
2.53.0
Re: [PATCH v1 09/13] ceph: make remaining I/O lock functions killable
Posted by Viacheslav Dubeyko 3 weeks, 4 days ago
On Thu, 2026-03-12 at 10:16 +0200, Ionut Nechita (Wind River) wrote:
> From: Ionut Nechita <ionut.nechita@windriver.com>
> 
> Following the same pattern as ceph_start_io_write(), make
> ceph_start_io_read() and ceph_start_io_direct() killable to
> prevent indefinite hangs when waiting for i_rwsem during
> MDS/OSD unavailability.
> 
> This completes the killable lock conversion for all ceph I/O
> start functions, allowing blocked processes to be terminated
> with SIGKILL instead of hanging indefinitely.
> 
> Signed-off-by: Ionut Nechita <ionut.nechita@windriver.com>
> ---
>  fs/ceph/file.c | 27 +++++++++++++++++++--------
>  fs/ceph/io.c   | 28 ++++++++++++++++++++--------
>  fs/ceph/io.h   |  4 ++--
>  3 files changed, 41 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 01e4f31b1f2f3..c828552d51920 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -2122,10 +2122,15 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  	if (ceph_inode_is_shutdown(inode))
>  		return -ESTALE;
>  
> -	if (direct_lock)
> -		ceph_start_io_direct(inode);
> -	else
> -		ceph_start_io_read(inode);
> +	if (direct_lock) {
> +		ret = ceph_start_io_direct(inode);
> +		if (ret)
> +			return ret;
> +	} else {
> +		ret = ceph_start_io_read(inode);
> +		if (ret)
> +			return ret;
> +	}
>  
>  	if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
>  		want |= CEPH_CAP_FILE_CACHE;
> @@ -2278,7 +2283,9 @@ static ssize_t ceph_splice_read(struct file *in, loff_t *ppos,
>  	    (fi->flags & CEPH_F_SYNC))
>  		return copy_splice_read(in, ppos, pipe, len, flags);
>  
> -	ceph_start_io_read(inode);
> +	ret = ceph_start_io_read(inode);
> +	if (ret)
> +		return ret;
>  
>  	want = CEPH_CAP_FILE_CACHE;
>  	if (fi->fmode & CEPH_FILE_MODE_LAZY)
> @@ -2357,9 +2364,13 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  		direct_lock = true;
>  
>  retry_snap:
> -	if (direct_lock)
> -		ceph_start_io_direct(inode);
> -	else {
> +	if (direct_lock) {
> +		err = ceph_start_io_direct(inode);
> +		if (err) {
> +			ceph_free_cap_flush(prealloc_cf);
> +			return err;
> +		}
> +	} else {
>  		err = ceph_start_io_write(inode);
>  		if (err) {
>  			ceph_free_cap_flush(prealloc_cf);
> diff --git a/fs/ceph/io.c b/fs/ceph/io.c
> index f9ac89ec1d6a1..7bd57de2d9681 100644
> --- a/fs/ceph/io.c
> +++ b/fs/ceph/io.c
> @@ -47,20 +47,26 @@ static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
>   * Note that buffered writes and truncates both take a write lock on
>   * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
>   */
> -void
> +int
>  ceph_start_io_read(struct inode *inode)

Which kernel version is this patch based on? As far as I can see,
ceph_start_io_read() already uses down_read_killable() in v7.0-rc3 [1].

Thanks,
Slava.

[1] https://elixir.bootlin.com/linux/v7.0-rc3/source/fs/ceph/io.c#L59

>  {
>  	struct ceph_inode_info *ci = ceph_inode(inode);
> +	int ret;
>  
>  	/* Be an optimist! */
> -	down_read(&inode->i_rwsem);
> +	ret = down_read_killable(&inode->i_rwsem);
> +	if (ret)
> +		return ret;
>  	if (!(READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT))
> -		return;
> +		return 0;
>  	up_read(&inode->i_rwsem);
>  	/* Slow path.... */
> -	down_write(&inode->i_rwsem);
> +	ret = down_write_killable(&inode->i_rwsem);
> +	if (ret)
> +		return ret;
>  	ceph_block_o_direct(ci, inode);
>  	downgrade_write(&inode->i_rwsem);
> +	return 0;
>  }
>  
>  /**
> @@ -138,20 +144,26 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
>   * Note that buffered writes and truncates both take a write lock on
>   * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
>   */
> -void
> +int
>  ceph_start_io_direct(struct inode *inode)
>  {
>  	struct ceph_inode_info *ci = ceph_inode(inode);
> +	int ret;
>  
>  	/* Be an optimist! */
> -	down_read(&inode->i_rwsem);
> +	ret = down_read_killable(&inode->i_rwsem);
> +	if (ret)
> +		return ret;
>  	if (READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT)
> -		return;
> +		return 0;
>  	up_read(&inode->i_rwsem);
>  	/* Slow path.... */
> -	down_write(&inode->i_rwsem);
> +	ret = down_write_killable(&inode->i_rwsem);
> +	if (ret)
> +		return ret;
>  	ceph_block_buffered(ci, inode);
>  	downgrade_write(&inode->i_rwsem);
> +	return 0;
>  }
>  
>  /**
> diff --git a/fs/ceph/io.h b/fs/ceph/io.h
> index 94ce176df9997..9432b8b607650 100644
> --- a/fs/ceph/io.h
> +++ b/fs/ceph/io.h
> @@ -2,11 +2,11 @@
>  #ifndef _FS_CEPH_IO_H
>  #define _FS_CEPH_IO_H
>  
> -void ceph_start_io_read(struct inode *inode);
> +int ceph_start_io_read(struct inode *inode);
>  void ceph_end_io_read(struct inode *inode);
>  int ceph_start_io_write(struct inode *inode);
>  void ceph_end_io_write(struct inode *inode);
> -void ceph_start_io_direct(struct inode *inode);
> +int ceph_start_io_direct(struct inode *inode);
>  void ceph_end_io_direct(struct inode *inode);
>  
>  #endif /* FS_CEPH_IO_H */