ceph: support RWF_DONTCACHE

[PATCH] ceph: support RWF_DONTCACHE
Posted by Max Kellermann an hour ago
Similar to commit 902893e39076 ("NFS: Enable use of the RWF_DONTCACHE
flag on the NFS client").

The Ceph client now supports RWF_DONTCACHE for buffered file IO, but
it needs to preserve the IOCB_DONTCACHE state through
netfs_write_begin().

Unlike NFS, Ceph does not need to defer drop-behind until after a
separate COMMIT step.  Ceph's buffered writeback completes when the
OSD write completes, so the existing folio_end_writeback() handling is
sufficient.

This patch changes netfs because Ceph does not get its write_begin
folio directly.  ceph_write_begin() is a wrapper around
netfs_write_begin(), and that helper was calling
__filemap_get_folio(FGP_WRITEBEGIN, ...) without access to the kiocb.
As a result, the per-IO IOCB_DONTCACHE flag was lost before folio
allocation.

Fix this by passing the kiocb into netfs_write_begin() and switching
it to write_begin_get_folio(iocb, ...), so IOCB_DONTCACHE is
translated into FGP_DONTCACHE when appropriate.  Then set
FOP_DONTCACHE on ceph_file_fops so the VFS will accept RWF_DONTCACHE
for files on Ceph.

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
 fs/ceph/addr.c           | 3 ++-
 fs/ceph/file.c           | 1 +
 fs/netfs/buffered_read.c | 6 +++---
 include/linux/netfs.h    | 3 ++-
 4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 2090fc78529c..b4ea3ba8211e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1865,7 +1865,8 @@ static int ceph_write_begin(const struct kiocb *iocb,
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int r;
 
-	r = netfs_write_begin(&ci->netfs, file, inode->i_mapping, pos, len, foliop, NULL);
+	r = netfs_write_begin(iocb, &ci->netfs, file, inode->i_mapping, pos,
+			      len, foliop, NULL);
 	if (r < 0)
 		return r;
 
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 5e7c73a29aa3..341589ef32cd 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -3191,4 +3191,5 @@ const struct file_operations ceph_file_fops = {
 	.compat_ioctl = compat_ptr_ioctl,
 	.fallocate	= ceph_fallocate,
 	.copy_file_range = ceph_copy_file_range,
+	.fop_flags = FOP_DONTCACHE,
 };
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index a8c0d86118c5..6ec7c2459d8a 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -589,6 +589,7 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
 
 /**
  * netfs_write_begin - Helper to prepare for writing [DEPRECATED]
+ * @iocb: The kiocb describing the write request
  * @ctx: The netfs context
  * @file: The file to read from
  * @mapping: The mapping to read from
@@ -620,7 +621,7 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
  * Note that this should be considered deprecated and netfs_perform_write()
  * used instead.
  */
-int netfs_write_begin(struct netfs_inode *ctx,
+int netfs_write_begin(const struct kiocb *iocb, struct netfs_inode *ctx,
 		      struct file *file, struct address_space *mapping,
 		      loff_t pos, unsigned int len, struct folio **_folio,
 		      void **_fsdata)
@@ -631,8 +632,7 @@ int netfs_write_begin(struct netfs_inode *ctx,
 	int ret;
 
 retry:
-	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
-				    mapping_gfp_mask(mapping));
+	folio = write_begin_get_folio(iocb, mapping, index, len);
 	if (IS_ERR(folio))
 		return PTR_ERR(folio);
 
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index ba17ac5bf356..d58c614e32fd 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -414,7 +414,8 @@ int netfs_writeback_single(struct address_space *mapping,
 struct readahead_control;
 void netfs_readahead(struct readahead_control *);
 int netfs_read_folio(struct file *, struct folio *);
-int netfs_write_begin(struct netfs_inode *, struct file *,
+int netfs_write_begin(const struct kiocb *iocb,
+		      struct netfs_inode *ctx, struct file *file,
 		      struct address_space *, loff_t pos, unsigned int len,
 		      struct folio **, void **fsdata);
 int netfs_writepages(struct address_space *mapping,
-- 
2.47.3