[PATCH v6 32/43] btrfs: implement process_bio cb for fscrypt

Daniel Vacek posted 43 patches 2 days, 15 hours ago
[PATCH v6 32/43] btrfs: implement process_bio cb for fscrypt
Posted by Daniel Vacek 2 days, 15 hours ago
From: Josef Bacik <josef@toxicpanda.com>

We are going to be checksumming the encrypted data, so we have to
implement the ->process_bio fscrypt callback.  This will provide us with
the original bio and the encrypted bio to do work on.  For WRITEs this
will happen after the encrypted bio has been encrypted.  For READs this
will happen after the read has completed and before the decryption step
is done.

For writes this is straightforward: we can just pass the encrypted bio
to btrfs_csum_one_bio() and the csums will be added to the bbio as
normal.

For reads this is relatively straightforward, but requires some care.
We assume (because that's how it works currently) that the encrypted bio
matches the original bio.  This is important because we save the iter of
the bio before we submit.  If this changes in the future we'll need a
hook to give us the bi_iter of the decryption bio before it's submitted.
We check the csums before decryption.  If they don't match we simply
error out and let the normal path handle the repair work.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Daniel Vacek <neelx@suse.com>
---

v5: https://lore.kernel.org/linux-btrfs/ca32684b01ff8c252be515509137e0a4a0e5db7a.1706116485.git.josef@toxicpanda.com/
 * Adapt to btrfs_data_csum_ok() changes for bs > ps.  Mostly follow
   what was done in 052fd7a5cace ("btrfs: make read verification
   handle bs > ps cases without large folios").
 * Rename bbio::csum_done to csum_ok due to name collision.
   With upstream, member name csum_done was used for async csums.
---
 fs/btrfs/bio.c       | 38 +++++++++++++++++++++++++++++++++++++-
 fs/btrfs/bio.h       |  3 +++
 fs/btrfs/file-item.c | 14 ++++++++++++--
 fs/btrfs/fscrypt.c   | 29 +++++++++++++++++++++++++++++
 4 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index 1ae81997fb2d..69dc32cb4ed6 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -300,6 +300,34 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
 	return fbio;
 }
 
+blk_status_t btrfs_check_encrypted_read_bio(struct btrfs_bio *bbio, struct bio *enc_bio)
+{
+	struct btrfs_inode *inode = bbio->inode;
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	struct bvec_iter iter = bbio->saved_iter;
+	struct btrfs_device *dev = bbio->bio.bi_private;
+	const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
+	const u32 nr_steps = iter.bi_size / step;
+	phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];
+	phys_addr_t paddr;
+	unsigned int slot = 0;
+
+	/*
+	 * We have to use a copy of iter in case there's an error,
+	 * btrfs_check_read_bio will handle submitting the repair bios.
+	 */
+	btrfs_bio_for_each_block(paddr, enc_bio, &iter, step) {
+		ASSERT(slot < nr_steps);
+		paddrs[slot] = paddr;
+		slot++;
+	}
+	if (!btrfs_data_csum_ok(bbio, dev, 0, paddrs))
+			return BLK_STS_IOERR;
+
+	bbio->csum_ok = true;
+	return BLK_STS_OK;
+}
+
 static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *dev)
 {
 	struct btrfs_inode *inode = bbio->inode;
@@ -329,6 +357,10 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
 	/* Clear the I/O error. A failed repair will reset it. */
 	bbio->bio.bi_status = BLK_STS_OK;
 
+	/* This was an encrypted bio and we've already done the csum check. */
+	if (status == BLK_STS_OK && bbio->csum_ok)
+		goto out;
+
 	btrfs_bio_for_each_block(paddr, &bbio->bio, iter, step) {
 		paddrs[(offset / step) % nr_steps] = paddr;
 		offset += step;
@@ -340,6 +372,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
 							 paddrs, fbio);
 		}
 	}
+out:
 	if (bbio->csum != bbio->csum_inline)
 		kvfree(bbio->csum);
 
@@ -851,10 +884,13 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
 		/*
 		 * Csum items for reloc roots have already been cloned at this
 		 * point, so they are handled as part of the no-checksum case.
+		 *
+		 * Encrypted inodes are csum'ed via the ->process_bio callback.
 		 */
 		if (!(inode->flags & BTRFS_INODE_NODATASUM) &&
 		    !test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state) &&
-		    !btrfs_is_data_reloc_root(inode->root) && !bbio->is_remap) {
+		    !btrfs_is_data_reloc_root(inode->root) && !bbio->is_remap &&
+		    !IS_ENCRYPTED(&inode->vfs_inode)) {
 			if (should_async_write(bbio) &&
 			    btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num))
 				goto done;
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index 43f7544029ac..456d32db9e9e 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -43,6 +43,7 @@ struct btrfs_bio {
 		struct {
 			u8 *csum;
 			u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
+			bool csum_ok;
 			struct bvec_iter saved_iter;
 		};
 
@@ -130,5 +131,7 @@ void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_
 int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 fileoff,
 			    u32 length, u64 logical, const phys_addr_t paddrs[],
 			    unsigned int step, int mirror_num);
+blk_status_t btrfs_check_encrypted_read_bio(struct btrfs_bio *bbio,
+					    struct bio *enc_bio);
 
 #endif
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index ef0b6faf3de0..cee57a2f241b 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -331,6 +331,14 @@ static int search_csum_tree(struct btrfs_fs_info *fs_info,
 	return ret;
 }
 
+static inline bool inode_skip_csum(struct btrfs_inode *inode)
+{
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+
+	return (inode->flags & BTRFS_INODE_NODATASUM) ||
+		test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state);
+}
+
 /*
  * Lookup the checksum for the read bio in csum tree.
  *
@@ -350,8 +358,7 @@ int btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
 	int ret = 0;
 	u32 bio_offset = 0;
 
-	if ((inode->flags & BTRFS_INODE_NODATASUM) ||
-	    test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state))
+	if (inode_skip_csum(inode))
 		return 0;
 
 	/*
@@ -810,6 +817,9 @@ int btrfs_csum_one_bio(struct btrfs_bio *bbio, struct bio *bio, bool async)
 	struct btrfs_ordered_sum *sums;
 	unsigned nofs_flag;
 
+	if (inode_skip_csum(inode))
+		return 0;
+
 	nofs_flag = memalloc_nofs_save();
 	sums = kvzalloc(btrfs_ordered_sum_size(fs_info, bio->bi_iter.bi_size),
 		       GFP_KERNEL);
diff --git a/fs/btrfs/fscrypt.c b/fs/btrfs/fscrypt.c
index b6350b043994..f74404bdd89e 100644
--- a/fs/btrfs/fscrypt.c
+++ b/fs/btrfs/fscrypt.c
@@ -16,6 +16,7 @@
 #include "transaction.h"
 #include "volumes.h"
 #include "xattr.h"
+#include "file-item.h"
 
 /*
  * From a given location in a leaf, read a name into a qstr (usually a
@@ -212,6 +213,33 @@ static struct block_device **btrfs_fscrypt_get_devices(struct super_block *sb,
 	return devs;
 }
 
+static blk_status_t btrfs_process_encrypted_bio(struct bio *orig_bio,
+						struct bio *enc_bio)
+{
+	struct btrfs_bio *bbio;
+
+	/*
+	 * If our bio is from the normal fs_bio_set then we know this is a
+	 * mirror split and we can skip it, we'll get the real bio on the last
+	 * mirror and we can process that one.
+	 */
+	if (orig_bio->bi_pool == &fs_bio_set)
+		return BLK_STS_OK;
+
+	bbio = btrfs_bio(orig_bio);
+
+	if (bio_op(orig_bio) == REQ_OP_READ) {
+		/*
+		 * We have ->saved_iter based on the orig_bio, so if the block
+		 * layer changes we need to notice this asap so we can update
+		 * our code to handle the new world order.
+		 */
+		ASSERT(orig_bio == enc_bio);
+		return btrfs_check_encrypted_read_bio(bbio, enc_bio);
+	}
+	return btrfs_csum_one_bio(bbio, enc_bio, false);
+}
+
 int btrfs_fscrypt_load_extent_info(struct btrfs_inode *inode,
 				   struct btrfs_path *path,
 				   struct btrfs_key *key,
@@ -360,4 +388,5 @@ const struct fscrypt_operations btrfs_fscrypt_ops = {
 	.set_context = btrfs_fscrypt_set_context,
 	.empty_dir = btrfs_fscrypt_empty_dir,
 	.get_devices = btrfs_fscrypt_get_devices,
+	.process_bio = btrfs_process_encrypted_bio,
 };
-- 
2.51.0
Re: [PATCH v6 32/43] btrfs: implement process_bio cb for fscrypt
Posted by Chris Mason 18 hours ago
Daniel Vacek <neelx@suse.com> wrote:
> From: Josef Bacik <josef@toxicpanda.com>
> 
> We are going to be checksumming the encrypted data, so we have to
> implement the ->process_bio fscrypt callback.  This will provide us with
> the original bio and the encrypted bio to do work on.  For WRITE's this
> will happen after the encrypted bio has been encrypted.  For READ's this
> will happen after the read has completed and before the decryption step
> is done.
> 
> For write's this is straightforward, we can just pass in the encrypted
> bio to btrfs_csum_one_bio and then the csums will be added to the bbio
> as normal.
> 
> For read's this is relatively straightforward, but requires some care.
> We assume (because that's how it works currently) that the encrypted bio
> match the original bio, this is important because we save the iter of
> the bio before we submit.  If this changes in the future we'll need a
> hook to give us the bi_iter of the decryption bio before it's submitted.
> We check the csums before decryption.  If it doesn't match we simply
> error out and we let the normal path handle the repair work.

Hi everyone,

I'm running the btrfs for-next branch, along with the encryption patch
set, through my AI review prompts:

https://github.com/masoncl/review-prompts

I'm working on some btrfs specific instructions, but hopefully these reviews
are already useful.  I've kept the headers to make it easier to double check
that I'm actually replying to the right email, but please let me know if any
parts of this are incorrect:

commit 2b25418669e46c939ff864b1144e2c27d618920c
Author: Josef Bacik <josef@toxicpanda.com>

btrfs: implement process_bio cb for fscrypt

This commit implements the ->process_bio fscrypt callback for btrfs,
enabling checksum verification of encrypted data before decryption and
checksum generation after encryption.

> diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
> index 3b420866f3b82..c28f3f53bad28 100644
> --- a/fs/btrfs/bio.c
> +++ b/fs/btrfs/bio.c
> @@ -300,6 +300,34 @@ static struct btrfs_failed_bio *repair_one_sector(struct btrfs_bio *failed_bbio,
>  	return fbio;
>  }
>
> +blk_status_t btrfs_check_encrypted_read_bio(struct btrfs_bio *bbio, struct bio *enc_bio)
> +{
> +	struct btrfs_inode *inode = bbio->inode;
> +	struct btrfs_fs_info *fs_info = inode->root->fs_info;
> +	struct bvec_iter iter = bbio->saved_iter;
> +	struct btrfs_device *dev = bbio->bio.bi_private;
> +	const u32 step = min(fs_info->sectorsize, PAGE_SIZE);
> +	const u32 nr_steps = iter.bi_size / step;
> +	phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE];

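Can paddrs[] overflow here? BTRFS_MAX_BLOCKSIZE is 64KB, so with 4KB
pages the array has 16 elements (64KB / 4KB). However, nr_steps is
calculated from iter.bi_size, which can be much larger than 64KB for
merged bios.

If a bio has bi_size > 64KB, the loop below will write beyond the array
bounds when filling paddrs[slot]. The ASSERT(slot < nr_steps) inside the
loop does not prevent this: it bounds slot by the size of the bio, not
by the number of elements in paddrs[].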

> +	phys_addr_t paddr;
> +	unsigned int slot = 0;
> +
> +	/*
> +	 * We have to use a copy of iter in case there's an error,
> +	 * btrfs_check_read_bio will handle submitting the repair bios.
> +	 */
> +	btrfs_bio_for_each_block(paddr, enc_bio, &iter, step) {
> +		ASSERT(slot < nr_steps);
> +		paddrs[slot] = paddr;
> +		slot++;
> +	}
> +	if (!btrfs_data_csum_ok(bbio, dev, 0, paddrs))

Does this verify checksums for all sectors in the bio? Looking at
btrfs_data_csum_ok() in inode.c, it verifies one sector's worth of data
(one blocksize). It is called here exactly once, with bio_offset=0, so
only the first sector is checksummed.

However, the bio can contain multiple sectors (iter.bi_size could be
much larger than one sectorsize). If the bio has multiple sectors,
shouldn't this call btrfs_data_csum_ok() for each sector, or pass the
actual bio size so all sectors are verified?

The existing btrfs_check_read_bio() function loops through all sectors
and verifies each one individually. Since this function sets
bbio->csum_ok on success, and btrfs_check_read_bio() now skips its own
loop when csum_ok is set, the remaining sectors would never be verified.

> +			return BLK_STS_IOERR;
> +
> +	bbio->csum_ok = true;
> +	return BLK_STS_OK;
> +}

[ ... ]