[PATCH v6 24/43] btrfs: add extent encryption context tree item type

Daniel Vacek posted 43 patches 2 days, 14 hours ago
[PATCH v6 24/43] btrfs: add extent encryption context tree item type
Posted by Daniel Vacek 2 days, 14 hours ago
From: Josef Bacik <josef@toxicpanda.com>

The fscrypt encryption context will be stored as a new tree item type.
This gives us flexibility to include different things in the future.

Also update the tree-checker to validate the new item type.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Daniel Vacek <neelx@suse.com>
---

v5: https://lore.kernel.org/linux-btrfs/7ee9171262857336011bf0e121846617c5181fa4.1706116485.git.josef@toxicpanda.com/
    ("btrfs: add an optional encryption context to the end of file extents")
 * Not much left from the original commit.
   - This was reworked so that the encryption context is now a separate
     tree item with its own unique key.
   - It is tightly related to the file extent item but still optional and
     only used for encrypted extents.
   - The content (and hence the size as well) comes from the fscrypt
     subsystem and it is not touched by btrfs at all.
   - It's handled as raw binary data (u8 *).
   - This patch makes sure it is correctly removed when the related extent
     is dropped.
 * As a result, the following patch https://lore.kernel.org/linux-btrfs/f0d9b2d3a40b7a963a977d3dfb62793ff7b065d1.1706116485.git.josef@toxicpanda.com/
   ("btrfs: explicitly track file extent length for replace and drop")
   was dropped as not applicable.  There's no need to track the size
   anymore as it just matches the size of the stored item.

   [RFC]: Should I have kept the structure with __u8 type? Like:

   |  struct btrfs_encryption_info {
   |        __u8 context[0];
   |  };

   I did remove it as it was only used to extend the file extent item
   structure and hence no longer needed.
---
 fs/btrfs/file.c                 | 61 +++++++++++++++++++++++++++++++++
 fs/btrfs/tree-checker.c         | 59 ++++++++++++++++++++++++++++---
 include/uapi/linux/btrfs_tree.h |  8 +++++
 3 files changed, 124 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 3c0db279f592..639462164d08 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -150,6 +150,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
 	u64 extent_offset = 0;
 	u64 extent_end = 0;
 	u64 last_end = args->start;
+	u64 first_ctx = 1, last_ctx = 0;
 	int del_nr = 0;
 	int del_slot = 0;
 	int extent_type;
@@ -407,6 +408,12 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
 				del_nr++;
 			}
 
+			if (btrfs_file_extent_encryption(leaf, fi) == BTRFS_ENCRYPTION_FSCRYPT) {
+				if (first_ctx > last_ctx)
+					first_ctx = key.offset;
+				last_ctx = key.offset;
+			}
+
 			if (update_refs &&
 			    extent_type == BTRFS_FILE_EXTENT_INLINE) {
 				args->bytes_found += extent_end - key.offset;
@@ -496,6 +503,60 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
 		args->extent_inserted = true;
 	}
 
+	if (first_ctx <= last_ctx) {
+		int slot, nritems;
+
+		btrfs_release_path(path);
+
+		key.objectid = ino;
+		key.type = BTRFS_FSCRYPT_CTX_KEY;
+		key.offset = first_ctx;
+
+		ret = btrfs_search_slot(trans, root, &key, path, modify_tree, !!modify_tree);
+		if (ret < 0)
+			goto out_ctx;
+next_leaf:
+		leaf = path->nodes[0];
+		slot = path->slots[0];
+
+		del_slot = slot;
+		del_nr = 0;
+		nritems = btrfs_header_nritems(leaf);
+		while (slot < nritems) {
+			btrfs_item_key_to_cpu(leaf, &key, slot);
+			if (key.objectid > ino ||
+			    key.type > BTRFS_FSCRYPT_CTX_KEY ||
+			    key.offset > last_ctx)
+				break;
+			del_nr++;
+			slot++;
+		}
+		if (del_nr) {
+			ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
+			if (unlikely(ret)) {
+				btrfs_abort_transaction(trans, ret);
+				goto out_ctx;
+			}
+
+			if (slot == nritems) {
+				ret = btrfs_next_leaf(root, path);
+				if (!ret)
+					goto next_leaf;
+				if (ret > 0)
+					ret = 0;
+			}
+		}
+out_ctx:
+		if (args->path && args->extent_inserted) {
+			btrfs_release_path(path);
+
+			key.objectid = ino;
+			key.type = BTRFS_EXTENT_DATA_KEY;
+			key.offset = args->start;
+			ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
+		}
+	}
+
 	if (!args->path)
 		btrfs_free_path(path);
 	else if (!args->extent_inserted)
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 9675dbcd78a3..776901f297fe 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -186,6 +186,7 @@ static bool check_prev_ino(struct extent_buffer *leaf,
 	       key->type == BTRFS_INODE_EXTREF_KEY ||
 	       key->type == BTRFS_DIR_INDEX_KEY ||
 	       key->type == BTRFS_DIR_ITEM_KEY ||
+	       key->type == BTRFS_FSCRYPT_CTX_KEY ||
 	       key->type == BTRFS_EXTENT_DATA_KEY, "key->type=%u", key->type);
 
 	/*
@@ -204,6 +205,39 @@ static bool check_prev_ino(struct extent_buffer *leaf,
 		prev_key->objectid, key->objectid);
 	return false;
 }
+static int check_fscrypt_context(struct extent_buffer *leaf,
+				 struct btrfs_key *key, int slot,
+				 struct btrfs_key *prev_key)
+{
+	u32 sectorsize = leaf->fs_info->sectorsize;
+	u32 item_size = btrfs_item_size(leaf, slot);
+
+	if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
+		file_extent_err(leaf, slot,
+"unaligned file_offset for encryption context, have %llu should be aligned to %u",
+			key->offset, sectorsize);
+		return -EUCLEAN;
+	}
+
+	/*
+	 * Previous key must have the same key->objectid (ino).
+	 * It can be XATTR_ITEM, INODE_ITEM or just another EXTENT_DATA.
+	 * But if objectids mismatch, it means we have a missing
+	 * INODE_ITEM.
+	 */
+	if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
+		return -EUCLEAN;
+
+	if (unlikely(item_size > BTRFS_MAX_EXTENT_CTX_SIZE)) {
+		file_extent_err(leaf, slot,
+	"invalid encryption context size, have %u expect a maximum of %u",
+				item_size, BTRFS_MAX_EXTENT_CTX_SIZE);
+		return -EUCLEAN;
+	}
+
+	return 0;
+}
+
 static int check_extent_data_item(struct extent_buffer *leaf,
 				  struct btrfs_key *key, int slot,
 				  struct btrfs_key *prev_key)
@@ -214,6 +248,7 @@ static int check_extent_data_item(struct extent_buffer *leaf,
 	u32 item_size = btrfs_item_size(leaf, slot);
 	u64 extent_end;
 	u8 policy;
+	u8 fe_type;
 
 	if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
 		file_extent_err(leaf, slot,
@@ -244,12 +279,12 @@ static int check_extent_data_item(struct extent_buffer *leaf,
 				SZ_4K);
 		return -EUCLEAN;
 	}
-	if (unlikely(btrfs_file_extent_type(leaf, fi) >=
-		     BTRFS_NR_FILE_EXTENT_TYPES)) {
+
+	fe_type = btrfs_file_extent_type(leaf, fi);
+	if (unlikely(fe_type >= BTRFS_NR_FILE_EXTENT_TYPES)) {
 		file_extent_err(leaf, slot,
 		"invalid type for file extent, have %u expect range [0, %u]",
-			btrfs_file_extent_type(leaf, fi),
-			BTRFS_NR_FILE_EXTENT_TYPES - 1);
+			fe_type, BTRFS_NR_FILE_EXTENT_TYPES - 1);
 		return -EUCLEAN;
 	}
 
@@ -298,6 +333,19 @@ static int check_extent_data_item(struct extent_buffer *leaf,
 		return 0;
 	}
 
+	if (policy == BTRFS_ENCRYPTION_FSCRYPT) {
+		/*
+		 * Only regular and prealloc extents should have an encryption
+		 * context.
+		 */
+		if (unlikely(fe_type != BTRFS_FILE_EXTENT_REG &&
+			     fe_type != BTRFS_FILE_EXTENT_PREALLOC)) {
+			file_extent_err(leaf, slot,
+		"invalid type for encrypted file extent, have %u", fe_type);
+			return -EUCLEAN;
+		}
+	}
+
 	/* Regular or preallocated extent has fixed item size */
 	if (unlikely(item_size != sizeof(*fi))) {
 		file_extent_err(leaf, slot,
@@ -1948,6 +1996,9 @@ static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf,
 	case BTRFS_EXTENT_CSUM_KEY:
 		ret = check_csum_item(leaf, key, slot, prev_key);
 		break;
+	case BTRFS_FSCRYPT_CTX_KEY:
+		ret = check_fscrypt_context(leaf, key, slot, prev_key);
+		break;
 	case BTRFS_DIR_ITEM_KEY:
 	case BTRFS_DIR_INDEX_KEY:
 	case BTRFS_XATTR_ITEM_KEY:
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index cb8dbcc612e9..76937d66b5dd 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -168,6 +168,7 @@
 #define BTRFS_VERITY_MERKLE_ITEM_KEY	37
 
 #define BTRFS_FSCRYPT_INODE_CTX_KEY	41
+#define BTRFS_FSCRYPT_CTX_KEY		42
 
 #define BTRFS_ORPHAN_ITEM_KEY		48
 /* reserve 2-15 close to the inode for later flexibility */
@@ -1079,6 +1080,13 @@ enum {
 	BTRFS_NR_FILE_EXTENT_TYPES = 3,
 };
 
+/*
+ * Currently just the FSCRYPT_SET_CONTEXT_MAX_SIZE, which is larger than the
+ * current extent context size from fscrypt, so this should give us plenty of
+ * breathing room for expansion later.
+ */
+#define BTRFS_MAX_EXTENT_CTX_SIZE 40
+
 enum btrfs_encryption_type {
 	BTRFS_ENCRYPTION_NONE,
 	BTRFS_ENCRYPTION_FSCRYPT,
-- 
2.51.0
Re: [PATCH v6 24/43] btrfs: add extent encryption context tree item type
Posted by Chris Mason 17 hours ago
Daniel Vacek <neelx@suse.com> wrote:
> From: Josef Bacik <josef@toxicpanda.com>
> 
> The fscrypt encryption context will be stored as a new tree item type.
> This gives us flexibility to include different things in the future.
> 
> Also update the tree-checker to validate the new item type.

Hi everyone,

I'm running the btrfs for-next branch, along with the encryption patch set
through my AI review prompts:

https://github.com/masoncl/review-prompts

I'm working on some btrfs specific instructions, but hopefully these reviews
are already useful.  I've kept the headers to make it easier to double check
that I'm actually replying to the right email, but please let me know if any
parts of this are incorrect:

commit 203bfb91a4db9c826fa7f72034c13a1810a1b605
Author: Josef Bacik <josef@toxicpanda.com>

btrfs: add extent encryption context tree item type

The fscrypt encryption context will be stored as a new tree item type.
This gives us flexibility to include different things in the future.
Also update the tree-checker to validate the new item type.

> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index 3c0db279f592e..639462164d089 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c

[ ... ]

> @@ -496,6 +503,60 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
>  		args->extent_inserted = true;
>  	}
>
> +	if (first_ctx <= last_ctx) {
> +		int slot, nritems;
> +
> +		btrfs_release_path(path);
> +
> +		key.objectid = ino;
> +		key.type = BTRFS_FSCRYPT_CTX_KEY;
> +		key.offset = first_ctx;
> +
> +		ret = btrfs_search_slot(trans, root, &key, path, modify_tree, !!modify_tree);
> +		if (ret < 0)
> +			goto out_ctx;

[ ... ]

> +out_ctx:
> +		if (args->path && args->extent_inserted) {
> +			btrfs_release_path(path);
> +
> +			key.objectid = ino;
> +			key.type = BTRFS_EXTENT_DATA_KEY;
> +			key.offset = args->start;
> +			ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
> +		}
> +	}
> +

Can btrfs_search_slot() at line 556 overwrite an error that occurred
earlier in the out_ctx path?

If btrfs_search_slot() at line 515 fails and execution jumps to out_ctx,
but args->path and args->extent_inserted are both true, the second
btrfs_search_slot() call will overwrite the original error stored in ret.

If this second search succeeds (returns 0 or >0), the function returns
success to the caller despite the fscrypt context cleanup having failed.
This could lead to inconsistent filesystem state where extent data was
inserted but the corresponding fscrypt context items were not properly
cleaned up.

> diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
> index 9675dbcd78a3b..776901f297fea 100644
> --- a/fs/btrfs/tree-checker.c
> +++ b/fs/btrfs/tree-checker.c

[ ... ]

> +static int check_fscrypt_context(struct extent_buffer *leaf,
> +				 struct btrfs_key *key, int slot,
> +				 struct btrfs_key *prev_key)
> +{
> +	u32 sectorsize = leaf->fs_info->sectorsize;
> +	u32 item_size = btrfs_item_size(leaf, slot);
> +
> +	if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
> +		file_extent_err(leaf, slot,
> +"unaligned file_offset for encryption context, have %llu should be aligned to %u",
> +			key->offset, sectorsize);
> +		return -EUCLEAN;
> +	}
> +
> +	/*
> +	 * Previous key must have the same key->objectid (ino).
> +	 * It can be XATTR_ITEM, INODE_ITEM or just another EXTENT_DATA.
> +	 * But if objectids mismatch, it means we have a missing
> +	 * INODE_ITEM.
> +	 */

This isn't a bug, but the comment says "It can be XATTR_ITEM, INODE_ITEM
or just another EXTENT_DATA" - can EXTENT_DATA actually be the previous
key type?

For BTRFS_FSCRYPT_CTX_KEY (type=42), EXTENT_DATA (type=108) cannot be a
previous key because keys are sorted by (objectid, type, offset) and
108 > 42.

The correct previous key types would be: XATTR_ITEM, INODE_ITEM,
FSCRYPT_INODE_CTX_KEY, or another FSCRYPT_CTX_KEY.

> @@ -214,6 +248,7 @@ static int check_extent_data_item(struct extent_buffer *leaf,
>  	u32 item_size = btrfs_item_size(leaf, slot);
>  	u64 extent_end;
>  	u8 policy;
> +	u8 fe_type;
>
>  	if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
>  		file_extent_err(leaf, slot,

[ ... ]

> @@ -298,6 +333,19 @@ static int check_extent_data_item(struct extent_buffer *leaf,
>  		return 0;
>  	}
>
> +	if (policy == BTRFS_ENCRYPTION_FSCRYPT) {
> +		/*
> +		 * Only regular and prealloc extents should have an encryption
> +		 * context.
> +		 */
> +		if (unlikely(fe_type != BTRFS_FILE_EXTENT_REG &&
> +			     fe_type != BTRFS_FILE_EXTENT_PREALLOC)) {
> +			file_extent_err(leaf, slot,
> +		"invalid type for encrypted file extent, have %u", fe_type);
> +			return -EUCLEAN;
> +		}
> +	}

Can this validation check detect encrypted inline extents? Looking at
the function flow, inline extents (BTRFS_FILE_EXTENT_INLINE) are handled
at lines 310-333 and return before reaching this check.

If a corrupted filesystem has an encrypted inline extent, it would pass
tree-checker validation. Should the encryption check be moved to also
cover the inline extent path, or should inline extents explicitly reject
BTRFS_ENCRYPTION_FSCRYPT?